10 years ago · 41a9802fd8
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -251,7 +251,7 @@ struct tcf_proto {
 
				 struct qdisc_skb_cb {
			
 
				 	unsigned int		pkt_len;
			
 
				 	u16			slave_dev_queue_mapping;
			
 
				-	u16			_pad;
			
 
				+	u16			tc_classid;
			
 
				 #define QDISC_CB_PRIV_LEN 20
			
 
				 	unsigned char		data[QDISC_CB_PRIV_LEN];
			
 
				 };
			
@@ -402,6 +402,7 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
 
				 			       const struct qdisc_size_table *stab);
			
 
				 bool tcf_destroy(struct tcf_proto *tp, bool force);
			
 
				 void tcf_destroy_chain(struct tcf_proto __rcu **fl);
			
 
				+int skb_do_redirect(struct sk_buff *);
			
 
				 
			
 
				 /* Reset all TX qdiscs greater then index of a device.  */
			
 
				 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
			
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -272,6 +272,14 @@ enum bpf_func_id {
 
				 	BPF_FUNC_skb_get_tunnel_key,
			
 
				 	BPF_FUNC_skb_set_tunnel_key,
			
 
				 	BPF_FUNC_perf_event_read,	/* u64 bpf_perf_event_read(&map, index) */
			
 
				+	/**
			
 
				+	 * bpf_redirect(ifindex, flags) - redirect to another netdev
			
 
				+	 * @ifindex: ifindex of the net device
			
 
				+	 * @flags: bit 0 - if set, redirect to ingress instead of egress
			
 
				+	 *         other bits - reserved
			
 
				+	 * Return: TC_ACT_REDIRECT
			
 
				+	 */
			
 
				+	BPF_FUNC_redirect,
			
 
				 	__BPF_FUNC_MAX_ID,
			
 
				 };
			
 
				 
			
@@ -293,6 +301,7 @@ struct __sk_buff {
 
				 	__u32 tc_index;
			
 
				 	__u32 cb[5];
			
 
				 	__u32 hash;
			
 
				+	__u32 tc_classid;
			
 
				 };
			
 
				 
			
 
				 struct bpf_tunnel_key {
			
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -87,6 +87,7 @@ enum {
 
				 #define TC_ACT_STOLEN		4
			
 
				 #define TC_ACT_QUEUED		5
			
 
				 #define TC_ACT_REPEAT		6
			
 
				+#define TC_ACT_REDIRECT		7
			
 
				 #define TC_ACT_JUMP		0x10000000
			
 
				 
			
 
				 /* Action type identifiers*/
			
@@ -373,6 +374,8 @@ enum {
 
				 
			
 
				 /* BPF classifier */
			
 
				 
			
 
				+#define TCA_BPF_FLAG_ACT_DIRECT		(1 << 0)
			
 
				+
			
 
				 enum {
			
 
				 	TCA_BPF_UNSPEC,
			
 
				 	TCA_BPF_ACT,
			
@@ -382,6 +385,7 @@ enum {
 
				 	TCA_BPF_OPS,
			
 
				 	TCA_BPF_FD,
			
 
				 	TCA_BPF_NAME,
			
 
				+	TCA_BPF_FLAGS,
			
 
				 	__TCA_BPF_MAX,
			
 
				 };
			
 
				 
			
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3670,6 +3670,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 
				 	case TC_ACT_QUEUED:
			
 
				 		kfree_skb(skb);
			
 
				 		return NULL;
			
 
				+	case TC_ACT_REDIRECT:
			
 
				+		/* skb_mac_header check was done by cls/act_bpf, so
			
 
				+		 * we can safely push the L2 header back before
			
 
				+		 * redirecting to another netdev
			
 
				+		 */
			
 
				+		__skb_push(skb, skb->mac_len);
			
 
				+		skb_do_redirect(skb);
			
 
				+		return NULL;
			
 
				 	default:
			
 
				 		break;
			
 
				 	}
			
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1427,6 +1427,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
 
				 	.arg3_type      = ARG_ANYTHING,
			
 
				 };
			
 
				 
			
 
				+struct redirect_info {
			
 
				+	u32 ifindex;
			
 
				+	u32 flags;
			
 
				+};
			
 
				+
			
 
				+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
			
 
				+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
			
 
				+{
			
 
				+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
			
 
				+
			
 
				+	ri->ifindex = ifindex;
			
 
				+	ri->flags = flags;
			
 
				+	return TC_ACT_REDIRECT;
			
 
				+}
			
 
				+
			
 
				+int skb_do_redirect(struct sk_buff *skb)
			
 
				+{
			
 
				+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
			
 
				+	struct net_device *dev;
			
 
				+
			
 
				+	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
			
 
				+	ri->ifindex = 0;
			
 
				+	if (unlikely(!dev)) {
			
 
				+		kfree_skb(skb);
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	if (BPF_IS_REDIRECT_INGRESS(ri->flags))
			
 
				+		return dev_forward_skb(dev, skb);
			
 
				+
			
 
				+	skb->dev = dev;
			
 
				+	return dev_queue_xmit(skb);
			
 
				+}
			
 
				+
			
 
				+const struct bpf_func_proto bpf_redirect_proto = {
			
 
				+	.func           = bpf_redirect,
			
 
				+	.gpl_only       = false,
			
 
				+	.ret_type       = RET_INTEGER,
			
 
				+	.arg1_type      = ARG_ANYTHING,
			
 
				+	.arg2_type      = ARG_ANYTHING,
			
 
				+};
			
 
				+
			
 
				 static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
			
 
				 {
			
 
				 	return task_get_classid((struct sk_buff *) (unsigned long) r1);
			
@@ -1607,6 +1649,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 
				 		return &bpf_skb_get_tunnel_key_proto;
			
 
				 	case BPF_FUNC_skb_set_tunnel_key:
			
 
				 		return bpf_get_skb_set_tunnel_key_proto();
			
 
				+	case BPF_FUNC_redirect:
			
 
				+		return &bpf_redirect_proto;
			
 
				 	default:
			
 
				 		return sk_filter_func_proto(func_id);
			
 
				 	}
			
@@ -1632,6 +1676,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 
				 static bool sk_filter_is_valid_access(int off, int size,
			
 
				 				      enum bpf_access_type type)
			
 
				 {
			
 
				+	if (off == offsetof(struct __sk_buff, tc_classid))
			
 
				+		return false;
			
 
				+
			
 
				 	if (type == BPF_WRITE) {
			
 
				 		switch (off) {
			
 
				 		case offsetof(struct __sk_buff, cb[0]) ...
			
@@ -1648,6 +1695,9 @@ static bool sk_filter_is_valid_access(int off, int size,
 
				 static bool tc_cls_act_is_valid_access(int off, int size,
			
 
				 				       enum bpf_access_type type)
			
 
				 {
			
 
				+	if (off == offsetof(struct __sk_buff, tc_classid))
			
 
				+		return type == BPF_WRITE ? true : false;
			
 
				+
			
 
				 	if (type == BPF_WRITE) {
			
 
				 		switch (off) {
			
 
				 		case offsetof(struct __sk_buff, mark):
			
@@ -1760,6 +1810,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 
				 			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
			
 
				 		break;
			
 
				 
			
 
				+	case offsetof(struct __sk_buff, tc_classid):
			
 
				+		ctx_off -= offsetof(struct __sk_buff, tc_classid);
			
 
				+		ctx_off += offsetof(struct sk_buff, cb);
			
 
				+		ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
			
 
				+		WARN_ON(type != BPF_WRITE);
			
 
				+		*insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
			
 
				+		break;
			
 
				+
			
 
				 	case offsetof(struct __sk_buff, tc_index):
			
 
				 #ifdef CONFIG_NET_SCHED
			
 
				 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
			
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 
				 	case TC_ACT_PIPE:
			
 
				 	case TC_ACT_RECLASSIFY:
			
 
				 	case TC_ACT_OK:
			
 
				+	case TC_ACT_REDIRECT:
			
 
				 		action = filter_res;
			
 
				 		break;
			
 
				 	case TC_ACT_SHOT:
			
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -38,6 +38,7 @@ struct cls_bpf_prog {
 
				 	struct bpf_prog *filter;
			
 
				 	struct list_head link;
			
 
				 	struct tcf_result res;
			
 
				+	bool exts_integrated;
			
 
				 	struct tcf_exts exts;
			
 
				 	u32 handle;
			
 
				 	union {
			
@@ -52,6 +53,7 @@ struct cls_bpf_prog {
 
				 
			
 
				 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
			
 
				 	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
			
 
				+	[TCA_BPF_FLAGS]		= { .type = NLA_U32 },
			
 
				 	[TCA_BPF_FD]		= { .type = NLA_U32 },
			
 
				 	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
			
 
				 	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
			
@@ -59,6 +61,23 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 
				 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
			
 
				 };
			
 
				 
			
 
				+static int cls_bpf_exec_opcode(int code)
			
 
				+{
			
 
				+	switch (code) {
			
 
				+	case TC_ACT_OK:
			
 
				+	case TC_ACT_RECLASSIFY:
			
 
				+	case TC_ACT_SHOT:
			
 
				+	case TC_ACT_PIPE:
			
 
				+	case TC_ACT_STOLEN:
			
 
				+	case TC_ACT_QUEUED:
			
 
				+	case TC_ACT_REDIRECT:
			
 
				+	case TC_ACT_UNSPEC:
			
 
				+		return code;
			
 
				+	default:
			
 
				+		return TC_ACT_UNSPEC;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			
 
				 			    struct tcf_result *res)
			
 
				 {
			
@@ -79,6 +98,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
				 	list_for_each_entry_rcu(prog, &head->plist, link) {
			
 
				 		int filter_res;
			
 
				 
			
 
				+		qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
			
 
				+
			
 
				 		if (at_ingress) {
			
 
				 			/* It is safe to push/pull even if skb_shared() */
			
 
				 			__skb_push(skb, skb->mac_len);
			
@@ -88,6 +109,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
				 			filter_res = BPF_PROG_RUN(prog->filter, skb);
			
 
				 		}
			
 
				 
			
 
				+		if (prog->exts_integrated) {
			
 
				+			res->class = prog->res.class;
			
 
				+			res->classid = qdisc_skb_cb(skb)->tc_classid;
			
 
				+
			
 
				+			ret = cls_bpf_exec_opcode(filter_res);
			
 
				+			if (ret == TC_ACT_UNSPEC)
			
 
				+				continue;
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				 		if (filter_res == 0)
			
 
				 			continue;
			
 
				 
			
@@ -195,8 +226,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static int cls_bpf_prog_from_ops(struct nlattr **tb,
			
 
				-				 struct cls_bpf_prog *prog, u32 classid)
			
 
				+static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
			
 
				 {
			
 
				 	struct sock_filter *bpf_ops;
			
 
				 	struct sock_fprog_kern fprog_tmp;
			
@@ -230,15 +260,13 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb,
 
				 	prog->bpf_ops = bpf_ops;
			
 
				 	prog->bpf_num_ops = bpf_num_ops;
			
 
				 	prog->bpf_name = NULL;
			
 
				-
			
 
				 	prog->filter = fp;
			
 
				-	prog->res.classid = classid;
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int cls_bpf_prog_from_efd(struct nlattr **tb,
			
 
				-				 struct cls_bpf_prog *prog, u32 classid)
			
 
				+static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
			
 
				+				 const struct tcf_proto *tp)
			
 
				 {
			
 
				 	struct bpf_prog *fp;
			
 
				 	char *name = NULL;
			
@@ -268,9 +296,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb,
 
				 	prog->bpf_ops = NULL;
			
 
				 	prog->bpf_fd = bpf_fd;
			
 
				 	prog->bpf_name = name;
			
 
				-
			
 
				 	prog->filter = fp;
			
 
				-	prog->res.classid = classid;
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -280,8 +306,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 
				 				   unsigned long base, struct nlattr **tb,
			
 
				 				   struct nlattr *est, bool ovr)
			
 
				 {
			
 
				+	bool is_bpf, is_ebpf, have_exts = false;
			
 
				 	struct tcf_exts exts;
			
 
				-	bool is_bpf, is_ebpf;
			
 
				 	u32 classid;
			
 
				 	int ret;
			
 
				 
			
@@ -298,9 +324,22 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 
				 		return ret;
			
 
				 
			
 
				 	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
			
 
				+	if (tb[TCA_BPF_FLAGS]) {
			
 
				+		u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
			
 
				+
			
 
				+		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
			
 
				+			tcf_exts_destroy(&exts);
			
 
				+			return -EINVAL;
			
 
				+		}
			
 
				+
			
 
				+		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
			
 
				+	}
			
 
				+
			
 
				+	prog->res.classid = classid;
			
 
				+	prog->exts_integrated = have_exts;
			
 
				 
			
 
				-	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
			
 
				-		       cls_bpf_prog_from_efd(tb, prog, classid);
			
 
				+	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
			
 
				+		       cls_bpf_prog_from_efd(tb, prog, tp);
			
 
				 	if (ret < 0) {
			
 
				 		tcf_exts_destroy(&exts);
			
 
				 		return ret;
			
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) =
 
				 	(void *) BPF_FUNC_get_current_comm;
			
 
				 static int (*bpf_perf_event_read)(void *map, int index) =
			
 
				 	(void *) BPF_FUNC_perf_event_read;
			
 
				+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
			
 
				+	(void *) BPF_FUNC_clone_redirect;
			
 
				+static int (*bpf_redirect)(int ifindex, int flags) =
			
 
				+	(void *) BPF_FUNC_redirect;
			
 
				 
			
 
				 /* llvm builtin functions that eBPF C program may use to
			
 
				  * emit BPF_LD_ABS and BPF_LD_IND instructions
			
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -5,7 +5,7 @@
 
				 #include <uapi/linux/in.h>
			
 
				 #include <uapi/linux/tcp.h>
			
 
				 #include <uapi/linux/filter.h>
			
 
				-
			
 
				+#include <uapi/linux/pkt_cls.h>
			
 
				 #include "bpf_helpers.h"
			
 
				 
			
 
				 /* compiler workaround */
			
@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb)
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				+SEC("redirect_xmit")
			
 
				+int _redirect_xmit(struct __sk_buff *skb)
			
 
				+{
			
 
				+	return bpf_redirect(skb->ifindex + 1, 0);
			
 
				+}
			
 
				+SEC("redirect_recv")
			
 
				+int _redirect_recv(struct __sk_buff *skb)
			
 
				+{
			
 
				+	return bpf_redirect(skb->ifindex + 1, 1);
			
 
				+}
			
 
				+SEC("clone_redirect_xmit")
			
 
				+int _clone_redirect_xmit(struct __sk_buff *skb)
			
 
				+{
			
 
				+	bpf_clone_redirect(skb, skb->ifindex + 1, 0);
			
 
				+	return TC_ACT_SHOT;
			
 
				+}
			
 
				+SEC("clone_redirect_recv")
			
 
				+int _clone_redirect_recv(struct __sk_buff *skb)
			
 
				+{
			
 
				+	bpf_clone_redirect(skb, skb->ifindex + 1, 1);
			
 
				+	return TC_ACT_SHOT;
			
 
				+}
			
 
				 char _license[] SEC("license") = "GPL";