Quellcode durchsuchen

Merge branch 'gre-add-collect_md-mode-for-ERSPAN-tunnel'

William Tu says:

====================
gre: add collect_md mode for ERSPAN tunnel

This patch series provide collect_md mode for ERSPAN tunnel.  The fist patch
refactors the existing gre_fb_xmit function by exacting the route cache
portion into a new function called prepare_fb_xmit.  The second patch
introduces the collect_md mode for ERSPAN tunnel, by calling the
prepare_fb_xmit function and adding ERSPAN specific logic.  The final patch
adds the test case using bpf_skb_{set,get}_tunnel_{key,opt}.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller vor 8 Jahren
Ursprung
Commit
d5a915b978
4 geänderte Dateien mit 232 neuen und 21 gelöschten Zeilen
  1. 3 1
      include/net/ip_tunnels.h
  2. 138 19
      net/ipv4/ip_gre.c
  3. 62 1
      samples/bpf/tcbpf2_kern.c
  4. 29 0
      samples/bpf/test_tunnel_bpf.sh

+ 3 - 1
include/net/ip_tunnels.h

@@ -154,8 +154,10 @@ struct ip_tunnel {
 #define TUNNEL_GENEVE_OPT	__cpu_to_be16(0x0800)
 #define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
 #define TUNNEL_NOCACHE		__cpu_to_be16(0x2000)
+#define TUNNEL_ERSPAN_OPT	__cpu_to_be16(0x4000)
 
-#define TUNNEL_OPTIONS_PRESENT	(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
+#define TUNNEL_OPTIONS_PRESENT \
+		(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
 
 struct tnl_ptk_info {
 	__be16 flags;

+ 138 - 19
net/ipv4/ip_gre.c

@@ -113,6 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+				__be32 id, u32 index, bool truncate);
 
 static unsigned int ipgre_net_id __read_mostly;
 static unsigned int gre_tap_net_id __read_mostly;
@@ -287,7 +289,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 					   false, false) < 0)
 			goto drop;
 
-		tunnel->index = ntohl(index);
+		if (tunnel->collect_md) {
+			struct ip_tunnel_info *info;
+			struct erspan_metadata *md;
+			__be64 tun_id;
+			__be16 flags;
+
+			tpi->flags |= TUNNEL_KEY;
+			flags = tpi->flags;
+			tun_id = key32_to_tunnel_id(tpi->key);
+
+			tun_dst = ip_tun_rx_dst(skb, flags,
+						tun_id, sizeof(*md));
+			if (!tun_dst)
+				return PACKET_REJECT;
+
+			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+			if (!md)
+				return PACKET_REJECT;
+
+			md->index = index;
+			info = &tun_dst->u.tun_info;
+			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+			info->options_len = sizeof(*md);
+		} else {
+			tunnel->index = ntohl(index);
+		}
+
 		skb_reset_mac_header(skb);
 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 		return PACKET_RCVD;
@@ -432,39 +460,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
 	return ip_route_output_key(net, fl);
 }
 
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
-			__be16 proto)
+static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
+				      struct net_device *dev,
+				      struct flowi4 *fl,
+				      int tunnel_hlen)
 {
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
 	struct rtable *rt = NULL;
-	struct flowi4 fl;
 	int min_headroom;
-	int tunnel_hlen;
-	__be16 df, flags;
 	bool use_cache;
 	int err;
 
 	tun_info = skb_tunnel_info(skb);
-	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
-		     ip_tunnel_info_af(tun_info) != AF_INET))
-		goto err_free_skb;
-
 	key = &tun_info->key;
 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+
 	if (use_cache)
-		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
+		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
 	if (!rt) {
-		rt = gre_get_rt(skb, dev, &fl, key);
+		rt = gre_get_rt(skb, dev, fl, key);
 		if (IS_ERR(rt))
-				goto err_free_skb;
+			goto err_free_skb;
 		if (use_cache)
 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
-					  fl.saddr);
+					  fl->saddr);
 	}
 
-	tunnel_hlen = gre_calc_hlen(key->tun_flags);
-
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ tunnel_hlen + sizeof(struct iphdr);
 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -476,6 +498,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (unlikely(err))
 			goto err_free_rt;
 	}
+	return rt;
+
+err_free_rt:
+	ip_rt_put(rt);
+err_free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NULL;
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+			__be16 proto)
+{
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	struct rtable *rt = NULL;
+	struct flowi4 fl;
+	int tunnel_hlen;
+	__be16 df, flags;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto err_free_skb;
+
+	key = &tun_info->key;
+	tunnel_hlen = gre_calc_hlen(key->tun_flags);
+
+	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+	if (!rt)
+		return;
 
 	/* Push Tunnel header. */
 	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
@@ -498,6 +551,64 @@ err_free_skb:
 	dev->stats.tx_dropped++;
 }
 
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+			   __be16 proto)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	struct erspan_metadata *md;
+	struct rtable *rt = NULL;
+	bool truncate = false;
+	struct flowi4 fl;
+	int tunnel_hlen;
+	__be16 df;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto err_free_skb;
+
+	key = &tun_info->key;
+
+	/* ERSPAN has fixed 8 byte GRE header */
+	tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+	if (!rt)
+		return;
+
+	if (gre_handle_offloads(skb, false))
+		goto err_free_rt;
+
+	if (skb->len > dev->mtu) {
+		pskb_trim(skb, dev->mtu);
+		truncate = true;
+	}
+
+	md = ip_tunnel_info_opts(tun_info);
+	if (!md)
+		goto err_free_rt;
+
+	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+			    ntohl(md->index), truncate);
+
+	gre_build_header(skb, 8, TUNNEL_SEQ,
+			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+
+	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+
+	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+		      key->tos, key->ttl, df, false);
+	return;
+
+err_free_rt:
+	ip_rt_put(rt);
+err_free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+}
+
 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -611,6 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	bool truncate = false;
 
+	if (tunnel->collect_md) {
+		erspan_fb_xmit(skb, dev, skb->protocol);
+		return NETDEV_TX_OK;
+	}
+
 	if (gre_handle_offloads(skb, false))
 		goto free_skb;
 
@@ -973,9 +1089,12 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
 		return ret;
 
 	/* ERSPAN should only have GRE sequence and key flag */
-	flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
-	flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
-	if (flags != (GRE_SEQ | GRE_KEY))
+	if (data[IFLA_GRE_OFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (data[IFLA_GRE_IFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (!data[IFLA_GRE_COLLECT_METADATA] &&
+	    flags != (GRE_SEQ | GRE_KEY))
 		return -EINVAL;
 
 	/* ERSPAN Session ID only has 10-bit. Since we reuse

+ 62 - 1
samples/bpf/tcbpf2_kern.c

@@ -17,6 +17,7 @@
 #include <uapi/linux/pkt_cls.h>
 #include <net/ipv6.h>
 #include "bpf_helpers.h"
+#include "bpf_endian.h"
 
 #define _htonl __builtin_bswap32
 #define ERROR(ret) do {\
@@ -38,6 +39,10 @@ struct vxlan_metadata {
 	u32     gbp;
 };
 
+struct erspan_metadata {
+	__be32 index;
+};
+
 SEC("gre_set_tunnel")
 int _gre_set_tunnel(struct __sk_buff *skb)
 {
@@ -76,6 +81,63 @@ int _gre_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	md.index = htonl(123);
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	u32 index;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	index = bpf_ntohl(md.index);
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, index);
+
+	return TC_ACT_OK;
+}
+
 SEC("vxlan_set_tunnel")
 int _vxlan_set_tunnel(struct __sk_buff *skb)
 {
@@ -378,5 +440,4 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-
 char _license[] SEC("license") = "GPL";

+ 29 - 0
samples/bpf/test_tunnel_bpf.sh

@@ -32,6 +32,19 @@ function add_gre_tunnel {
 	ip addr add dev $DEV 10.1.1.200/24
 }
 
+function add_erspan_tunnel {
+	# in namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# out of namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
 function add_vxlan_tunnel {
 	# Set static ARP entry here because iptables set-mark works
 	# on L3 packet, as a result not applying to ARP packets,
@@ -99,6 +112,18 @@ function test_gre {
 	cleanup
 }
 
+function test_erspan {
+	TYPE=erspan
+	DEV_NS=erspan00
+	DEV=erspan11
+	config_device
+	add_erspan_tunnel
+	attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+	ping -c 1 10.1.1.100
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	cleanup
+}
+
 function test_vxlan {
 	TYPE=vxlan
 	DEV_NS=vxlan00
@@ -151,14 +176,18 @@ function cleanup {
 	ip link del gretap11
 	ip link del vxlan11
 	ip link del geneve11
+	ip link del erspan11
 	pkill tcpdump
 	pkill cat
 	set -ex
 }
 
+trap cleanup 0 2 3 6 9
 cleanup
 echo "Testing GRE tunnel..."
 test_gre
+echo "Testing ERSPAN tunnel..."
+test_erspan
 echo "Testing VXLAN tunnel..."
 test_vxlan
 echo "Testing GENEVE tunnel..."