Эх сурвалжийг харах

Merge branch 'vxlan_group_policy_extension'

Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements supports for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata is mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.

The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.

Simple usage example:

10.1.1.1:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
   # iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
   # iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 10 жил өмнө
parent
commit
2e62fa699f

+ 89 - 24
drivers/net/vxlan.c

@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
 	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
 	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
 }
 }
 
 
-/* Find VXLAN socket based on network namespace, address family and UDP port */
-static struct vxlan_sock *vxlan_find_sock(struct net *net,
-					  sa_family_t family, __be16 port)
+/* Find VXLAN socket based on network namespace, address family and UDP port
+ * and enabled unshareable flags.
+ */
+static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
+					  __be16 port, u32 flags)
 {
 {
 	struct vxlan_sock *vs;
 	struct vxlan_sock *vs;
+	u32 match_flags = flags & VXLAN_F_UNSHAREABLE;
 
 
 	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
 	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
 		if (inet_sk(vs->sock->sk)->inet_sport == port &&
 		if (inet_sk(vs->sock->sk)->inet_sport == port &&
-		    inet_sk(vs->sock->sk)->sk.sk_family == family)
+		    inet_sk(vs->sock->sk)->sk.sk_family == family &&
+		    (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
 			return vs;
 			return vs;
 	}
 	}
 	return NULL;
 	return NULL;
@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
 
 
 /* Look up VNI in a per net namespace table */
 /* Look up VNI in a per net namespace table */
 static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
 static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
-					sa_family_t family, __be16 port)
+					sa_family_t family, __be16 port,
+					u32 flags)
 {
 {
 	struct vxlan_sock *vs;
 	struct vxlan_sock *vs;
 
 
-	vs = vxlan_find_sock(net, family, port);
+	vs = vxlan_find_sock(net, family, port, flags);
 	if (!vs)
 	if (!vs)
 		return NULL;
 		return NULL;
 
 
@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 			continue;
 			continue;
 
 
 		vh2 = (struct vxlanhdr *)(p->data + off_vx);
 		vh2 = (struct vxlanhdr *)(p->data + off_vx);
-		if (vh->vx_vni != vh2->vx_vni) {
+		if (vh->vx_flags != vh2->vx_flags ||
+		    vh->vx_vni != vh2->vx_vni) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 			continue;
 		}
 		}
@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct vxlan_sock *vs;
 	struct vxlan_sock *vs;
 	struct vxlanhdr *vxh;
 	struct vxlanhdr *vxh;
 	u32 flags, vni;
 	u32 flags, vni;
+	struct vxlan_metadata md = {0};
 
 
 	/* Need Vxlan and inner Ethernet header to be present */
 	/* Need Vxlan and inner Ethernet header to be present */
 	if (!pskb_may_pull(skb, VXLAN_HLEN))
 	if (!pskb_may_pull(skb, VXLAN_HLEN))
@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		vni &= VXLAN_VID_MASK;
 		vni &= VXLAN_VID_MASK;
 	}
 	}
 
 
+	/* For backwards compatibility, only allow reserved fields to be
+	 * used by VXLAN extensions if explicitly requested.
+	 */
+	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
+		struct vxlanhdr_gbp *gbp;
+
+		gbp = (struct vxlanhdr_gbp *)vxh;
+		md.gbp = ntohs(gbp->policy_id);
+
+		if (gbp->dont_learn)
+			md.gbp |= VXLAN_GBP_DONT_LEARN;
+
+		if (gbp->policy_applied)
+			md.gbp |= VXLAN_GBP_POLICY_APPLIED;
+
+		flags &= ~VXLAN_GBP_USED_BITS;
+	}
+
 	if (flags || (vni & ~VXLAN_VID_MASK)) {
 	if (flags || (vni & ~VXLAN_VID_MASK)) {
 		/* If there are any unprocessed flags remaining treat
 		/* If there are any unprocessed flags remaining treat
 		 * this as a malformed packet. This behavior diverges from
 		 * this as a malformed packet. This behavior diverges from
@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto bad_flags;
 		goto bad_flags;
 	}
 	}
 
 
-	vs->rcv(vs, skb, vxh->vx_vni);
+	md.vni = vxh->vx_vni;
+	vs->rcv(vs, skb, &md);
 	return 0;
 	return 0;
 
 
 drop:
 drop:
@@ -1246,8 +1272,8 @@ error:
 	return 1;
 	return 1;
 }
 }
 
 
-static void vxlan_rcv(struct vxlan_sock *vs,
-		      struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+		      struct vxlan_metadata *md)
 {
 {
 	struct iphdr *oip = NULL;
 	struct iphdr *oip = NULL;
 	struct ipv6hdr *oip6 = NULL;
 	struct ipv6hdr *oip6 = NULL;
@@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 	int err = 0;
 	int err = 0;
 	union vxlan_addr *remote_ip;
 	union vxlan_addr *remote_ip;
 
 
-	vni = ntohl(vx_vni) >> 8;
+	vni = ntohl(md->vni) >> 8;
 	/* Is this VNI defined? */
 	/* Is this VNI defined? */
 	vxlan = vxlan_vs_find_vni(vs, vni);
 	vxlan = vxlan_vs_find_vni(vs, vni);
 	if (!vxlan)
 	if (!vxlan)
@@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 		goto drop;
 		goto drop;
 
 
 	skb_reset_network_header(skb);
 	skb_reset_network_header(skb);
+	skb->mark = md->gbp;
 
 
 	if (oip6)
 	if (oip6)
 		err = IP6_ECN_decapsulate(oip6, skb);
 		err = IP6_ECN_decapsulate(oip6, skb);
@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 	return false;
 }
 }
 
 
+static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
+				struct vxlan_metadata *md)
+{
+	struct vxlanhdr_gbp *gbp;
+
+	gbp = (struct vxlanhdr_gbp *)vxh;
+	vxh->vx_flags |= htonl(VXLAN_HF_GBP);
+
+	if (md->gbp & VXLAN_GBP_DONT_LEARN)
+		gbp->dont_learn = 1;
+
+	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
+		gbp->policy_applied = 1;
+
+	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 #if IS_ENABLED(CONFIG_IPV6)
 static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   struct dst_entry *dst, struct sk_buff *skb,
 			   struct dst_entry *dst, struct sk_buff *skb,
 			   struct net_device *dev, struct in6_addr *saddr,
 			   struct net_device *dev, struct in6_addr *saddr,
 			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
 			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
-			   __be16 src_port, __be16 dst_port, __be32 vni,
-			   bool xnet)
+			   __be16 src_port, __be16 dst_port,
+			   struct vxlan_metadata *md, bool xnet)
 {
 {
 	struct vxlanhdr *vxh;
 	struct vxlanhdr *vxh;
 	int min_headroom;
 	int min_headroom;
@@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
-	vxh->vx_vni = vni;
+	vxh->vx_vni = md->vni;
 
 
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 		}
 		}
 	}
 	}
 
 
+	if (vs->flags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vs, md);
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 
 	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
 	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
@@ -1728,7 +1775,8 @@ err:
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
+		   __be16 src_port, __be16 dst_port,
+		   struct vxlan_metadata *md, bool xnet)
 {
 {
 	struct vxlanhdr *vxh;
 	struct vxlanhdr *vxh;
 	int min_headroom;
 	int min_headroom;
@@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
-	vxh->vx_vni = vni;
+	vxh->vx_vni = md->vni;
 
 
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		}
 		}
 	}
 	}
 
 
+	if (vs->flags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vs, md);
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 
 	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
 	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
@@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	const struct iphdr *old_iph;
 	const struct iphdr *old_iph;
 	struct flowi4 fl4;
 	struct flowi4 fl4;
 	union vxlan_addr *dst;
 	union vxlan_addr *dst;
+	struct vxlan_metadata md;
 	__be16 src_port = 0, dst_port;
 	__be16 src_port = 0, dst_port;
 	u32 vni;
 	u32 vni;
 	__be16 df = 0;
 	__be16 df = 0;
@@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 
 			ip_rt_put(rt);
 			ip_rt_put(rt);
 			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
 			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-						   dst->sa.sa_family, dst_port);
+						   dst->sa.sa_family, dst_port,
+						   vxlan->flags);
 			if (!dst_vxlan)
 			if (!dst_vxlan)
 				goto tx_error;
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
 
 
 		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
 		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
 				     fl4.saddr, dst->sin.sin_addr.s_addr,
 				     fl4.saddr, dst->sin.sin_addr.s_addr,
-				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8),
+				     tos, ttl, df, src_port, dst_port, &md,
 				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
 				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
 		if (err < 0) {
 		if (err < 0) {
 			/* skb is already freed. */
 			/* skb is already freed. */
@@ -1968,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 
 			dst_release(ndst);
 			dst_release(ndst);
 			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
 			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-						   dst->sa.sa_family, dst_port);
+						   dst->sa.sa_family, dst_port,
+						   vxlan->flags);
 			if (!dst_vxlan)
 			if (!dst_vxlan)
 				goto tx_error;
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1976,10 +2031,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 		}
 
 
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
 
 
 		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
 		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
 				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
 				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
-				      src_port, dst_port, htonl(vni << 8),
+				      src_port, dst_port, &md,
 				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
 				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
 #endif
 #endif
 	}
 	}
@@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)
 
 
 	spin_lock(&vn->sock_lock);
 	spin_lock(&vn->sock_lock);
 	vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
 	vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
-			     vxlan->dst_port);
+			     vxlan->dst_port, vxlan->flags);
 	if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
 	if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
 		/* If we have a socket with same port already, reuse it */
 		/* If we have a socket with same port already, reuse it */
 		vxlan_vs_add_dev(vs, vxlan);
 		vxlan_vs_add_dev(vs, vxlan);
@@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
 };
 };
 
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 		return vs;
 		return vs;
 
 
 	spin_lock(&vn->sock_lock);
 	spin_lock(&vn->sock_lock);
-	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
+	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
 	if (vs && ((vs->rcv != rcv) ||
 	if (vs && ((vs->rcv != rcv) ||
 		   !atomic_add_unless(&vs->refcnt, 1, 0)))
 		   !atomic_add_unless(&vs->refcnt, 1, 0)))
 			vs = ERR_PTR(-EBUSY);
 			vs = ERR_PTR(-EBUSY);
@@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
 	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
 		vxlan->flags |= VXLAN_F_REMCSUM_RX;
 		vxlan->flags |= VXLAN_F_REMCSUM_RX;
 
 
+	if (data[IFLA_VXLAN_GBP])
+		vxlan->flags |= VXLAN_F_GBP;
+
 	if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
 	if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
-			   vxlan->dst_port)) {
+			   vxlan->dst_port, vxlan->flags)) {
 		pr_info("duplicate VNI %u\n", vni);
 		pr_info("duplicate VNI %u\n", vni);
 		return -EEXIST;
 		return -EEXIST;
 	}
 	}
@@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
 		goto nla_put_failure;
 		goto nla_put_failure;
 
 
+	if (vxlan->flags & VXLAN_F_GBP &&
+	    nla_put_flag(skb, IFLA_VXLAN_GBP))
+		goto nla_put_failure;
+
 	return 0;
 	return 0;
 
 
 nla_put_failure:
 nla_put_failure:

+ 4 - 1
include/net/ip_tunnels.h

@@ -97,7 +97,10 @@ struct ip_tunnel {
 #define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
 #define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
 #define TUNNEL_OAM		__cpu_to_be16(0x0200)
 #define TUNNEL_OAM		__cpu_to_be16(0x0200)
 #define TUNNEL_CRIT_OPT		__cpu_to_be16(0x0400)
 #define TUNNEL_CRIT_OPT		__cpu_to_be16(0x0400)
-#define TUNNEL_OPTIONS_PRESENT	__cpu_to_be16(0x0800)
+#define TUNNEL_GENEVE_OPT	__cpu_to_be16(0x0800)
+#define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
+
+#define TUNNEL_OPTIONS_PRESENT	(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
 
 
 struct tnl_ptk_info {
 struct tnl_ptk_info {
 	__be16 flags;
 	__be16 flags;

+ 77 - 5
include/net/vxlan.h

@@ -11,15 +11,76 @@
 #define VNI_HASH_BITS	10
 #define VNI_HASH_BITS	10
 #define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
 #define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
 
 
-/* VXLAN protocol header */
+/*
+ * VXLAN Group Based Policy Extension:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R|        Group Policy ID        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * D = Don't Learn bit. When set, this bit indicates that the egress
+ *     VTEP MUST NOT learn the source address of the encapsulated frame.
+ *
+ * A = Indicates that the group policy has already been applied to
+ *     this packet. Policies MUST NOT be applied by devices when the
+ *     A bit is set.
+ *
+ * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
+ */
+struct vxlanhdr_gbp {
+	__u8	vx_flags;
+#ifdef __LITTLE_ENDIAN_BITFIELD
+	__u8	reserved_flags1:3,
+		policy_applied:1,
+		reserved_flags2:2,
+		dont_learn:1,
+		reserved_flags3:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8	reserved_flags1:1,
+		dont_learn:1,
+		reserved_flags2:2,
+		policy_applied:1,
+		reserved_flags3:3;
+#else
+#error	"Please fix <asm/byteorder.h>"
+#endif
+	__be16	policy_id;
+	__be32	vx_vni;
+};
+
+#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF)
+
+/* skb->mark mapping
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+#define VXLAN_GBP_DONT_LEARN		(BIT(6) << 16)
+#define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)
+#define VXLAN_GBP_ID_MASK		(0xFFFF)
+
+/* VXLAN protocol header:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |G|R|R|R|I|R|R|C|               Reserved                        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * G = 1	Group Policy (VXLAN-GBP)
+ * I = 1	VXLAN Network Identifier (VNI) present
+ * C = 1	Remote checksum offload (RCO)
+ */
 struct vxlanhdr {
 struct vxlanhdr {
 	__be32 vx_flags;
 	__be32 vx_flags;
 	__be32 vx_vni;
 	__be32 vx_vni;
 };
 };
 
 
 /* VXLAN header flags. */
 /* VXLAN header flags. */
-#define VXLAN_HF_VNI 0x08000000
-#define VXLAN_HF_RCO 0x00200000
+#define VXLAN_HF_RCO BIT(24)
+#define VXLAN_HF_VNI BIT(27)
+#define VXLAN_HF_GBP BIT(31)
 
 
 /* Remote checksum offload header option */
 /* Remote checksum offload header option */
 #define VXLAN_RCO_MASK  0x7f    /* Last byte of vni field */
 #define VXLAN_RCO_MASK  0x7f    /* Last byte of vni field */
@@ -32,8 +93,14 @@ struct vxlanhdr {
 #define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
 #define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
 
 
+struct vxlan_metadata {
+	__be32		vni;
+	u32		gbp;
+};
+
 struct vxlan_sock;
 struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
+typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
+			   struct vxlan_metadata *md);
 
 
 /* per UDP socket information */
 /* per UDP socket information */
 struct vxlan_sock {
 struct vxlan_sock {
@@ -60,6 +127,10 @@ struct vxlan_sock {
 #define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
 #define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
 #define VXLAN_F_REMCSUM_TX		0x200
 #define VXLAN_F_REMCSUM_TX		0x200
 #define VXLAN_F_REMCSUM_RX		0x400
 #define VXLAN_F_REMCSUM_RX		0x400
+#define VXLAN_F_GBP			0x800
+
+/* These flags must match in order for a socket to be shareable */
+#define VXLAN_F_UNSHAREABLE		VXLAN_F_GBP
 
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
 				  vxlan_rcv_t *rcv, void *data,
@@ -70,7 +141,8 @@ void vxlan_sock_release(struct vxlan_sock *vs);
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
+		   __be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
+		   bool xnet);
 
 
 static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 						     netdev_features_t features)
 						     netdev_features_t features)

+ 1 - 0
include/uapi/linux/if_link.h

@@ -372,6 +372,7 @@ enum {
 	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
 	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
 	IFLA_VXLAN_REMCSUM_TX,
 	IFLA_VXLAN_REMCSUM_TX,
 	IFLA_VXLAN_REMCSUM_RX,
 	IFLA_VXLAN_REMCSUM_RX,
+	IFLA_VXLAN_GBP,
 	__IFLA_VXLAN_MAX
 	__IFLA_VXLAN_MAX
 };
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)

+ 11 - 0
include/uapi/linux/openvswitch.h

@@ -252,11 +252,21 @@ enum ovs_vport_attr {
 
 
 #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
 #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
 
 
+enum {
+	OVS_VXLAN_EXT_UNSPEC,
+	OVS_VXLAN_EXT_GBP,	/* Flag or __u32 */
+	__OVS_VXLAN_EXT_MAX,
+};
+
+#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
+
+
 /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
 /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
  */
  */
 enum {
 enum {
 	OVS_TUNNEL_ATTR_UNSPEC,
 	OVS_TUNNEL_ATTR_UNSPEC,
 	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
 	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
+	OVS_TUNNEL_ATTR_EXTENSION,
 	__OVS_TUNNEL_ATTR_MAX
 	__OVS_TUNNEL_ATTR_MAX
 };
 };
 
 
@@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
 	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
 	OVS_TUNNEL_KEY_ATTR_TP_SRC,		/* be16 src Transport Port. */
 	OVS_TUNNEL_KEY_ATTR_TP_SRC,		/* be16 src Transport Port. */
 	OVS_TUNNEL_KEY_ATTR_TP_DST,		/* be16 dst Transport Port. */
 	OVS_TUNNEL_KEY_ATTR_TP_DST,		/* be16 dst Transport Port. */
+	OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS,		/* Nested OVS_VXLAN_EXT_* */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 };
 
 

+ 1 - 1
net/openvswitch/flow.c

@@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
 			BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
 			BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
 						   8)) - 1
 						   8)) - 1
 					> sizeof(key->tun_opts));
 					> sizeof(key->tun_opts));
-			memcpy(GENEVE_OPTS(key, tun_info->options_len),
+			memcpy(TUN_METADATA_OPTS(key, tun_info->options_len),
 			       tun_info->options, tun_info->options_len);
 			       tun_info->options, tun_info->options_len);
 			key->tun_opts_len = tun_info->options_len;
 			key->tun_opts_len = tun_info->options_len;
 		} else {
 		} else {

+ 7 - 7
net/openvswitch/flow.h

@@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel {
 
 
 struct ovs_tunnel_info {
 struct ovs_tunnel_info {
 	struct ovs_key_ipv4_tunnel tunnel;
 	struct ovs_key_ipv4_tunnel tunnel;
-	const struct geneve_opt *options;
+	const void *options;
 	u8 options_len;
 	u8 options_len;
 };
 };
 
 
@@ -61,10 +61,10 @@ struct ovs_tunnel_info {
  * maximum size. This allows us to get the benefits of variable length
  * maximum size. This allows us to get the benefits of variable length
  * matching for small options.
  * matching for small options.
  */
  */
-#define GENEVE_OPTS(flow_key, opt_len)	\
-	((struct geneve_opt *)((flow_key)->tun_opts + \
-			       FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
-			       opt_len))
+#define TUN_METADATA_OFFSET(opt_len) \
+	(FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+#define TUN_METADATA_OPTS(flow_key, opt_len) \
+	((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
 
 
 static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 					    __be32 saddr, __be32 daddr,
 					    __be32 saddr, __be32 daddr,
@@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 					    __be16 tp_dst,
 					    __be16 tp_dst,
 					    __be64 tun_id,
 					    __be64 tun_id,
 					    __be16 tun_flags,
 					    __be16 tun_flags,
-					    const struct geneve_opt *opts,
+					    const void *opts,
 					    u8 opts_len)
 					    u8 opts_len)
 {
 {
 	tun_info->tunnel.tun_id = tun_id;
 	tun_info->tunnel.tun_id = tun_id;
@@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 					  __be16 tp_dst,
 					  __be16 tp_dst,
 					  __be64 tun_id,
 					  __be64 tun_id,
 					  __be16 tun_flags,
 					  __be16 tun_flags,
-					  const struct geneve_opt *opts,
+					  const void *opts,
 					  u8 opts_len)
 					  u8 opts_len)
 {
 {
 	__ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
 	__ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,

+ 193 - 93
net/openvswitch/flow_netlink.c

@@ -49,6 +49,14 @@
 #include <net/mpls.h>
 #include <net/mpls.h>
 
 
 #include "flow_netlink.h"
 #include "flow_netlink.h"
+#include "vport-vxlan.h"
+
+struct ovs_len_tbl {
+	int len;
+	const struct ovs_len_tbl *next;
+};
+
+#define OVS_ATTR_NESTED -1
 
 
 static void update_range(struct sw_flow_match *match,
 static void update_range(struct sw_flow_match *match,
 			 size_t offset, size_t size, bool is_mask)
 			 size_t offset, size_t size, bool is_mask)
@@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void)
 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
 		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
 		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
+		 */
 		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
 		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
 		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 }
 }
@@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void)
 		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
 		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
 }
 }
 
 
+static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+	[OVS_TUNNEL_KEY_ATTR_ID]	    = { .len = sizeof(u64) },
+	[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]	    = { .len = sizeof(u32) },
+	[OVS_TUNNEL_KEY_ATTR_IPV4_DST]	    = { .len = sizeof(u32) },
+	[OVS_TUNNEL_KEY_ATTR_TOS]	    = { .len = 1 },
+	[OVS_TUNNEL_KEY_ATTR_TTL]	    = { .len = 1 },
+	[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
+	[OVS_TUNNEL_KEY_ATTR_CSUM]	    = { .len = 0 },
+	[OVS_TUNNEL_KEY_ATTR_TP_SRC]	    = { .len = sizeof(u16) },
+	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
+	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
+	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_NESTED },
+	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED },
+};
+
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
-static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
-	[OVS_KEY_ATTR_ENCAP] = -1,
-	[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
-	[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
-	[OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
-	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
-	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
-	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
-	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
-	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
-	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
-	[OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
-	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
-	[OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
-	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
-	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
-	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
-	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
-	[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
-	[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
-	[OVS_KEY_ATTR_TUNNEL] = -1,
-	[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
+static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
+	[OVS_KEY_ATTR_PRIORITY]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_IN_PORT]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_SKB_MARK]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_ETHERNET]	 = { .len = sizeof(struct ovs_key_ethernet) },
+	[OVS_KEY_ATTR_VLAN]	 = { .len = sizeof(__be16) },
+	[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
+	[OVS_KEY_ATTR_IPV4]	 = { .len = sizeof(struct ovs_key_ipv4) },
+	[OVS_KEY_ATTR_IPV6]	 = { .len = sizeof(struct ovs_key_ipv6) },
+	[OVS_KEY_ATTR_TCP]	 = { .len = sizeof(struct ovs_key_tcp) },
+	[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
+	[OVS_KEY_ATTR_UDP]	 = { .len = sizeof(struct ovs_key_udp) },
+	[OVS_KEY_ATTR_SCTP]	 = { .len = sizeof(struct ovs_key_sctp) },
+	[OVS_KEY_ATTR_ICMP]	 = { .len = sizeof(struct ovs_key_icmp) },
+	[OVS_KEY_ATTR_ICMPV6]	 = { .len = sizeof(struct ovs_key_icmpv6) },
+	[OVS_KEY_ATTR_ARP]	 = { .len = sizeof(struct ovs_key_arp) },
+	[OVS_KEY_ATTR_ND]	 = { .len = sizeof(struct ovs_key_nd) },
+	[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
+				     .next = ovs_tunnel_key_lens, },
+	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
 };
 };
 
 
 static bool is_all_zero(const u8 *fp, size_t size)
 static bool is_all_zero(const u8 *fp, size_t size)
@@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
 			return -EINVAL;
 			return -EINVAL;
 		}
 		}
 
 
-		expected_len = ovs_key_lens[type];
-		if (nla_len(nla) != expected_len && expected_len != -1) {
+		expected_len = ovs_key_lens[type].len;
+		if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
 			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
 			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
 				  type, nla_len(nla), expected_len);
 				  type, nla_len(nla), expected_len);
 			return -EINVAL;
 			return -EINVAL;
@@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a,
 		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
 		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
 	}
 	}
 
 
-	opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
-						    nla_len(a));
+	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
 	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
 	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
 				  nla_len(a), is_mask);
 				  nla_len(a), is_mask);
 	return 0;
 	return 0;
 }
 }
 
 
+static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
+	[OVS_VXLAN_EXT_GBP]	= { .type = NLA_U32 },
+};
+
+static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
+				     struct sw_flow_match *match, bool is_mask,
+				     bool log)
+{
+	struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
+	unsigned long opt_key_offset;
+	struct ovs_vxlan_opts opts;
+	int err;
+
+	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+	err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
+	if (err < 0)
+		return err;
+
+	memset(&opts, 0, sizeof(opts));
+
+	if (tb[OVS_VXLAN_EXT_GBP])
+		opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);
+
+	if (!is_mask)
+		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
+	else
+		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+				  is_mask);
+	return 0;
+}
+
 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 				struct sw_flow_match *match, bool is_mask,
 				struct sw_flow_match *match, bool is_mask,
 				bool log)
 				bool log)
@@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 	int rem;
 	int rem;
 	bool ttl = false;
 	bool ttl = false;
 	__be16 tun_flags = 0;
 	__be16 tun_flags = 0;
+	int opts_type = 0;
 
 
 	nla_for_each_nested(a, attr, rem) {
 	nla_for_each_nested(a, attr, rem) {
 		int type = nla_type(a);
 		int type = nla_type(a);
 		int err;
 		int err;
 
 
-		static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
-			[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
-			[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
-			[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
-			[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
-			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
-			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
-			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
-			[OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
-			[OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
-			[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
-			[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
-		};
-
 		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
 		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
 			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
 			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
 				  type, OVS_TUNNEL_KEY_ATTR_MAX);
 				  type, OVS_TUNNEL_KEY_ATTR_MAX);
 			return -EINVAL;
 			return -EINVAL;
 		}
 		}
 
 
-		if (ovs_tunnel_key_lens[type] != nla_len(a) &&
-		    ovs_tunnel_key_lens[type] != -1) {
+		if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
+		    ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
 			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
 			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
-				  type, nla_len(a), ovs_tunnel_key_lens[type]);
+				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
 			return -EINVAL;
 			return -EINVAL;
 		}
 		}
 
 
@@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			tun_flags |= TUNNEL_OAM;
 			tun_flags |= TUNNEL_OAM;
 			break;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
 		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
 			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
 			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
 			if (err)
 			if (err)
 				return err;
 				return err;
 
 
-			tun_flags |= TUNNEL_OPTIONS_PRESENT;
+			tun_flags |= TUNNEL_GENEVE_OPT;
+			opts_type = type;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
+			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
+			if (err)
+				return err;
+
+			tun_flags |= TUNNEL_VXLAN_OPT;
+			opts_type = type;
 			break;
 			break;
 		default:
 		default:
 			OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
 			OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
@@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		}
 		}
 	}
 	}
 
 
+	return opts_type;
+}
+
+static int vxlan_opt_to_nlattr(struct sk_buff *skb,
+			       const void *tun_opts, int swkey_tun_opts_len)
+{
+	const struct ovs_vxlan_opts *opts = tun_opts;
+	struct nlattr *nla;
+
+	nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nla);
 	return 0;
 	return 0;
 }
 }
 
 
 static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 				const struct ovs_key_ipv4_tunnel *output,
 				const struct ovs_key_ipv4_tunnel *output,
-				const struct geneve_opt *tun_opts,
-				int swkey_tun_opts_len)
+				const void *tun_opts, int swkey_tun_opts_len)
 {
 {
 	if (output->tun_flags & TUNNEL_KEY &&
 	if (output->tun_flags & TUNNEL_KEY &&
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
@@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_OAM) &&
 	if ((output->tun_flags & TUNNEL_OAM) &&
 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 		return -EMSGSIZE;
 		return -EMSGSIZE;
-	if (tun_opts &&
-	    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
-		    swkey_tun_opts_len, tun_opts))
-		return -EMSGSIZE;
+	if (tun_opts) {
+		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+			    swkey_tun_opts_len, tun_opts))
+			return -EMSGSIZE;
+		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
+			return -EMSGSIZE;
+	}
 
 
 	return 0;
 	return 0;
 }
 }
 
 
 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 			      const struct ovs_key_ipv4_tunnel *output,
 			      const struct ovs_key_ipv4_tunnel *output,
-			      const struct geneve_opt *tun_opts,
-			      int swkey_tun_opts_len)
+			      const void *tun_opts, int swkey_tun_opts_len)
 {
 {
 	struct nlattr *nla;
 	struct nlattr *nla;
 	int err;
 	int err;
@@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 	}
 	}
 	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
 	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
 		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
 		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
-					 is_mask, log))
+					 is_mask, log) < 0)
 			return -EINVAL;
 			return -EINVAL;
 		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
 		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
 	}
 	}
@@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 	return 0;
 	return 0;
 }
 }
 
 
-static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
+static void nlattr_set(struct nlattr *attr, u8 val,
+		       const struct ovs_len_tbl *tbl)
 {
 {
 	struct nlattr *nla;
 	struct nlattr *nla;
 	int rem;
 	int rem;
 
 
 	/* The nlattr stream should already have been validated */
 	/* The nlattr stream should already have been validated */
 	nla_for_each_nested(nla, attr, rem) {
 	nla_for_each_nested(nla, attr, rem) {
-		/* We assume that ovs_key_lens[type] == -1 means that type is a
-		 * nested attribute
-		 */
-		if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
-			nlattr_set(nla, val, false);
+		if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
+			nlattr_set(nla, val, tbl[nla_type(nla)].next);
 		else
 		else
 			memset(nla_data(nla), val, nla_len(nla));
 			memset(nla_data(nla), val, nla_len(nla));
 	}
 	}
@@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
 
 
 static void mask_set_nlattr(struct nlattr *attr, u8 val)
 static void mask_set_nlattr(struct nlattr *attr, u8 val)
 {
 {
-	nlattr_set(attr, val, true);
+	nlattr_set(attr, val, ovs_key_lens);
 }
 }
 
 
 /**
 /**
@@ -1148,10 +1233,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 		goto nla_put_failure;
 		goto nla_put_failure;
 
 
 	if ((swkey->tun_key.ipv4_dst || is_mask)) {
 	if ((swkey->tun_key.ipv4_dst || is_mask)) {
-		const struct geneve_opt *opts = NULL;
+		const void *opts = NULL;
 
 
 		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
 		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
-			opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
 
 
 		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
 		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
 				       swkey->tun_opts_len))
 				       swkey->tun_opts_len))
@@ -1540,6 +1625,34 @@ void ovs_match_init(struct sw_flow_match *match,
 	}
 	}
 }
 }
 
 
+static int validate_geneve_opts(struct sw_flow_key *key)
+{
+	struct geneve_opt *option;
+	int opts_len = key->tun_opts_len;
+	bool crit_opt = false;
+
+	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
+	while (opts_len > 0) {
+		int len;
+
+		if (opts_len < sizeof(*option))
+			return -EINVAL;
+
+		len = sizeof(*option) + option->length * 4;
+		if (len > opts_len)
+			return -EINVAL;
+
+		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+		option = (struct geneve_opt *)((u8 *)option + len);
+		opts_len -= len;
+	};
+
+	key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+
+	return 0;
+}
+
 static int validate_and_copy_set_tun(const struct nlattr *attr,
 static int validate_and_copy_set_tun(const struct nlattr *attr,
 				     struct sw_flow_actions **sfa, bool log)
 				     struct sw_flow_actions **sfa, bool log)
 {
 {
@@ -1547,36 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	struct sw_flow_key key;
 	struct sw_flow_key key;
 	struct ovs_tunnel_info *tun_info;
 	struct ovs_tunnel_info *tun_info;
 	struct nlattr *a;
 	struct nlattr *a;
-	int err, start;
+	int err, start, opts_type;
 
 
 	ovs_match_init(&match, &key, NULL);
 	ovs_match_init(&match, &key, NULL);
-	err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
-	if (err)
-		return err;
+	opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
+	if (opts_type < 0)
+		return opts_type;
 
 
 	if (key.tun_opts_len) {
 	if (key.tun_opts_len) {
-		struct geneve_opt *option = GENEVE_OPTS(&key,
-							key.tun_opts_len);
-		int opts_len = key.tun_opts_len;
-		bool crit_opt = false;
-
-		while (opts_len > 0) {
-			int len;
-
-			if (opts_len < sizeof(*option))
-				return -EINVAL;
-
-			len = sizeof(*option) + option->length * 4;
-			if (len > opts_len)
-				return -EINVAL;
-
-			crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
-
-			option = (struct geneve_opt *)((u8 *)option + len);
-			opts_len -= len;
-		};
-
-		key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+		switch (opts_type) {
+		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+			err = validate_geneve_opts(&key);
+			if (err < 0)
+				return err;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			break;
+		}
 	};
 	};
 
 
 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
@@ -1597,9 +1697,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 		 * everything else will go away after flow setup. We can append
 		 * everything else will go away after flow setup. We can append
 		 * it to tun_info and then point there.
 		 * it to tun_info and then point there.
 		 */
 		 */
-		memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
-		       key.tun_opts_len);
-		tun_info->options = (struct geneve_opt *)(tun_info + 1);
+		memcpy((tun_info + 1),
+		       TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len);
+		tun_info->options = (tun_info + 1);
 	} else {
 	} else {
 		tun_info->options = NULL;
 		tun_info->options = NULL;
 	}
 	}
@@ -1622,8 +1722,8 @@ static int validate_set(const struct nlattr *a,
 		return -EINVAL;
 		return -EINVAL;
 
 
 	if (key_type > OVS_KEY_ATTR_MAX ||
 	if (key_type > OVS_KEY_ATTR_MAX ||
-	    (ovs_key_lens[key_type] != nla_len(ovs_key) &&
-	     ovs_key_lens[key_type] != -1))
+	    (ovs_key_lens[key_type].len != nla_len(ovs_key) &&
+	     ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
 		return -EINVAL;
 		return -EINVAL;
 
 
 	switch (key_type) {
 	switch (key_type) {

+ 11 - 4
net/openvswitch/vport-geneve.c

@@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
 
 
 	opts_len = geneveh->opt_len * 4;
 	opts_len = geneveh->opt_len * 4;
 
 
-	flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
+	flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
 		(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
 		(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
 		(geneveh->oam ? TUNNEL_OAM : 0) |
 		(geneveh->oam ? TUNNEL_OAM : 0) |
 		(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
 		(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
@@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 	__be16 sport;
 	__be16 sport;
 	struct rtable *rt;
 	struct rtable *rt;
 	struct flowi4 fl;
 	struct flowi4 fl;
-	u8 vni[3];
+	u8 vni[3], opts_len, *opts;
 	__be16 df;
 	__be16 df;
 	int err;
 	int err;
 
 
@@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 	tunnel_id_to_vni(tun_key->tun_id, vni);
 	tunnel_id_to_vni(tun_key->tun_id, vni);
 	skb->ignore_df = 1;
 	skb->ignore_df = 1;
 
 
+	if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
+		opts = (u8 *)tun_info->options;
+		opts_len = tun_info->options_len;
+	} else {
+		opts = NULL;
+		opts_len = 0;
+	}
+
 	err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
 	err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
 			      tun_key->ipv4_dst, tun_key->ipv4_tos,
 			      tun_key->ipv4_dst, tun_key->ipv4_tos,
 			      tun_key->ipv4_ttl, df, sport, dport,
 			      tun_key->ipv4_ttl, df, sport, dport,
-			      tun_key->tun_flags, vni,
-			      tun_info->options_len, (u8 *)tun_info->options,
+			      tun_key->tun_flags, vni, opts_len, opts,
 			      false);
 			      false);
 	if (err < 0)
 	if (err < 0)
 		ip_rt_put(rt);
 		ip_rt_put(rt);

+ 86 - 5
net/openvswitch/vport-vxlan.c

@@ -40,6 +40,7 @@
 
 
 #include "datapath.h"
 #include "datapath.h"
 #include "vport.h"
 #include "vport.h"
+#include "vport-vxlan.h"
 
 
 /**
 /**
  * struct vxlan_port - Keeps track of open UDP ports
  * struct vxlan_port - Keeps track of open UDP ports
@@ -49,6 +50,7 @@
 struct vxlan_port {
 struct vxlan_port {
 	struct vxlan_sock *vs;
 	struct vxlan_sock *vs;
 	char name[IFNAMSIZ];
 	char name[IFNAMSIZ];
+	u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
 };
 };
 
 
 static struct vport_ops ovs_vxlan_vport_ops;
 static struct vport_ops ovs_vxlan_vport_ops;
@@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 }
 }
 
 
 /* Called with rcu_read_lock and BH disabled. */
 /* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+		      struct vxlan_metadata *md)
 {
 {
 	struct ovs_tunnel_info tun_info;
 	struct ovs_tunnel_info tun_info;
+	struct vxlan_port *vxlan_port;
 	struct vport *vport = vs->data;
 	struct vport *vport = vs->data;
 	struct iphdr *iph;
 	struct iphdr *iph;
+	struct ovs_vxlan_opts opts = {
+		.gbp = md->gbp,
+	};
 	__be64 key;
 	__be64 key;
+	__be16 flags;
+
+	flags = TUNNEL_KEY;
+	vxlan_port = vxlan_vport(vport);
+	if (vxlan_port->exts & VXLAN_F_GBP)
+		flags |= TUNNEL_VXLAN_OPT;
 
 
 	/* Save outer tunnel values */
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
 	iph = ip_hdr(skb);
-	key = cpu_to_be64(ntohl(vx_vni) >> 8);
+	key = cpu_to_be64(ntohl(md->vni) >> 8);
 	ovs_flow_tun_info_init(&tun_info, iph,
 	ovs_flow_tun_info_init(&tun_info, iph,
 			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
 			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
-			       key, TUNNEL_KEY, NULL, 0);
+			       key, flags, &opts, sizeof(opts));
 
 
 	ovs_vport_receive(vport, skb, &tun_info);
 	ovs_vport_receive(vport, skb, &tun_info);
 }
 }
@@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
 
 
 	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
 	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
 		return -EMSGSIZE;
 		return -EMSGSIZE;
+
+	if (vxlan_port->exts) {
+		struct nlattr *exts;
+
+		exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
+		if (!exts)
+			return -EMSGSIZE;
+
+		if (vxlan_port->exts & VXLAN_F_GBP &&
+		    nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
+			return -EMSGSIZE;
+
+		nla_nest_end(skb, exts);
+	}
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport)
 	ovs_vport_deferred_free(vport);
 	ovs_vport_deferred_free(vport);
 }
 }
 
 
+static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
+	[OVS_VXLAN_EXT_GBP]	= { .type = NLA_FLAG, },
+};
+
+static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
+{
+	struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
+	struct vxlan_port *vxlan_port;
+	int err;
+
+	if (nla_len(attr) < sizeof(struct nlattr))
+		return -EINVAL;
+
+	err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
+	if (err < 0)
+		return err;
+
+	vxlan_port = vxlan_vport(vport);
+
+	if (exts[OVS_VXLAN_EXT_GBP])
+		vxlan_port->exts |= VXLAN_F_GBP;
+
+	return 0;
+}
+
 static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 {
 {
 	struct net *net = ovs_dp_get_net(parms->dp);
 	struct net *net = ovs_dp_get_net(parms->dp);
@@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 	vxlan_port = vxlan_vport(vport);
 	vxlan_port = vxlan_vport(vport);
 	strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
 	strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
 
 
-	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
+	if (a) {
+		err = vxlan_configure_exts(vport, a);
+		if (err) {
+			ovs_vport_free(vport);
+			goto error;
+		}
+	}
+
+	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
+			    vxlan_port->exts);
 	if (IS_ERR(vs)) {
 	if (IS_ERR(vs)) {
 		ovs_vport_free(vport);
 		ovs_vport_free(vport);
 		return (void *)vs;
 		return (void *)vs;
@@ -140,12 +203,28 @@ error:
 	return ERR_PTR(err);
 	return ERR_PTR(err);
 }
 }
 
 
+static int vxlan_ext_gbp(struct sk_buff *skb)
+{
+	const struct ovs_tunnel_info *tun_info;
+	const struct ovs_vxlan_opts *opts;
+
+	tun_info = OVS_CB(skb)->egress_tun_info;
+	opts = tun_info->options;
+
+	if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT &&
+	    tun_info->options_len >= sizeof(*opts))
+		return opts->gbp;
+	else
+		return 0;
+}
+
 static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
 {
 	struct net *net = ovs_dp_get_net(vport->dp);
 	struct net *net = ovs_dp_get_net(vport->dp);
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
 	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
 	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
 	const struct ovs_key_ipv4_tunnel *tun_key;
 	const struct ovs_key_ipv4_tunnel *tun_key;
+	struct vxlan_metadata md = {0};
 	struct rtable *rt;
 	struct rtable *rt;
 	struct flowi4 fl;
 	struct flowi4 fl;
 	__be16 src_port;
 	__be16 src_port;
@@ -170,12 +249,14 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	skb->ignore_df = 1;
 	skb->ignore_df = 1;
 
 
 	src_port = udp_flow_src_port(net, skb, 0, 0, true);
 	src_port = udp_flow_src_port(net, skb, 0, 0, true);
+	md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
+	md.gbp = vxlan_ext_gbp(skb);
 
 
 	err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
 	err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
 			     fl.saddr, tun_key->ipv4_dst,
 			     fl.saddr, tun_key->ipv4_dst,
 			     tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
 			     tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
 			     src_port, dst_port,
 			     src_port, dst_port,
-			     htonl(be64_to_cpu(tun_key->tun_id) << 8),
+			     &md,
 			     false);
 			     false);
 	if (err < 0)
 	if (err < 0)
 		ip_rt_put(rt);
 		ip_rt_put(rt);

+ 11 - 0
net/openvswitch/vport-vxlan.h

@@ -0,0 +1,11 @@
+#ifndef VPORT_VXLAN_H
+#define VPORT_VXLAN_H 1
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct ovs_vxlan_opts {
+	__u32 gbp;
+};
+
+#endif