|
@@ -75,8 +75,7 @@ static struct rtnl_link_ops vxlan_link_ops;
|
|
|
|
|
|
static const u8 all_zeros_mac[ETH_ALEN];
|
|
|
|
|
|
-static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
|
|
|
- bool no_share, u32 flags);
|
|
|
+static int vxlan_sock_add(struct vxlan_dev *vxlan);
|
|
|
|
|
|
/* per-network namespace private data for this module */
|
|
|
struct vxlan_net {
|
|
@@ -994,19 +993,30 @@ static bool vxlan_snoop(struct net_device *dev,
|
|
|
static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan;
|
|
|
+ unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
|
|
|
|
|
|
/* The vxlan_sock is only used by dev, leaving group has
|
|
|
* no effect on other vxlan devices.
|
|
|
*/
|
|
|
- if (atomic_read(&dev->vn_sock->refcnt) == 1)
|
|
|
+ if (family == AF_INET && dev->vn4_sock &&
|
|
|
+ atomic_read(&dev->vn4_sock->refcnt) == 1)
|
|
|
return false;
|
|
|
+#if IS_ENABLED(CONFIG_IPV6)
|
|
|
+ if (family == AF_INET6 && dev->vn6_sock &&
|
|
|
+ atomic_read(&dev->vn6_sock->refcnt) == 1)
|
|
|
+ return false;
|
|
|
+#endif
|
|
|
|
|
|
list_for_each_entry(vxlan, &vn->vxlan_list, next) {
|
|
|
if (!netif_running(vxlan->dev) || vxlan == dev)
|
|
|
continue;
|
|
|
|
|
|
- if (vxlan->vn_sock != dev->vn_sock)
|
|
|
+ if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
|
|
|
continue;
|
|
|
+#if IS_ENABLED(CONFIG_IPV6)
|
|
|
+ if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
|
|
|
+ continue;
|
|
|
+#endif
|
|
|
|
|
|
if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
|
|
|
&dev->default_dst.remote_ip))
|
|
@@ -1022,15 +1032,16 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
-static void vxlan_sock_release(struct vxlan_sock *vs)
|
|
|
+static void __vxlan_sock_release(struct vxlan_sock *vs)
|
|
|
{
|
|
|
- struct sock *sk = vs->sock->sk;
|
|
|
- struct net *net = sock_net(sk);
|
|
|
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
+ struct vxlan_net *vn;
|
|
|
|
|
|
+ if (!vs)
|
|
|
+ return;
|
|
|
if (!atomic_dec_and_test(&vs->refcnt))
|
|
|
return;
|
|
|
|
|
|
+ vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
|
|
|
spin_lock(&vn->sock_lock);
|
|
|
hlist_del_rcu(&vs->hlist);
|
|
|
vxlan_notify_del_rx_port(vs);
|
|
@@ -1039,32 +1050,43 @@ static void vxlan_sock_release(struct vxlan_sock *vs)
|
|
|
queue_work(vxlan_wq, &vs->del_work);
|
|
|
}
|
|
|
|
|
|
+static void vxlan_sock_release(struct vxlan_dev *vxlan)
|
|
|
+{
|
|
|
+ __vxlan_sock_release(vxlan->vn4_sock);
|
|
|
+#if IS_ENABLED(CONFIG_IPV6)
|
|
|
+ __vxlan_sock_release(vxlan->vn6_sock);
|
|
|
+#endif
|
|
|
+}
|
|
|
+
|
|
|
/* Update multicast group membership when first VNI on
|
|
|
* multicast address is brought up
|
|
|
*/
|
|
|
static int vxlan_igmp_join(struct vxlan_dev *vxlan)
|
|
|
{
|
|
|
- struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
- struct sock *sk = vs->sock->sk;
|
|
|
+ struct sock *sk;
|
|
|
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
|
|
|
int ifindex = vxlan->default_dst.remote_ifindex;
|
|
|
int ret = -EINVAL;
|
|
|
|
|
|
- lock_sock(sk);
|
|
|
if (ip->sa.sa_family == AF_INET) {
|
|
|
struct ip_mreqn mreq = {
|
|
|
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
|
|
|
.imr_ifindex = ifindex,
|
|
|
};
|
|
|
|
|
|
+ sk = vxlan->vn4_sock->sock->sk;
|
|
|
+ lock_sock(sk);
|
|
|
ret = ip_mc_join_group(sk, &mreq);
|
|
|
+ release_sock(sk);
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
} else {
|
|
|
+ sk = vxlan->vn6_sock->sock->sk;
|
|
|
+ lock_sock(sk);
|
|
|
ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
|
|
|
&ip->sin6.sin6_addr);
|
|
|
+ release_sock(sk);
|
|
|
#endif
|
|
|
}
|
|
|
- release_sock(sk);
|
|
|
|
|
|
return ret;
|
|
|
}
|
|
@@ -1072,27 +1094,30 @@ static int vxlan_igmp_join(struct vxlan_dev *vxlan)
|
|
|
/* Inverse of vxlan_igmp_join when last VNI is brought down */
|
|
|
static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
|
|
|
{
|
|
|
- struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
- struct sock *sk = vs->sock->sk;
|
|
|
+ struct sock *sk;
|
|
|
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
|
|
|
int ifindex = vxlan->default_dst.remote_ifindex;
|
|
|
int ret = -EINVAL;
|
|
|
|
|
|
- lock_sock(sk);
|
|
|
if (ip->sa.sa_family == AF_INET) {
|
|
|
struct ip_mreqn mreq = {
|
|
|
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
|
|
|
.imr_ifindex = ifindex,
|
|
|
};
|
|
|
|
|
|
+ sk = vxlan->vn4_sock->sock->sk;
|
|
|
+ lock_sock(sk);
|
|
|
ret = ip_mc_leave_group(sk, &mreq);
|
|
|
+ release_sock(sk);
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
} else {
|
|
|
+ sk = vxlan->vn6_sock->sock->sk;
|
|
|
+ lock_sock(sk);
|
|
|
ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
|
|
|
&ip->sin6.sin6_addr);
|
|
|
+ release_sock(sk);
|
|
|
#endif
|
|
|
}
|
|
|
- release_sock(sk);
|
|
|
|
|
|
return ret;
|
|
|
}
|
|
@@ -1873,8 +1898,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
{
|
|
|
struct ip_tunnel_info *info;
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
- struct sock *sk = vxlan->vn_sock->sock->sk;
|
|
|
- unsigned short family = vxlan_get_sk_family(vxlan->vn_sock);
|
|
|
+ struct sock *sk;
|
|
|
struct rtable *rt = NULL;
|
|
|
const struct iphdr *old_iph;
|
|
|
struct flowi4 fl4;
|
|
@@ -1901,13 +1925,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
dev->name);
|
|
|
goto drop;
|
|
|
}
|
|
|
- if (family != ip_tunnel_info_af(info))
|
|
|
- goto drop;
|
|
|
-
|
|
|
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
|
|
|
vni = be64_to_cpu(info->key.tun_id);
|
|
|
- remote_ip.sa.sa_family = family;
|
|
|
- if (family == AF_INET)
|
|
|
+ remote_ip.sa.sa_family = ip_tunnel_info_af(info);
|
|
|
+ if (remote_ip.sa.sa_family == AF_INET)
|
|
|
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
|
|
|
else
|
|
|
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
|
|
@@ -1952,6 +1973,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
}
|
|
|
|
|
|
if (dst->sa.sa_family == AF_INET) {
|
|
|
+ if (!vxlan->vn4_sock)
|
|
|
+ goto drop;
|
|
|
+ sk = vxlan->vn4_sock->sock->sk;
|
|
|
+
|
|
|
if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
|
|
|
df = htons(IP_DF);
|
|
|
|
|
@@ -2013,6 +2038,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
struct flowi6 fl6;
|
|
|
u32 rt6i_flags;
|
|
|
|
|
|
+ if (!vxlan->vn6_sock)
|
|
|
+ goto drop;
|
|
|
+ sk = vxlan->vn6_sock->sock->sk;
|
|
|
+
|
|
|
memset(&fl6, 0, sizeof(fl6));
|
|
|
fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
|
|
|
fl6.daddr = dst->sin6.sin6_addr;
|
|
@@ -2204,7 +2233,6 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
|
|
|
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
|
|
|
__u32 vni = vxlan->default_dst.remote_vni;
|
|
|
|
|
|
- vxlan->vn_sock = vs;
|
|
|
spin_lock(&vn->sock_lock);
|
|
|
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
|
|
|
spin_unlock(&vn->sock_lock);
|
|
@@ -2244,22 +2272,18 @@ static void vxlan_uninit(struct net_device *dev)
|
|
|
static int vxlan_open(struct net_device *dev)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
- struct vxlan_sock *vs;
|
|
|
- int ret = 0;
|
|
|
+ int ret;
|
|
|
|
|
|
- vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port,
|
|
|
- vxlan->cfg.no_share, vxlan->flags);
|
|
|
- if (IS_ERR(vs))
|
|
|
- return PTR_ERR(vs);
|
|
|
-
|
|
|
- vxlan_vs_add_dev(vs, vxlan);
|
|
|
+ ret = vxlan_sock_add(vxlan);
|
|
|
+ if (ret < 0)
|
|
|
+ return ret;
|
|
|
|
|
|
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
|
|
|
ret = vxlan_igmp_join(vxlan);
|
|
|
if (ret == -EADDRINUSE)
|
|
|
ret = 0;
|
|
|
if (ret) {
|
|
|
- vxlan_sock_release(vs);
|
|
|
+ vxlan_sock_release(vxlan);
|
|
|
return ret;
|
|
|
}
|
|
|
}
|
|
@@ -2294,7 +2318,6 @@ static int vxlan_stop(struct net_device *dev)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
|
|
|
- struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
int ret = 0;
|
|
|
|
|
|
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
|
|
@@ -2304,7 +2327,7 @@ static int vxlan_stop(struct net_device *dev)
|
|
|
del_timer_sync(&vxlan->age_timer);
|
|
|
|
|
|
vxlan_flush(vxlan);
|
|
|
- vxlan_sock_release(vs);
|
|
|
+ vxlan_sock_release(vxlan);
|
|
|
|
|
|
return ret;
|
|
|
}
|
|
@@ -2540,14 +2563,13 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
|
|
|
}
|
|
|
|
|
|
/* Create new listen socket if needed */
|
|
|
-static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
|
|
|
- u32 flags)
|
|
|
+static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
|
|
|
+ __be16 port, u32 flags)
|
|
|
{
|
|
|
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
struct vxlan_sock *vs;
|
|
|
struct socket *sock;
|
|
|
unsigned int h;
|
|
|
- bool ipv6 = !!(flags & VXLAN_F_IPV6);
|
|
|
struct udp_tunnel_sock_cfg tunnel_cfg;
|
|
|
|
|
|
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
|
|
@@ -2592,27 +2614,53 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
|
|
|
return vs;
|
|
|
}
|
|
|
|
|
|
-static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
|
|
|
- bool no_share, u32 flags)
|
|
|
+static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
|
|
|
{
|
|
|
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
- struct vxlan_sock *vs;
|
|
|
- bool ipv6 = flags & VXLAN_F_IPV6;
|
|
|
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
|
|
|
+ struct vxlan_sock *vs = NULL;
|
|
|
|
|
|
- if (!no_share) {
|
|
|
+ if (!vxlan->cfg.no_share) {
|
|
|
spin_lock(&vn->sock_lock);
|
|
|
- vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
|
|
|
- flags);
|
|
|
- if (vs) {
|
|
|
- if (!atomic_add_unless(&vs->refcnt, 1, 0))
|
|
|
- vs = ERR_PTR(-EBUSY);
|
|
|
+ vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
|
|
|
+ vxlan->cfg.dst_port, vxlan->flags);
|
|
|
+ if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) {
|
|
|
spin_unlock(&vn->sock_lock);
|
|
|
- return vs;
|
|
|
+ return -EBUSY;
|
|
|
}
|
|
|
spin_unlock(&vn->sock_lock);
|
|
|
}
|
|
|
+ if (!vs)
|
|
|
+ vs = vxlan_socket_create(vxlan->net, ipv6,
|
|
|
+ vxlan->cfg.dst_port, vxlan->flags);
|
|
|
+ if (IS_ERR(vs))
|
|
|
+ return PTR_ERR(vs);
|
|
|
+#if IS_ENABLED(CONFIG_IPV6)
|
|
|
+ if (ipv6)
|
|
|
+ vxlan->vn6_sock = vs;
|
|
|
+ else
|
|
|
+#endif
|
|
|
+ vxlan->vn4_sock = vs;
|
|
|
+ vxlan_vs_add_dev(vs, vxlan);
|
|
|
+ return 0;
|
|
|
+}
|
|
|
|
|
|
- return vxlan_socket_create(net, port, flags);
|
|
|
+static int vxlan_sock_add(struct vxlan_dev *vxlan)
|
|
|
+{
|
|
|
+ bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
|
|
|
+ bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
|
|
|
+ int ret = 0;
|
|
|
+
|
|
|
+ vxlan->vn4_sock = NULL;
|
|
|
+#if IS_ENABLED(CONFIG_IPV6)
|
|
|
+ vxlan->vn6_sock = NULL;
|
|
|
+ if (ipv6 || metadata)
|
|
|
+ ret = __vxlan_sock_add(vxlan, true);
|
|
|
+#endif
|
|
|
+ if (!ret && (!ipv6 || metadata))
|
|
|
+ ret = __vxlan_sock_add(vxlan, false);
|
|
|
+ if (ret < 0)
|
|
|
+ vxlan_sock_release(vxlan);
|
|
|
+ return ret;
|
|
|
}
|
|
|
|
|
|
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
|
|
@@ -2621,6 +2669,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
|
|
|
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
struct vxlan_rdst *dst = &vxlan->default_dst;
|
|
|
+ unsigned short needed_headroom = ETH_HLEN;
|
|
|
int err;
|
|
|
bool use_ipv6 = false;
|
|
|
__be16 default_port = vxlan->cfg.dst_port;
|
|
@@ -2640,6 +2689,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
|
|
|
if (!IS_ENABLED(CONFIG_IPV6))
|
|
|
return -EPFNOSUPPORT;
|
|
|
use_ipv6 = true;
|
|
|
+ vxlan->flags |= VXLAN_F_IPV6;
|
|
|
}
|
|
|
|
|
|
if (conf->remote_ifindex) {
|
|
@@ -2660,22 +2710,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
|
|
|
pr_info("IPv6 is disabled via sysctl\n");
|
|
|
return -EPERM;
|
|
|
}
|
|
|
- vxlan->flags |= VXLAN_F_IPV6;
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
if (!conf->mtu)
|
|
|
dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
|
|
|
|
|
|
- dev->needed_headroom = lowerdev->hard_header_len +
|
|
|
- (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
|
|
|
- } else if (use_ipv6) {
|
|
|
- vxlan->flags |= VXLAN_F_IPV6;
|
|
|
- dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
|
|
|
- } else {
|
|
|
- dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;
|
|
|
+ needed_headroom = lowerdev->hard_header_len;
|
|
|
}
|
|
|
|
|
|
+ if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
|
|
|
+ needed_headroom += VXLAN6_HEADROOM;
|
|
|
+ else
|
|
|
+ needed_headroom += VXLAN_HEADROOM;
|
|
|
+ dev->needed_headroom = needed_headroom;
|
|
|
+
|
|
|
memcpy(&vxlan->cfg, conf, sizeof(*conf));
|
|
|
if (!vxlan->cfg.dst_port)
|
|
|
vxlan->cfg.dst_port = default_port;
|