
Merge tag 'ipvs2-for-v4.3' of https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next

Simon Horman says:

====================
Second Round of IPVS Updates for v4.3

I realise these are a little late in the cycle, so if you would prefer
me to repost them for v4.4 then just let me know.

The updates include:
* A new scheduler from Raducu Deaconu
* Enhanced configurability of the sync daemon from Julian Anastasov
====================

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Pablo Neira Ayuso, 10 years ago — commit 1b383bf912

include/net/ip_vs.h | +15 -8

@@ -846,6 +846,17 @@ struct ipvs_master_sync_state {
 /* How much time to keep dests in trash */
 #define IP_VS_DEST_TRASH_PERIOD		(120 * HZ)
 
+struct ipvs_sync_daemon_cfg {
+	union nf_inet_addr	mcast_group;
+	int			syncid;
+	u16			sync_maxlen;
+	u16			mcast_port;
+	u8			mcast_af;
+	u8			mcast_ttl;
+	/* multicast interface name */
+	char			mcast_ifn[IP_VS_IFNAME_MAXLEN];
+};
+
 /* IPVS in network namespace */
 struct netns_ipvs {
 	int			gen;		/* Generation */
@@ -961,15 +972,10 @@ struct netns_ipvs {
 	spinlock_t		sync_buff_lock;
 	struct task_struct	**backup_threads;
 	int			threads_mask;
-	int			send_mesg_maxlen;
-	int			recv_mesg_maxlen;
 	volatile int		sync_state;
-	volatile int		master_syncid;
-	volatile int		backup_syncid;
 	struct mutex		sync_mutex;
-	/* multicast interface name */
-	char			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+	struct ipvs_sync_daemon_cfg	mcfg;	/* Master Configuration */
+	struct ipvs_sync_daemon_cfg	bcfg;	/* Backup Configuration */
 	/* net name space ptr */
 	struct net		*net;            /* Needed by timer routines */
 	/* Number of heterogeneous destinations, needed becaus heterogeneous
@@ -1408,7 +1414,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
 /* IPVS sync daemon data and function prototypes
  * (from ip_vs_sync.c)
  */
-int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid);
+int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *cfg,
+		      int state);
 int stop_sync_thread(struct net *net, int state);
 void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 

include/uapi/linux/ip_vs.h | +5 -0

@@ -406,6 +406,11 @@ enum {
 	IPVS_DAEMON_ATTR_STATE,		/* sync daemon state (master/backup) */
 	IPVS_DAEMON_ATTR_MCAST_IFN,	/* multicast interface name */
 	IPVS_DAEMON_ATTR_SYNC_ID,	/* SyncID we belong to */
+	IPVS_DAEMON_ATTR_SYNC_MAXLEN,	/* UDP Payload Size */
+	IPVS_DAEMON_ATTR_MCAST_GROUP,	/* IPv4 Multicast Address */
+	IPVS_DAEMON_ATTR_MCAST_GROUP6,	/* IPv6 Multicast Address */
+	IPVS_DAEMON_ATTR_MCAST_PORT,	/* Multicast Port (base) */
+	IPVS_DAEMON_ATTR_MCAST_TTL,	/* Multicast TTL */
 	__IPVS_DAEMON_ATTR_MAX,
 };
 

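Editor's note: the snippet below is an illustrative userspace sketch, not part of this commit. It shows how the nested daemon attributes above could be filled in over libnl-3 generic netlink to start a master sync daemon; the interface name, SyncID and tuning values are made-up examples and error handling is omitted.

    /* hypothetical userspace sketch (libnl-3); attribute and command names
     * come from the uapi header above, values are arbitrary examples */
    #include <arpa/inet.h>
    #include <netlink/netlink.h>
    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    #include <linux/ip_vs.h>

    int main(void)
    {
    	struct nl_sock *sk = nl_socket_alloc();
    	struct nl_msg *msg = nlmsg_alloc();
    	struct nlattr *nest;
    	int family;

    	genl_connect(sk);
    	family = genl_ctrl_resolve(sk, IPVS_GENL_NAME);

    	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
    		    IPVS_CMD_NEW_DAEMON, IPVS_GENL_VERSION);
    	nest = nla_nest_start(msg, IPVS_CMD_ATTR_DAEMON);
    	nla_put_u32(msg, IPVS_DAEMON_ATTR_STATE, IP_VS_STATE_MASTER);
    	nla_put_string(msg, IPVS_DAEMON_ATTR_MCAST_IFN, "eth0");
    	nla_put_u32(msg, IPVS_DAEMON_ATTR_SYNC_ID, 10);
    	/* the new, optional knobs added by this series */
    	nla_put_u16(msg, IPVS_DAEMON_ATTR_SYNC_MAXLEN, 1400);
    	nla_put_u32(msg, IPVS_DAEMON_ATTR_MCAST_GROUP, htonl(0xe0000051)); /* 224.0.0.81 */
    	nla_put_u16(msg, IPVS_DAEMON_ATTR_MCAST_PORT, 8848);
    	nla_put_u8(msg, IPVS_DAEMON_ATTR_MCAST_TTL, 1);
    	nla_nest_end(msg, nest);

    	nl_send_auto(sk, msg);
    	nl_wait_for_ack(sk);
    	nlmsg_free(msg);
    	nl_socket_free(sk);
    	return 0;
    }
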
net/netfilter/ipvs/Kconfig | +11 -0

@@ -162,6 +162,17 @@ config  IP_VS_FO
 	  If you want to compile it in kernel, say Y. To compile it as a
 	  module, choose M here. If unsure, say N.
 
+config  IP_VS_OVF
+	tristate "weighted overflow scheduling"
+	---help---
+	  The weighted overflow scheduling algorithm directs network
+	  connections to the server with the highest weight that is
+	  currently available and overflows to the next when active
+	  connections exceed the node's weight.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
 config	IP_VS_LBLC
 	tristate "locality-based least-connection scheduling"
 	---help---

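Editor's note, for orientation only (not part of this patch): once the module is built, the new scheduler is selected per virtual service by its name "ovf", for example with ipvsadm; the addresses and weights below are made up.

    # highest-weight real server handles traffic until its active
    # connections exceed its weight, then the next one takes the overflow
    ipvsadm -A -t 192.168.0.1:80 -s ovf
    ipvsadm -a -t 192.168.0.1:80 -r 10.0.0.10:80 -m -w 100
    ipvsadm -a -t 192.168.0.1:80 -r 10.0.0.11:80 -m -w 50
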
net/netfilter/ipvs/Makefile | +1 -0

@@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
 obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
 obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
 obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
+obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o
 obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
 obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
 obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o

net/netfilter/ipvs/ip_vs_ctl.c | +110 -33

@@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	    cmd == IP_VS_SO_SET_STOPDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
 
-		mutex_lock(&ipvs->sync_mutex);
-		if (cmd == IP_VS_SO_SET_STARTDAEMON)
-			ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
-						dm->syncid);
-		else
+		if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+			struct ipvs_sync_daemon_cfg cfg;
+
+			memset(&cfg, 0, sizeof(cfg));
+			strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
+				sizeof(cfg.mcast_ifn));
+			cfg.syncid = dm->syncid;
+			rtnl_lock();
+			mutex_lock(&ipvs->sync_mutex);
+			ret = start_sync_thread(net, &cfg, dm->state);
+			mutex_unlock(&ipvs->sync_mutex);
+			rtnl_unlock();
+		} else {
+			mutex_lock(&ipvs->sync_mutex);
 			ret = stop_sync_thread(net, dm->state);
-		mutex_unlock(&ipvs->sync_mutex);
+			mutex_unlock(&ipvs->sync_mutex);
+		}
 		goto out_dec;
 	}
 
@@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		mutex_lock(&ipvs->sync_mutex);
 		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
 			d[0].state = IP_VS_STATE_MASTER;
-			strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+			strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
 				sizeof(d[0].mcast_ifn));
-			d[0].syncid = ipvs->master_syncid;
+			d[0].syncid = ipvs->mcfg.syncid;
 		}
 		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
 			d[1].state = IP_VS_STATE_BACKUP;
-			strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+			strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
 				sizeof(d[1].mcast_ifn));
-			d[1].syncid = ipvs->backup_syncid;
+			d[1].syncid = ipvs->bcfg.syncid;
 		}
 		if (copy_to_user(user, &d, sizeof(d)) != 0)
 			ret = -EFAULT;
@@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
 	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
 					    .len = IP_VS_IFNAME_MAXLEN },
 	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
+	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_GROUP6]	= { .len = sizeof(struct in6_addr) },
+	[IPVS_DAEMON_ATTR_MCAST_PORT]	= { .type = NLA_U16 },
+	[IPVS_DAEMON_ATTR_MCAST_TTL]	= { .type = NLA_U8 },
 };
 
 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
@@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 }
 
 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
-				  const char *mcast_ifn, __u32 syncid)
+				  struct ipvs_sync_daemon_cfg *c)
 {
 	struct nlattr *nl_daemon;
 
@@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
 		return -EMSGSIZE;
 
 	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
-	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
-	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
+	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
+	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
+	    nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
+	    nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
+	    nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
 		goto nla_put_failure;
+#ifdef CONFIG_IP_VS_IPV6
+	if (c->mcast_af == AF_INET6) {
+		if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
+				     &c->mcast_group.in6))
+			goto nla_put_failure;
+	} else
+#endif
+		if (c->mcast_af == AF_INET &&
+		    nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
+				    c->mcast_group.ip))
+			goto nla_put_failure;
 	nla_nest_end(skb, nl_daemon);
 
 	return 0;
@@ -3288,7 +3317,7 @@ nla_put_failure:
 }
 
 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
-				  const char *mcast_ifn, __u32 syncid,
+				  struct ipvs_sync_daemon_cfg *c,
 				  struct netlink_callback *cb)
 {
 	void *hdr;
@@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
 	if (!hdr)
 		return -EMSGSIZE;
 
-	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+	if (ip_vs_genl_fill_daemon(skb, state, c))
 		goto nla_put_failure;
 
 	genlmsg_end(skb, hdr);
@@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
 	mutex_lock(&ipvs->sync_mutex);
 	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-					   ipvs->master_mcast_ifn,
-					   ipvs->master_syncid, cb) < 0)
+					   &ipvs->mcfg, cb) < 0)
 			goto nla_put_failure;
 
 		cb->args[0] = 1;
@@ -3327,8 +3355,7 @@
 
 	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-					   ipvs->backup_mcast_ifn,
-					   ipvs->backup_syncid, cb) < 0)
+					   &ipvs->bcfg, cb) < 0)
 			goto nla_put_failure;
 
 		cb->args[1] = 1;
@@ -3342,30 +3369,83 @@ nla_put_failure:
 
 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ipvs_sync_daemon_cfg c;
+	struct nlattr *a;
+	int ret;
+
+	memset(&c, 0, sizeof(c));
 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
 		return -EINVAL;
+	strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+		sizeof(c.mcast_ifn));
+	c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
+
+	a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
+	if (a)
+		c.sync_maxlen = nla_get_u16(a);
+
+	a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
+	if (a) {
+		c.mcast_af = AF_INET;
+		c.mcast_group.ip = nla_get_in_addr(a);
+		if (!ipv4_is_multicast(c.mcast_group.ip))
+			return -EINVAL;
+	} else {
+		a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
+		if (a) {
+#ifdef CONFIG_IP_VS_IPV6
+			int addr_type;
+
+			c.mcast_af = AF_INET6;
+			c.mcast_group.in6 = nla_get_in6_addr(a);
+			addr_type = ipv6_addr_type(&c.mcast_group.in6);
+			if (!(addr_type & IPV6_ADDR_MULTICAST))
+				return -EINVAL;
+#else
+			return -EAFNOSUPPORT;
+#endif
+		}
+	}
+
+	a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
+	if (a)
+		c.mcast_port = nla_get_u16(a);
+
+	a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
+	if (a)
+		c.mcast_ttl = nla_get_u8(a);
 
 	/* The synchronization protocol is incompatible with mixed family
 	 * services
 	 */
-	if (net_ipvs(net)->mixed_address_family_dests > 0)
+	if (ipvs->mixed_address_family_dests > 0)
 		return -EINVAL;
 
-	return start_sync_thread(net,
-				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
-				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
-				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+	rtnl_lock();
+	mutex_lock(&ipvs->sync_mutex);
+	ret = start_sync_thread(net, &c,
+				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+	mutex_unlock(&ipvs->sync_mutex);
+	rtnl_unlock();
+	return ret;
 }
 
 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	int ret;
+
 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
 		return -EINVAL;
 
-	return stop_sync_thread(net,
-				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+	mutex_lock(&ipvs->sync_mutex);
+	ret = stop_sync_thread(net,
+			       nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+	mutex_unlock(&ipvs->sync_mutex);
+	return ret;
 }
 
 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
@@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 
 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
 {
-	int ret = 0, cmd;
+	int ret = -EINVAL, cmd;
 	struct net *net;
 	struct netns_ipvs *ipvs;
 
@@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
 	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
 		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
 
-		mutex_lock(&ipvs->sync_mutex);
 		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
 		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
 				     info->attrs[IPVS_CMD_ATTR_DAEMON],
-				     ip_vs_daemon_policy)) {
-			ret = -EINVAL;
+				     ip_vs_daemon_policy))
 			goto out;
-		}
 
 		if (cmd == IPVS_CMD_NEW_DAEMON)
 			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
 		else
 			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
-out:
-		mutex_unlock(&ipvs->sync_mutex);
 	}
+
+out:
 	return ret;
 }
 

net/netfilter/ipvs/ip_vs_ovf.c | +86 -0

@@ -0,0 +1,86 @@
+/*
+ * IPVS:        Overflow-Connection Scheduling module
+ *
+ * Authors:     Raducu Deaconu <rhadoo_io@yahoo.com>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Scheduler implements "overflow" loadbalancing according to number of active
+ * connections , will keep all conections to the node with the highest weight
+ * and overflow to the next node if the number of connections exceeds the node's
+ * weight.
+ * Note that this scheduler might not be suitable for UDP because it only uses
+ * active connections
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* OVF Connection scheduling  */
+static struct ip_vs_dest *
+ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		   struct ip_vs_iphdr *iph)
+{
+	struct ip_vs_dest *dest, *h = NULL;
+	int hw = 0, w;
+
+	IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n");
+	/* select the node with highest weight, go to next in line if active
+	* connections exceed weight
+	*/
+	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+		w = atomic_read(&dest->weight);
+		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+		    atomic_read(&dest->activeconns) > w ||
+		    w == 0)
+			continue;
+		if (!h || w > hw) {
+			h = dest;
+			hw = w;
+		}
+	}
+
+	if (h) {
+		IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n",
+			      IP_VS_DBG_ADDR(h->af, &h->addr),
+			      ntohs(h->port),
+			      atomic_read(&h->activeconns),
+			      atomic_read(&h->weight));
+		return h;
+	}
+
+	ip_vs_scheduler_err(svc, "no destination available");
+	return NULL;
+}
+
+static struct ip_vs_scheduler ip_vs_ovf_scheduler = {
+	.name =			"ovf",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list),
+	.schedule =		ip_vs_ovf_schedule,
+};
+
+static int __init ip_vs_ovf_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+}
+
+static void __exit ip_vs_ovf_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+	synchronize_rcu();
+}
+
+module_init(ip_vs_ovf_init);
+module_exit(ip_vs_ovf_cleanup);
+MODULE_LICENSE("GPL");

net/netfilter/ipvs/ip_vs_sync.c | +174 -95

@@ -262,6 +262,11 @@ struct ip_vs_sync_mesg {
 	/* ip_vs_sync_conn entries start here */
 };
 
+union ipvs_sockaddr {
+	struct sockaddr_in	in;
+	struct sockaddr_in6	in6;
+};
+
 struct ip_vs_sync_buff {
 	struct list_head        list;
 	unsigned long           firstuse;
@@ -320,26 +325,28 @@ sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
  * Create a new sync buffer for Version 1 proto.
  */
 static inline struct ip_vs_sync_buff *
-ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
 {
 	struct ip_vs_sync_buff *sb;
 
 	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
 		return NULL;
 
-	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+	len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
+		    ipvs->mcfg.sync_maxlen);
+	sb->mesg = kmalloc(len, GFP_ATOMIC);
 	if (!sb->mesg) {
 		kfree(sb);
 		return NULL;
 	}
 	sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zero now */
 	sb->mesg->version = SYNC_PROTO_VER;
-	sb->mesg->syncid = ipvs->master_syncid;
+	sb->mesg->syncid = ipvs->mcfg.syncid;
 	sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
 	sb->mesg->nr_conns = 0;
 	sb->mesg->spare = 0;
 	sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
-	sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+	sb->end = (unsigned char *)sb->mesg + len;
 
 	sb->firstuse = jiffies;
 	return sb;
@@ -402,7 +409,7 @@ select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
  * Create a new sync buffer for Version 0 proto.
  */
 static inline struct ip_vs_sync_buff *
-ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
 {
 	struct ip_vs_sync_buff *sb;
 	struct ip_vs_sync_mesg_v0 *mesg;
@@ -410,17 +417,19 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
 	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
 		return NULL;
 
-	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+	len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
+		    ipvs->mcfg.sync_maxlen);
+	sb->mesg = kmalloc(len, GFP_ATOMIC);
 	if (!sb->mesg) {
 		kfree(sb);
 		return NULL;
 	}
 	mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
 	mesg->nr_conns = 0;
-	mesg->syncid = ipvs->master_syncid;
+	mesg->syncid = ipvs->mcfg.syncid;
 	mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
 	sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
-	sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
+	sb->end = (unsigned char *)mesg + len;
 	sb->firstuse = jiffies;
 	return sb;
 }
@@ -533,7 +542,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
 	struct ip_vs_sync_buff *buff;
 	struct ipvs_master_sync_state *ms;
 	int id;
-	int len;
+	unsigned int len;
 
 	if (unlikely(cp->af != AF_INET))
 		return;
@@ -553,17 +562,19 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
 	id = select_master_thread_id(ipvs, cp);
 	ms = &ipvs->ms[id];
 	buff = ms->sync_buff;
+	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+		SIMPLE_CONN_SIZE;
 	if (buff) {
 		m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
 		/* Send buffer if it is for v1 */
-		if (!m->nr_conns) {
+		if (buff->head + len > buff->end || !m->nr_conns) {
 			sb_queue_tail(ipvs, ms);
 			ms->sync_buff = NULL;
 			buff = NULL;
 		}
 	}
 	if (!buff) {
-		buff = ip_vs_sync_buff_create_v0(ipvs);
+		buff = ip_vs_sync_buff_create_v0(ipvs, len);
 		if (!buff) {
 			spin_unlock_bh(&ipvs->sync_buff_lock);
 			pr_err("ip_vs_sync_buff_create failed.\n");
@@ -572,8 +583,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
 		ms->sync_buff = buff;
 	}
 
-	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
-		SIMPLE_CONN_SIZE;
 	m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
 	s = (struct ip_vs_sync_conn_v0 *) buff->head;
 
@@ -597,12 +606,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
 	m->nr_conns++;
 	m->size = htons(ntohs(m->size) + len);
 	buff->head += len;
-
-	/* check if there is a space for next one */
-	if (buff->head + FULL_CONN_SIZE > buff->end) {
-		sb_queue_tail(ipvs, ms);
-		ms->sync_buff = NULL;
-	}
 	spin_unlock_bh(&ipvs->sync_buff_lock);
 
 	/* synchronize its controller if it has */
@@ -694,7 +697,7 @@ sloop:
 	}
 
 	if (!buff) {
-		buff = ip_vs_sync_buff_create(ipvs);
+		buff = ip_vs_sync_buff_create(ipvs, len);
 		if (!buff) {
 			spin_unlock_bh(&ipvs->sync_buff_lock);
 			pr_err("ip_vs_sync_buff_create failed.\n");
@@ -1219,7 +1222,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
 		return;
 	}
 	/* SyncID sanity check */
-	if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+	if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
 		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
 		return;
 	}
@@ -1303,6 +1306,14 @@ static void set_mcast_loop(struct sock *sk, u_char loop)
 	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
 	lock_sock(sk);
 	inet->mc_loop = loop ? 1 : 0;
+#ifdef CONFIG_IP_VS_IPV6
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		/* IPV6_MULTICAST_LOOP */
+		np->mc_loop = loop ? 1 : 0;
+	}
+#endif
 	release_sock(sk);
 }
 
@@ -1316,6 +1327,33 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
 	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
 	lock_sock(sk);
 	inet->mc_ttl = ttl;
+#ifdef CONFIG_IP_VS_IPV6
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		/* IPV6_MULTICAST_HOPS */
+		np->mcast_hops = ttl;
+	}
+#endif
+	release_sock(sk);
+}
+
+/* Control fragmentation of messages */
+static void set_mcast_pmtudisc(struct sock *sk, int val)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
+	lock_sock(sk);
+	inet->pmtudisc = val;
+#ifdef CONFIG_IP_VS_IPV6
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		/* IPV6_MTU_DISCOVER */
+		np->pmtudisc = val;
+	}
+#endif
 	release_sock(sk);
 }
 
@@ -1338,44 +1376,15 @@ static int set_mcast_if(struct sock *sk, char *ifname)
 	lock_sock(sk);
 	inet->mc_index = dev->ifindex;
 	/*  inet->mc_addr  = 0; */
-	release_sock(sk);
-
-	return 0;
-}
-
+#ifdef CONFIG_IP_VS_IPV6
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
 
-/*
- *	Set the maximum length of sync message according to the
- *	specified interface's MTU.
- */
-static int set_sync_mesg_maxlen(struct net *net, int sync_state)
-{
-	struct netns_ipvs *ipvs = net_ipvs(net);
-	struct net_device *dev;
-	int num;
-
-	if (sync_state == IP_VS_STATE_MASTER) {
-		dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
-		if (!dev)
-			return -ENODEV;
-
-		num = (dev->mtu - sizeof(struct iphdr) -
-		       sizeof(struct udphdr) -
-		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-		ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
-			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
-		IP_VS_DBG(7, "setting the maximum length of sync sending "
-			  "message %d.\n", ipvs->send_mesg_maxlen);
-	} else if (sync_state == IP_VS_STATE_BACKUP) {
-		dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
-		if (!dev)
-			return -ENODEV;
-
-		ipvs->recv_mesg_maxlen = dev->mtu -
-			sizeof(struct iphdr) - sizeof(struct udphdr);
-		IP_VS_DBG(7, "setting the maximum length of sync receiving "
-			  "message %d.\n", ipvs->recv_mesg_maxlen);
+		/* IPV6_MULTICAST_IF */
+		np->mcast_oif = dev->ifindex;
 	}
+#endif
+	release_sock(sk);
 
 	return 0;
 }
@@ -1405,15 +1414,34 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 
 	mreq.imr_ifindex = dev->ifindex;
 
-	rtnl_lock();
 	lock_sock(sk);
 	ret = ip_mc_join_group(sk, &mreq);
 	release_sock(sk);
-	rtnl_unlock();
 
 	return ret;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
+			     char *ifname)
+{
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+	int ret;
+
+	dev = __dev_get_by_name(net, ifname);
+	if (!dev)
+		return -ENODEV;
+	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+		return -EINVAL;
+
+	lock_sock(sk);
+	ret = ipv6_sock_mc_join(sk, dev->ifindex, addr);
+	release_sock(sk);
+
+	return ret;
+}
+#endif
 
 static int bind_mcastif_addr(struct socket *sock, char *ifname)
 {
@@ -1442,6 +1470,26 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
 }
 
+static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
+			       struct ipvs_sync_daemon_cfg *c, int id)
+{
+	if (AF_INET6 == c->mcast_af) {
+		sa->in6 = (struct sockaddr_in6) {
+			.sin6_family = AF_INET6,
+			.sin6_port = htons(c->mcast_port + id),
+		};
+		sa->in6.sin6_addr = c->mcast_group.in6;
+		*salen = sizeof(sa->in6);
+	} else {
+		sa->in = (struct sockaddr_in) {
+			.sin_family = AF_INET,
+			.sin_port = htons(c->mcast_port + id),
+		};
+		sa->in.sin_addr = c->mcast_group.in;
+		*salen = sizeof(sa->in);
+	}
+}
+
 /*
  *      Set up sending multicast socket over UDP
  */
@@ -1449,40 +1497,43 @@ static struct socket *make_send_sock(struct net *net, int id)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	/* multicast addr */
-	struct sockaddr_in mcast_addr = {
-		.sin_family		= AF_INET,
-		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
-		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
-	};
+	union ipvs_sockaddr mcast_addr;
 	struct socket *sock;
-	int result;
+	int result, salen;
 
 	/* First create a socket */
-	result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
+				  IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
+	result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
 		goto error;
 	}
 
 	set_mcast_loop(sock->sk, 0);
-	set_mcast_ttl(sock->sk, 1);
+	set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
+	/* Allow fragmentation if MTU changes */
+	set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
 	result = sysctl_sync_sock_size(ipvs);
 	if (result > 0)
 		set_sock_size(sock->sk, 1, result);
 
-	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
+	if (AF_INET == ipvs->mcfg.mcast_af)
+		result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
+	else
+		result = 0;
 	if (result < 0) {
 		pr_err("Error binding address of the mcast interface\n");
 		goto error;
 	}
 
+	get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
 	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
-			sizeof(struct sockaddr), 0);
+				    salen, 0);
 	if (result < 0) {
 		pr_err("Error connecting to the multicast addr\n");
 		goto error;
@@ -1503,16 +1554,13 @@ static struct socket *make_receive_sock(struct net *net, int id)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	/* multicast addr */
-	struct sockaddr_in mcast_addr = {
-		.sin_family		= AF_INET,
-		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
-		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
-	};
+	union ipvs_sockaddr mcast_addr;
 	struct socket *sock;
-	int result;
+	int result, salen;
 
 	/* First create a socket */
-	result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
+				  IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
@@ -1523,17 +1571,22 @@ static struct socket *make_receive_sock(struct net *net, int id)
 	if (result > 0)
 		set_sock_size(sock->sk, 0, result);
 
-	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
-			sizeof(struct sockaddr));
+	get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
+	result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
 	if (result < 0) {
 		pr_err("Error binding to the multicast addr\n");
 		goto error;
 	}
 
 	/* join the multicast group */
-	result = join_mcast_group(sock->sk,
-			(struct in_addr *) &mcast_addr.sin_addr,
-			ipvs->backup_mcast_ifn);
+#ifdef CONFIG_IP_VS_IPV6
+	if (ipvs->bcfg.mcast_af == AF_INET6)
+		result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
+					   ipvs->bcfg.mcast_ifn);
+	else
+#endif
+		result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
+					  ipvs->bcfg.mcast_ifn);
 	if (result < 0) {
 		pr_err("Error joining to the multicast group\n");
 		goto error;
@@ -1641,7 +1694,7 @@ static int sync_thread_master(void *data)
 
 	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
 		"syncid = %d, id = %d\n",
-		ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
+		ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);
 
 	for (;;) {
 		sb = next_sync_buff(ipvs, ms);
@@ -1695,7 +1748,7 @@ static int sync_thread_backup(void *data)
 
 	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
 		"syncid = %d, id = %d\n",
-		ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
+		ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
 
 	while (!kthread_should_stop()) {
 		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1705,7 +1758,7 @@ static int sync_thread_backup(void *data)
 		/* do we have data now? */
 		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
 			len = ip_vs_receive(tinfo->sock, tinfo->buf,
-					ipvs->recv_mesg_maxlen);
+					ipvs->bcfg.sync_maxlen);
 			if (len <= 0) {
 				if (len != -EAGAIN)
 					pr_err("receiving message error\n");
@@ -1725,16 +1778,19 @@ static int sync_thread_backup(void *data)
 }
 
 
-int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
+		      int state)
 {
 	struct ip_vs_sync_thread_data *tinfo;
 	struct task_struct **array = NULL, *task;
 	struct socket *sock;
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct net_device *dev;
 	char *name;
 	int (*threadfn)(void *data);
-	int id, count;
+	int id, count, hlen;
 	int result = -ENOMEM;
+	u16 mtu, min_mtu;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
@@ -1746,22 +1802,46 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 	} else
 		count = ipvs->threads_mask + 1;
 
+	if (c->mcast_af == AF_UNSPEC) {
+		c->mcast_af = AF_INET;
+		c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
+	}
+	if (!c->mcast_port)
+		c->mcast_port = IP_VS_SYNC_PORT;
+	if (!c->mcast_ttl)
+		c->mcast_ttl = 1;
+
+	dev = __dev_get_by_name(net, c->mcast_ifn);
+	if (!dev) {
+		pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
+		return -ENODEV;
+	}
+	hlen = (AF_INET6 == c->mcast_af) ?
+	       sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
+	       sizeof(struct iphdr) + sizeof(struct udphdr);
+	mtu = (state == IP_VS_STATE_BACKUP) ?
+		  clamp(dev->mtu, 1500U, 65535U) : 1500U;
+	min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;
+
+	if (c->sync_maxlen)
+		c->sync_maxlen = clamp_t(unsigned int,
+					 c->sync_maxlen, min_mtu,
+					 65535 - hlen);
+	else
+		c->sync_maxlen = mtu - hlen;
+
 	if (state == IP_VS_STATE_MASTER) {
 		if (ipvs->ms)
 			return -EEXIST;
 
-		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
-			sizeof(ipvs->master_mcast_ifn));
-		ipvs->master_syncid = syncid;
+		ipvs->mcfg = *c;
 		name = "ipvs-m:%d:%d";
 		threadfn = sync_thread_master;
 	} else if (state == IP_VS_STATE_BACKUP) {
 		if (ipvs->backup_threads)
 			return -EEXIST;
 
-		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
-			sizeof(ipvs->backup_mcast_ifn));
-		ipvs->backup_syncid = syncid;
+		ipvs->bcfg = *c;
 		name = "ipvs-b:%d:%d";
 		threadfn = sync_thread_backup;
 	} else {
@@ -1789,7 +1869,6 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 		if (!array)
 			goto out;
 	}
-	set_sync_mesg_maxlen(net, state);
 
 	tinfo = NULL;
 	for (id = 0; id < count; id++) {
@@ -1807,7 +1886,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 		tinfo->net = net;
 		tinfo->sock = sock;
 		if (state == IP_VS_STATE_BACKUP) {
-			tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
+			tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
 					     GFP_KERNEL);
 			if (!tinfo->buf)
 				goto outtinfo;
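
Editor's note on the defaults introduced in the @@ -1746,22 +1802,46 @@ hunk above, using only the standard header sizes: when userspace does not supply IPVS_DAEMON_ATTR_SYNC_MAXLEN, sync_maxlen falls back to the MTU minus the IP/UDP header overhead. For a master thread on the default IPv4 multicast transport that works out to 1500 - (20 + 8) = 1472 bytes per sync message; with IPv6 multicast the overhead grows to 40 + 8 = 48 bytes, and a backup thread first clamps the device MTU to the 1500..65535 range before the same subtraction.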