@@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data {
 	struct net *net;
 	struct socket *sock;
 	char *buf;
+	int id;
 };
 
 /* Version 0 definition of packet sizes */
@@ -271,13 +272,6 @@ struct ip_vs_sync_buff {
 	unsigned char *end;
 };
 
-/* multicast addr */
-static struct sockaddr_in mcast_addr = {
-	.sin_family		= AF_INET,
-	.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT),
-	.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
-};
-
 /*
  * Copy of struct ip_vs_seq
  * From unaligned network order to aligned host order
@@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
 	put_unaligned_be32(ho->previous_delta, &no->previous_delta);
 }
 
-static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
+static inline struct ip_vs_sync_buff *
+sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
 {
 	struct ip_vs_sync_buff *sb;
 
 	spin_lock_bh(&ipvs->sync_lock);
-	if (list_empty(&ipvs->sync_queue)) {
+	if (list_empty(&ms->sync_queue)) {
 		sb = NULL;
+		__set_current_state(TASK_INTERRUPTIBLE);
 	} else {
-		sb = list_entry(ipvs->sync_queue.next,
-				struct ip_vs_sync_buff,
+		sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
 				list);
 		list_del(&sb->list);
+		ms->sync_queue_len--;
+		if (!ms->sync_queue_len)
+			ms->sync_queue_delay = 0;
 	}
 	spin_unlock_bh(&ipvs->sync_lock);
@@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
 		kfree(sb);
 		return NULL;
 	}
-	sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zeo now */
+	sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zero now */
 	sb->mesg->version = SYNC_PROTO_VER;
 	sb->mesg->syncid = ipvs->master_syncid;
 	sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
@@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
 	kfree(sb);
 }
 
-static inline void sb_queue_tail(struct netns_ipvs *ipvs)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs,
+				 struct ipvs_master_sync_state *ms)
 {
-	struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+	struct ip_vs_sync_buff *sb = ms->sync_buff;
 
 	spin_lock(&ipvs->sync_lock);
-	if (ipvs->sync_state & IP_VS_STATE_MASTER)
-		list_add_tail(&sb->list, &ipvs->sync_queue);
-	else
+	if (ipvs->sync_state & IP_VS_STATE_MASTER &&
+	    ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
+		if (!ms->sync_queue_len)
+			schedule_delayed_work(&ms->master_wakeup_work,
+					      max(IPVS_SYNC_SEND_DELAY, 1));
+		ms->sync_queue_len++;
+		list_add_tail(&sb->list, &ms->sync_queue);
+		if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
+			wake_up_process(ms->master_thread);
+	} else
 		ip_vs_sync_buff_release(sb);
 	spin_unlock(&ipvs->sync_lock);
 }
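[ Editor's note, not part of the patch: sb_queue_tail() now bounds each
  per-thread queue with sysctl_sync_qlen_max() and batches wakeups of the
  sender: the first queued buffer only arms a short delayed work
  (IPVS_SYNC_SEND_DELAY), and wake_up_process() fires immediately only once
  IPVS_SYNC_WAKEUP_RATE buffers have accumulated.  A minimal standalone
  model of that policy, with the constant assumed from the companion
  ip_vs.h change:

#include <stdio.h>

#define IPVS_SYNC_WAKEUP_RATE	8	/* assumed value from ip_vs.h */

static int queue_len;		/* buffers waiting to be sent */
static int queue_delay;		/* buffers queued since the last wakeup */

/* Returns 1 when the sender thread should be woken immediately. */
static int queue_buffer(void)
{
	if (!queue_len)
		printf("first buffer: arm delayed wakeup\n");
	queue_len++;
	return ++queue_delay == IPVS_SYNC_WAKEUP_RATE;
}

int main(void)
{
	int i;

	for (i = 1; i <= 10; i++)
		if (queue_buffer())
			printf("buffer %d: wake sender now\n", i);
	/* in the kernel, queue_delay resets once the queue drains */
	return 0;
}
]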
@@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
  * than the specified time or the specified time is zero.
  */
 static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
+		   unsigned long time)
 {
 	struct ip_vs_sync_buff *sb;
 
 	spin_lock_bh(&ipvs->sync_buff_lock);
-	if (ipvs->sync_buff &&
-	    time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
-		sb = ipvs->sync_buff;
-		ipvs->sync_buff = NULL;
+	sb = ms->sync_buff;
+	if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
+		ms->sync_buff = NULL;
+		__set_current_state(TASK_RUNNING);
 	} else
 		sb = NULL;
 	spin_unlock_bh(&ipvs->sync_buff_lock);
 	return sb;
 }
 
-/*
- * Switch mode from sending version 0 or 1
- *  - must handle sync_buf
- */
-void ip_vs_sync_switch_mode(struct net *net, int mode)
+static inline int
+select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
-	if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
-		return;
-	if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
-		return;
-
-	spin_lock_bh(&ipvs->sync_buff_lock);
-	/* Buffer empty ? then let buf_create do the job  */
-	if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
-		kfree(ipvs->sync_buff);
-		ipvs->sync_buff = NULL;
-	} else {
-		spin_lock_bh(&ipvs->sync_lock);
-		if (ipvs->sync_state & IP_VS_STATE_MASTER)
-			list_add_tail(&ipvs->sync_buff->list,
-				      &ipvs->sync_queue);
-		else
-			ip_vs_sync_buff_release(ipvs->sync_buff);
-		spin_unlock_bh(&ipvs->sync_lock);
-	}
-	spin_unlock_bh(&ipvs->sync_buff_lock);
+	return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
 }
 
 /*
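[ Editor's note, not part of the patch: select_master_thread_id() hashes
  only the connection pointer, so every update for a given connection goes
  through the same sync thread and socket, keeping its updates ordered on
  the wire.  The shift drops the low pointer bits that are identical for
  all objects of one slab cache.  A hypothetical userspace illustration,
  with a made-up object size and made-up 64-bit pointers:

#include <stdio.h>

static int ilog2_ul(unsigned long v)	/* minimal stand-in for kernel ilog2() */
{
	int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long threads_mask = 4 - 1;	/* sync_ports = 4 threads */
	unsigned long obj_size = 256;	/* hypothetical sizeof(struct ip_vs_conn) */
	unsigned long ptrs[] = { 0xffff880012345600UL, 0xffff880012345700UL,
				 0xffff880012345a00UL };
	int i;

	for (i = 0; i < 3; i++)
		printf("conn %#lx -> thread %lu\n", ptrs[i],
		       (ptrs[i] >> (1 + ilog2_ul(obj_size))) & threads_mask);
	return 0;
}
]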
@@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
 	return sb;
 }
 
+/* Check if conn should be synced.
+ * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
+ * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
+ *	sync_retries times with period of sync_refresh_period/8
+ * - (2) if both sync_refresh_period and sync_period are 0 send sync only
+ *	for state changes or only once when pkts matches sync_threshold
+ * - (3) templates: rate can be reduced only with sync_refresh_period or
+ *	with (2)
+ */
+static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
+				  struct ip_vs_conn *cp, int pkts)
+{
+	unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
+	unsigned long now = jiffies;
+	unsigned long n = (now + cp->timeout) & ~3UL;
+	unsigned int sync_refresh_period;
+	int sync_period;
+	int force;
+
+	/* Check if we sync in current state */
+	if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
+		force = 0;
+	else if (likely(cp->protocol == IPPROTO_TCP)) {
+		if (!((1 << cp->state) &
+		      ((1 << IP_VS_TCP_S_ESTABLISHED) |
+		       (1 << IP_VS_TCP_S_FIN_WAIT) |
+		       (1 << IP_VS_TCP_S_CLOSE) |
+		       (1 << IP_VS_TCP_S_CLOSE_WAIT) |
+		       (1 << IP_VS_TCP_S_TIME_WAIT))))
+			return 0;
+		force = cp->state != cp->old_state;
+		if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
+			goto set;
+	} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
+		if (!((1 << cp->state) &
+		      ((1 << IP_VS_SCTP_S_ESTABLISHED) |
+		       (1 << IP_VS_SCTP_S_CLOSED) |
+		       (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
+		       (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
+			return 0;
+		force = cp->state != cp->old_state;
+		if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
+			goto set;
+	} else {
+		/* UDP or another protocol with single state */
+		force = 0;
+	}
+
+	sync_refresh_period = sysctl_sync_refresh_period(ipvs);
+	if (sync_refresh_period > 0) {
+		long diff = n - orig;
+		long min_diff = max(cp->timeout >> 1, 10UL * HZ);
+
+		/* Avoid sync if difference is below sync_refresh_period
+		 * and below the half timeout.
+		 */
+		if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
+			int retries = orig & 3;
+
+			if (retries >= sysctl_sync_retries(ipvs))
+				return 0;
+			if (time_before(now, orig - cp->timeout +
+					(sync_refresh_period >> 3)))
+				return 0;
+			n |= retries + 1;
+		}
+	}
+	sync_period = sysctl_sync_period(ipvs);
+	if (sync_period > 0) {
+		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
+		    pkts % sync_period != sysctl_sync_threshold(ipvs))
+			return 0;
+	} else if (sync_refresh_period <= 0 &&
+		   pkts != sysctl_sync_threshold(ipvs))
+		return 0;
+
+set:
+	cp->old_state = cp->state;
+	n = cmpxchg(&cp->sync_endtime, orig, n);
+	return n == orig || force;
+}
+
 /*
  * Version 0 , could be switched in by sys_ctl.
  * Add an ip_vs_conn information into the current sync_buff.
  */
-void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
+static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
+			       int pkts)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_sync_mesg_v0 *m;
 	struct ip_vs_sync_conn_v0 *s;
+	struct ip_vs_sync_buff *buff;
+	struct ipvs_master_sync_state *ms;
+	int id;
 	int len;
 
 	if (unlikely(cp->af != AF_INET))
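[ Editor's note, not part of the patch: ip_vs_sync_conn_needed() packs two
  fields into cp->sync_endtime: the expected end time of the connection
  rounded down to a multiple of 4 jiffies, plus a 0..3 retry counter in the
  two low bits; cmpxchg() publishes the new value only if no other CPU
  refreshed it first.  A small sketch of the encoding, with made-up jiffies
  values:

#include <stdio.h>

int main(void)
{
	unsigned long now = 100000, timeout = 900;	/* made-up jiffies */
	unsigned long n = (now + timeout) & ~3UL;	/* round down to 4 jiffies */
	unsigned long packed = n | 2;			/* retry counter = 2 */

	printf("end time = %lu, retries = %lu\n", packed & ~3UL, packed & 3);
	return 0;
}
]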
@@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
 	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
 		return;
 
+	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
+		return;
+
 	spin_lock(&ipvs->sync_buff_lock);
-	if (!ipvs->sync_buff) {
-		ipvs->sync_buff =
-			ip_vs_sync_buff_create_v0(ipvs);
-		if (!ipvs->sync_buff) {
+	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+		spin_unlock(&ipvs->sync_buff_lock);
+		return;
+	}
+
+	id = select_master_thread_id(ipvs, cp);
+	ms = &ipvs->ms[id];
+	buff = ms->sync_buff;
+	if (buff) {
+		m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
+		/* Send buffer if it is for v1 */
+		if (!m->nr_conns) {
+			sb_queue_tail(ipvs, ms);
+			ms->sync_buff = NULL;
+			buff = NULL;
+		}
+	}
+	if (!buff) {
+		buff = ip_vs_sync_buff_create_v0(ipvs);
+		if (!buff) {
 			spin_unlock(&ipvs->sync_buff_lock);
 			pr_err("ip_vs_sync_buff_create failed.\n");
 			return;
 		}
+		ms->sync_buff = buff;
 	}
 
 	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
 		SIMPLE_CONN_SIZE;
-	m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
-	s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
+	m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
+	s = (struct ip_vs_sync_conn_v0 *) buff->head;
 
 	/* copy members */
 	s->reserved = 0;
@@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
 	m->nr_conns++;
 	m->size += len;
-	ipvs->sync_buff->head += len;
+	buff->head += len;
 
 	/* check if there is a space for next one */
-	if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
-		sb_queue_tail(ipvs);
-		ipvs->sync_buff = NULL;
+	if (buff->head + FULL_CONN_SIZE > buff->end) {
+		sb_queue_tail(ipvs, ms);
+		ms->sync_buff = NULL;
 	}
 	spin_unlock(&ipvs->sync_buff_lock);
 
 	/* synchronize its controller if it has */
-	if (cp->control)
-		ip_vs_sync_conn(net, cp->control);
+	cp = cp->control;
+	if (cp) {
+		if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+			pkts = atomic_add_return(1, &cp->in_pkts);
+		else
+			pkts = sysctl_sync_threshold(ipvs);
+		ip_vs_sync_conn(net, cp->control, pkts);
+	}
 }
@@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
  *      Called by ip_vs_in.
  *      Sending Version 1 messages
  */
-void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_sync_mesg *m;
 	union ip_vs_sync_conn *s;
+	struct ip_vs_sync_buff *buff;
+	struct ipvs_master_sync_state *ms;
+	int id;
 	__u8 *p;
 	unsigned int len, pe_name_len, pad;
 
 	/* Handle old version of the protocol */
 	if (sysctl_sync_ver(ipvs) == 0) {
-		ip_vs_sync_conn_v0(net, cp);
+		ip_vs_sync_conn_v0(net, cp, pkts);
 		return;
 	}
 	/* Do not sync ONE PACKET */
 	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
 		goto control;
 sloop:
+	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
+		goto control;
+
 	/* Sanity checks */
 	pe_name_len = 0;
 	if (cp->pe_data_len) {
@@ -541,6 +642,13 @@ sloop:
 	}
 
 	spin_lock(&ipvs->sync_buff_lock);
+	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+		spin_unlock(&ipvs->sync_buff_lock);
+		return;
+	}
+
+	id = select_master_thread_id(ipvs, cp);
+	ms = &ipvs->ms[id];
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6)
@@ -559,27 +667,32 @@ sloop:
 	/* check if there is a space for this one  */
 	pad = 0;
-	if (ipvs->sync_buff) {
-		pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
-		if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
-			sb_queue_tail(ipvs);
-			ipvs->sync_buff = NULL;
+	buff = ms->sync_buff;
+	if (buff) {
+		m = buff->mesg;
+		pad = (4 - (size_t) buff->head) & 3;
+		/* Send buffer if it is for v0 */
+		if (buff->head + len + pad > buff->end || m->reserved) {
+			sb_queue_tail(ipvs, ms);
+			ms->sync_buff = NULL;
+			buff = NULL;
 			pad = 0;
 		}
 	}
 
-	if (!ipvs->sync_buff) {
-		ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
-		if (!ipvs->sync_buff) {
+	if (!buff) {
+		buff = ip_vs_sync_buff_create(ipvs);
+		if (!buff) {
 			spin_unlock(&ipvs->sync_buff_lock);
 			pr_err("ip_vs_sync_buff_create failed.\n");
 			return;
 		}
+		ms->sync_buff = buff;
+		m = buff->mesg;
 	}
 
-	m = ipvs->sync_buff->mesg;
-	p = ipvs->sync_buff->head;
-	ipvs->sync_buff->head += pad + len;
+	p = buff->head;
+	buff->head += pad + len;
 	m->size += pad + len;
 	/* Add ev. padding from prev. sync_conn */
 	while (pad--)
@@ -644,16 +757,10 @@ control:
 	cp = cp->control;
 	if (!cp)
 		return;
-	/*
-	 * Reduce sync rate for templates
-	 * i.e only increment in_pkts for Templates.
-	 */
-	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
-		int pkts = atomic_add_return(1, &cp->in_pkts);
-
-		if (pkts % sysctl_sync_period(ipvs) != 1)
-			return;
-	}
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+		pkts = atomic_add_return(1, &cp->in_pkts);
+	else
+		pkts = sysctl_sync_threshold(ipvs);
 	goto sloop;
 }
@@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 	else
 		cp = ip_vs_ct_in_get(param);
 
-	if (cp && param->pe_data)	/* Free pe_data */
+	if (cp) {
+		/* Free pe_data */
 		kfree(param->pe_data);
-	if (!cp) {
+
+		dest = cp->dest;
+		spin_lock(&cp->lock);
+		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
+		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
+			if (flags & IP_VS_CONN_F_INACTIVE) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+			} else {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+			}
+		}
+		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
+		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
+		cp->flags = flags;
+		spin_unlock(&cp->lock);
+		if (!dest) {
+			dest = ip_vs_try_bind_dest(cp);
+			if (dest)
+				atomic_dec(&dest->refcnt);
+		}
+	} else {
 		/*
 		 * Find the appropriate destination for the connection.
 		 * If it is not found the connection will remain unbound
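[ Editor's note, not part of the patch: on the backup side the received
  flags no longer overwrite cp->flags wholesale; only the bits covered by
  IP_VS_CONN_F_BACKUP_UPD_MASK are taken from the sync message, the rest
  keep their local value, and the active/inactive counters are adjusted in
  the same locked section.  A standalone sketch of the bit merge, with
  placeholder flag values:

#include <stdio.h>

#define BACKUP_UPD_MASK	0x00f0U	/* hypothetical updatable bits */

int main(void)
{
	unsigned int local = 0x1234, msg = 0x00f8;
	unsigned int flags = (msg & BACKUP_UPD_MASK) |
			     (local & ~BACKUP_UPD_MASK);

	printf("merged flags = %#x\n", flags);	/* prints 0x12f4 */
	return 0;
}
]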
@@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 		dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
 				       param->vport, protocol, fwmark, flags);
 
-		/*  Set the approprite ativity flag */
-		if (protocol == IPPROTO_TCP) {
-			if (state != IP_VS_TCP_S_ESTABLISHED)
-				flags |= IP_VS_CONN_F_INACTIVE;
-			else
-				flags &= ~IP_VS_CONN_F_INACTIVE;
-		} else if (protocol == IPPROTO_SCTP) {
-			if (state != IP_VS_SCTP_S_ESTABLISHED)
-				flags |= IP_VS_CONN_F_INACTIVE;
-			else
-				flags &= ~IP_VS_CONN_F_INACTIVE;
-		}
 		cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
 		if (dest)
 			atomic_dec(&dest->refcnt);
@@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
 			return;
 		}
-	} else if (!cp->dest) {
-		dest = ip_vs_try_bind_dest(cp);
-		if (dest)
-			atomic_dec(&dest->refcnt);
-	} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
-		   (cp->state != state)) {
-		/* update active/inactive flag for the connection */
-		dest = cp->dest;
-		if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-		    (state != IP_VS_TCP_S_ESTABLISHED)) {
-			atomic_dec(&dest->activeconns);
-			atomic_inc(&dest->inactconns);
-			cp->flags |= IP_VS_CONN_F_INACTIVE;
-		} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-			   (state == IP_VS_TCP_S_ESTABLISHED)) {
-			atomic_inc(&dest->activeconns);
-			atomic_dec(&dest->inactconns);
-			cp->flags &= ~IP_VS_CONN_F_INACTIVE;
-		}
-	} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
-		   (cp->state != state)) {
-		dest = cp->dest;
-		if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-		    (state != IP_VS_SCTP_S_ESTABLISHED)) {
-			atomic_dec(&dest->activeconns);
-			atomic_inc(&dest->inactconns);
-			cp->flags &= ~IP_VS_CONN_F_INACTIVE;
-		}
 	}
 
 	if (opt)
@@ -1148,6 +1238,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
 	}
 }
 
+/*
+ *      Setup sndbuf (mode=1) or rcvbuf (mode=0)
+ */
+static void set_sock_size(struct sock *sk, int mode, int val)
+{
+	/* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
+	/* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
+	lock_sock(sk);
+	if (mode) {
+		val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
+			      sysctl_wmem_max);
+		sk->sk_sndbuf = val * 2;
+		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+	} else {
+		val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
+			      sysctl_rmem_max);
+		sk->sk_rcvbuf = val * 2;
+		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+	}
+	release_sock(sk);
+}
+
 /*
  *      Setup loopback of outgoing multicasts on a sending socket
  */
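[ Editor's note, not part of the patch: set_sock_size() is the in-kernel
  counterpart of the setsockopt() calls quoted in its comment: like
  sock_setsockopt(), it clamps the requested size and then doubles it,
  because sk_sndbuf/sk_rcvbuf account bookkeeping overhead on top of
  payload.  A userspace model of the clamp-then-double, with placeholder
  limits:

#include <stdio.h>

static int clamp_int(int v, int lo, int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	int sock_min_sndbuf = 4096;	/* placeholder, not the kernel value */
	int sysctl_wmem_max = 212992;	/* placeholder */
	int requested = 1 << 20;	/* 1 MiB via the sync_sock_size sysctl */
	int val = clamp_int(requested, (sock_min_sndbuf + 1) / 2,
			    sysctl_wmem_max);

	printf("sk_sndbuf becomes %d\n", val * 2);
	return 0;
}
]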
@@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 /*
  *      Set up sending multicast socket over UDP
  */
-static struct socket *make_send_sock(struct net *net)
+static struct socket *make_send_sock(struct net *net, int id)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	/* multicast addr */
+	struct sockaddr_in mcast_addr = {
+		.sin_family		= AF_INET,
+		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
+		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
+	};
 	struct socket *sock;
 	int result;
@@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net)
 	set_mcast_loop(sock->sk, 0);
 	set_mcast_ttl(sock->sk, 1);
+	result = sysctl_sync_sock_size(ipvs);
+	if (result > 0)
+		set_sock_size(sock->sk, 1, result);
 
 	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
 	if (result < 0) {
@@ -1349,9 +1470,15 @@ error:
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket *make_receive_sock(struct net *net)
+static struct socket *make_receive_sock(struct net *net, int id)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	/* multicast addr */
+	struct sockaddr_in mcast_addr = {
+		.sin_family		= AF_INET,
+		.sin_port		= cpu_to_be16(IP_VS_SYNC_PORT + id),
+		.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
+	};
 	struct socket *sock;
 	int result;
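[ Editor's note, not part of the patch: each sync thread id now uses its
  own UDP port, IP_VS_SYNC_PORT + id, on the same multicast group, so
  master thread i pairs exactly with backup thread i.  For reference, a
  hypothetical userspace listener for one such port, assuming the usual
  ip_vs.h values (group 224.0.0.81, base port 8848):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int id = 0, fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in addr;
	struct ip_mreq mreq;

	if (fd < 0)
		return 1;
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(8848 + id);	/* IP_VS_SYNC_PORT + id */
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0)
		perror("bind");

	mreq.imr_multiaddr.s_addr = inet_addr("224.0.0.81"); /* IP_VS_SYNC_GROUP */
	mreq.imr_interface.s_addr = htonl(INADDR_ANY);
	if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
		       sizeof(mreq)) < 0)
		perror("IP_ADD_MEMBERSHIP");

	close(fd);
	return 0;
}
]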
@@ -1369,6 +1496,9 @@ static struct socket *make_receive_sock(struct net *net)
 	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = SK_CAN_REUSE;
+	result = sysctl_sync_sock_size(ipvs);
+	if (result > 0)
+		set_sock_size(sock->sk, 0, result);
 
 	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
 			sizeof(struct sockaddr));
@@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
 	return len;
 }
 
-static void
+static int
 ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
 {
 	int msize;
+	int ret;
 
 	msize = msg->size;
 
 	/* Put size in network byte order */
 	msg->size = htons(msg->size);
 
-	if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
-		pr_err("ip_vs_send_async error\n");
+	ret = ip_vs_send_async(sock, (char *)msg, msize);
+	if (ret >= 0 || ret == -EAGAIN)
+		return ret;
+	pr_err("ip_vs_send_async error %d\n", ret);
+	return 0;
 }
 
 static int
@@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 	iov.iov_base     = buffer;
 	iov.iov_len      = (size_t)buflen;
 
-	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
+	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
 
 	if (len < 0)
-		return -1;
+		return len;
 
 	LeaveFunction(7);
 	return len;
 }
 
+/* Wakeup the master thread for sending */
+static void master_wakeup_work_handler(struct work_struct *work)
+{
+	struct ipvs_master_sync_state *ms =
+		container_of(work, struct ipvs_master_sync_state,
+			     master_wakeup_work.work);
+	struct netns_ipvs *ipvs = ms->ipvs;
+
+	spin_lock_bh(&ipvs->sync_lock);
+	if (ms->sync_queue_len &&
+	    ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
+		ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
+		wake_up_process(ms->master_thread);
+	}
+	spin_unlock_bh(&ipvs->sync_lock);
+}
+
+/* Get next buffer to send */
+static inline struct ip_vs_sync_buff *
+next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
+{
+	struct ip_vs_sync_buff *sb;
+
+	sb = sb_dequeue(ipvs, ms);
+	if (sb)
+		return sb;
+	/* Do not delay entries in buffer for more than 2 seconds */
+	return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
+}
+
 static int sync_thread_master(void *data)
 {
 	struct ip_vs_sync_thread_data *tinfo = data;
 	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+	struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
+	struct sock *sk = tinfo->sock->sk;
 	struct ip_vs_sync_buff *sb;
 
 	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
-		"syncid = %d\n",
-		ipvs->master_mcast_ifn, ipvs->master_syncid);
+		"syncid = %d, id = %d\n",
+		ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
 
-	while (!kthread_should_stop()) {
-		while ((sb = sb_dequeue(ipvs))) {
-			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
-			ip_vs_sync_buff_release(sb);
+	for (;;) {
+		sb = next_sync_buff(ipvs, ms);
+		if (unlikely(kthread_should_stop()))
+			break;
+		if (!sb) {
+			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
+			continue;
 		}
-
-		/* check if entries stay in ipvs->sync_buff for 2 seconds */
-		sb = get_curr_sync_buff(ipvs, 2 * HZ);
-		if (sb) {
-			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
-			ip_vs_sync_buff_release(sb);
+		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
+			int ret = 0;
+
+			__wait_event_interruptible(*sk_sleep(sk),
+						   sock_writeable(sk) ||
+						   kthread_should_stop(),
+						   ret);
+			if (unlikely(kthread_should_stop()))
+				goto done;
 		}
-
-		schedule_timeout_interruptible(HZ);
+		ip_vs_sync_buff_release(sb);
 	}
 
+done:
+	__set_current_state(TASK_RUNNING);
+	if (sb)
+		ip_vs_sync_buff_release(sb);
+
 	/* clean up the sync_buff queue */
-	while ((sb = sb_dequeue(ipvs)))
+	while ((sb = sb_dequeue(ipvs, ms)))
 		ip_vs_sync_buff_release(sb);
+	__set_current_state(TASK_RUNNING);
 
 	/* clean up the current sync_buff */
-	sb = get_curr_sync_buff(ipvs, 0);
+	sb = get_curr_sync_buff(ipvs, ms, 0);
 	if (sb)
 		ip_vs_sync_buff_release(sb);
@@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data)
 	int len;
 
 	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
-		"syncid = %d\n",
-		ipvs->backup_mcast_ifn, ipvs->backup_syncid);
+		"syncid = %d, id = %d\n",
+		ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
 
 	while (!kthread_should_stop()) {
 		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1511,7 +1687,8 @@ static int sync_thread_backup(void *data)
 			len = ip_vs_receive(tinfo->sock, tinfo->buf,
 					ipvs->recv_mesg_maxlen);
 			if (len <= 0) {
-				pr_err("receiving message error\n");
+				if (len != -EAGAIN)
+					pr_err("receiving message error\n");
 				break;
 			}
@@ -1535,86 +1712,140 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 {
 	struct ip_vs_sync_thread_data *tinfo;
-	struct task_struct **realtask, *task;
+	struct task_struct **array = NULL, *task;
 	struct socket *sock;
 	struct netns_ipvs *ipvs = net_ipvs(net);
-	char *name, *buf = NULL;
+	char *name;
 	int (*threadfn)(void *data);
+	int id, count;
 	int result = -ENOMEM;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
 		  sizeof(struct ip_vs_sync_conn_v0));
 
+	if (!ipvs->sync_state) {
+		count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
+		ipvs->threads_mask = count - 1;
+	} else
+		count = ipvs->threads_mask + 1;
+
 	if (state == IP_VS_STATE_MASTER) {
-		if (ipvs->master_thread)
+		if (ipvs->ms)
 			return -EEXIST;
 
 		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
 			sizeof(ipvs->master_mcast_ifn));
 		ipvs->master_syncid = syncid;
-		realtask = &ipvs->master_thread;
-		name = "ipvs_master:%d";
+		name = "ipvs-m:%d:%d";
 		threadfn = sync_thread_master;
-		sock = make_send_sock(net);
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (ipvs->backup_thread)
+		if (ipvs->backup_threads)
 			return -EEXIST;
 
 		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
 			sizeof(ipvs->backup_mcast_ifn));
 		ipvs->backup_syncid = syncid;
-		realtask = &ipvs->backup_thread;
-		name = "ipvs_backup:%d";
+		name = "ipvs-b:%d:%d";
 		threadfn = sync_thread_backup;
-		sock = make_receive_sock(net);
 	} else {
 		return -EINVAL;
 	}
 
-	if (IS_ERR(sock)) {
-		result = PTR_ERR(sock);
-		goto out;
-	}
+	if (state == IP_VS_STATE_MASTER) {
+		struct ipvs_master_sync_state *ms;
 
-	set_sync_mesg_maxlen(net, state);
-	if (state == IP_VS_STATE_BACKUP) {
-		buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
-		if (!buf)
-			goto outsocket;
+		ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
+		if (!ipvs->ms)
+			goto out;
+		ms = ipvs->ms;
+		for (id = 0; id < count; id++, ms++) {
+			INIT_LIST_HEAD(&ms->sync_queue);
+			ms->sync_queue_len = 0;
+			ms->sync_queue_delay = 0;
+			INIT_DELAYED_WORK(&ms->master_wakeup_work,
+					  master_wakeup_work_handler);
+			ms->ipvs = ipvs;
+		}
+	} else {
+		array = kzalloc(count * sizeof(struct task_struct *),
+				GFP_KERNEL);
+		if (!array)
+			goto out;
 	}
+	set_sync_mesg_maxlen(net, state);
 
-	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
-	if (!tinfo)
-		goto outbuf;
-
-	tinfo->net = net;
-	tinfo->sock = sock;
-	tinfo->buf = buf;
+	tinfo = NULL;
+	for (id = 0; id < count; id++) {
+		if (state == IP_VS_STATE_MASTER)
+			sock = make_send_sock(net, id);
+		else
+			sock = make_receive_sock(net, id);
+		if (IS_ERR(sock)) {
+			result = PTR_ERR(sock);
+			goto outtinfo;
+		}
+		tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+		if (!tinfo)
+			goto outsocket;
+		tinfo->net = net;
+		tinfo->sock = sock;
+		if (state == IP_VS_STATE_BACKUP) {
+			tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
+					     GFP_KERNEL);
+			if (!tinfo->buf)
+				goto outtinfo;
+		} else {
+			tinfo->buf = NULL;
+		}
+		tinfo->id = id;
 
-	task = kthread_run(threadfn, tinfo, name, ipvs->gen);
-	if (IS_ERR(task)) {
-		result = PTR_ERR(task);
-		goto outtinfo;
+		task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
+		if (IS_ERR(task)) {
+			result = PTR_ERR(task);
+			goto outtinfo;
+		}
+		tinfo = NULL;
+		if (state == IP_VS_STATE_MASTER)
+			ipvs->ms[id].master_thread = task;
+		else
+			array[id] = task;
 	}
 
 	/* mark as active */
-	*realtask = task;
+
+	if (state == IP_VS_STATE_BACKUP)
+		ipvs->backup_threads = array;
+	spin_lock_bh(&ipvs->sync_buff_lock);
 	ipvs->sync_state |= state;
+	spin_unlock_bh(&ipvs->sync_buff_lock);
 
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
 	return 0;
 
-outtinfo:
-	kfree(tinfo);
-outbuf:
-	kfree(buf);
 outsocket:
 	sk_release_kernel(sock->sk);
+
+outtinfo:
+	if (tinfo) {
+		sk_release_kernel(tinfo->sock->sk);
+		kfree(tinfo->buf);
+		kfree(tinfo);
+	}
+	count = id;
+	while (count-- > 0) {
+		if (state == IP_VS_STATE_MASTER)
+			kthread_stop(ipvs->ms[count].master_thread);
+		else
+			kthread_stop(array[count]);
+	}
+	kfree(array);
+
 out:
+	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+		kfree(ipvs->ms);
+		ipvs->ms = NULL;
+	}
 	return result;
 }
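[ Editor's note, not part of the patch: threads_mask = count - 1 acts as a
  cheap modulus in select_master_thread_id(), which is only correct when
  sync_ports is a power of two (the sync_ports sysctl is expected to
  enforce this; IPVS_SYNC_PORTS_MAX is a power of two as well).  A one-file
  demonstration of why the mask trick needs that constraint:

#include <stdio.h>

int main(void)
{
	unsigned long x = 12345;	/* arbitrary hash input */
	unsigned long count;

	for (count = 1; count <= 8; count++)
		printf("count=%lu: mask gives %lu, true modulus is %lu%s\n",
		       count, x & (count - 1), x % count,
		       (x & (count - 1)) == x % count ? "" : "  <- differs");
	return 0;
}
]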
@@ -1622,38 +1853,60 @@ out:
 int stop_sync_thread(struct net *net, int state)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct task_struct **array;
+	int id;
 	int retc = -EINVAL;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
 	if (state == IP_VS_STATE_MASTER) {
-		if (!ipvs->master_thread)
+		if (!ipvs->ms)
 			return -ESRCH;
 
-		pr_info("stopping master sync thread %d ...\n",
-			task_pid_nr(ipvs->master_thread));
-
 		/*
 		 * The lock synchronizes with sb_queue_tail(), so that we don't
 		 * add sync buffers to the queue, when we are already in
 		 * progress of stopping the master sync daemon.
 		 */
-		spin_lock_bh(&ipvs->sync_lock);
+		spin_lock_bh(&ipvs->sync_buff_lock);
+		spin_lock(&ipvs->sync_lock);
 		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
-		spin_unlock_bh(&ipvs->sync_lock);
-		retc = kthread_stop(ipvs->master_thread);
-		ipvs->master_thread = NULL;
+		spin_unlock(&ipvs->sync_lock);
+		spin_unlock_bh(&ipvs->sync_buff_lock);
+
+		retc = 0;
+		for (id = ipvs->threads_mask; id >= 0; id--) {
+			struct ipvs_master_sync_state *ms = &ipvs->ms[id];
+			int ret;
+
+			pr_info("stopping master sync thread %d ...\n",
+				task_pid_nr(ms->master_thread));
+			cancel_delayed_work_sync(&ms->master_wakeup_work);
+			ret = kthread_stop(ms->master_thread);
+			if (retc >= 0)
+				retc = ret;
+		}
+		kfree(ipvs->ms);
+		ipvs->ms = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (!ipvs->backup_thread)
+		if (!ipvs->backup_threads)
 			return -ESRCH;
 
-		pr_info("stopping backup sync thread %d ...\n",
-			task_pid_nr(ipvs->backup_thread));
-
 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
-		retc = kthread_stop(ipvs->backup_thread);
-		ipvs->backup_thread = NULL;
+		array = ipvs->backup_threads;
+		retc = 0;
+		for (id = ipvs->threads_mask; id >= 0; id--) {
+			int ret;
+
+			pr_info("stopping backup sync thread %d ...\n",
+				task_pid_nr(array[id]));
+			ret = kthread_stop(array[id]);
+			if (retc >= 0)
+				retc = ret;
+		}
+		kfree(array);
+		ipvs->backup_threads = NULL;
 	}
 
 	/* decrease the module use count */
@@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net)
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
-	INIT_LIST_HEAD(&ipvs->sync_queue);
 	spin_lock_init(&ipvs->sync_lock);
 	spin_lock_init(&ipvs->sync_buff_lock);
-
-	ipvs->sync_mcast_addr.sin_family = AF_INET;
-	ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
-	ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
 	return 0;
 }