|
|
@@ -48,6 +48,7 @@
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
#include <net/ipv6.h>
|
|
|
#include <linux/netfilter_ipv6.h>
|
|
|
+#include <net/ip6_route.h>
|
|
|
#endif
|
|
|
|
|
|
#include <net/ip_vs.h>
|
|
|
@@ -176,6 +177,18 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
|
|
|
return pp->state_transition(cp, direction, skb, pp);
|
|
|
}
|
|
|
|
|
|
+static inline void
|
|
|
+ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
|
|
|
+ struct sk_buff *skb, int protocol,
|
|
|
+ const union nf_inet_addr *caddr, __be16 cport,
|
|
|
+ const union nf_inet_addr *vaddr, __be16 vport,
|
|
|
+ struct ip_vs_conn_param *p)
|
|
|
+{
|
|
|
+ ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
|
|
|
+ p->pe = svc->pe;
|
|
|
+ if (p->pe && p->pe->fill_param)
|
|
|
+ p->pe->fill_param(p, skb);
|
|
|
+}
|
|
|
|
|
|
/*
|
|
|
* IPVS persistent scheduling function
|
|
|
@@ -186,15 +199,16 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
|
|
|
*/
|
|
|
static struct ip_vs_conn *
|
|
|
ip_vs_sched_persist(struct ip_vs_service *svc,
|
|
|
- const struct sk_buff *skb,
|
|
|
+ struct sk_buff *skb,
|
|
|
__be16 ports[2])
|
|
|
{
|
|
|
struct ip_vs_conn *cp = NULL;
|
|
|
struct ip_vs_iphdr iph;
|
|
|
struct ip_vs_dest *dest;
|
|
|
struct ip_vs_conn *ct;
|
|
|
- __be16 dport; /* destination port to forward */
|
|
|
- __be16 flags;
|
|
|
+ __be16 dport = 0; /* destination port to forward */
|
|
|
+ unsigned int flags;
|
|
|
+ struct ip_vs_conn_param param;
|
|
|
union nf_inet_addr snet; /* source network of the client,
|
|
|
after masking */
|
|
|
|
|
|
@@ -227,120 +241,75 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
|
|
|
* service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
|
|
|
* is created for other persistent services.
|
|
|
*/
|
|
|
- if (ports[1] == svc->port) {
|
|
|
- /* Check if a template already exists */
|
|
|
- if (svc->port != FTPPORT)
|
|
|
- ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
|
|
|
- &iph.daddr, ports[1]);
|
|
|
- else
|
|
|
- ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
|
|
|
- &iph.daddr, 0);
|
|
|
-
|
|
|
- if (!ct || !ip_vs_check_template(ct)) {
|
|
|
- /*
|
|
|
- * No template found or the dest of the connection
|
|
|
- * template is not available.
|
|
|
- */
|
|
|
- dest = svc->scheduler->schedule(svc, skb);
|
|
|
- if (dest == NULL) {
|
|
|
- IP_VS_DBG(1, "p-schedule: no dest found.\n");
|
|
|
- return NULL;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * Create a template like <protocol,caddr,0,
|
|
|
- * vaddr,vport,daddr,dport> for non-ftp service,
|
|
|
- * and <protocol,caddr,0,vaddr,0,daddr,0>
|
|
|
- * for ftp service.
|
|
|
+ {
|
|
|
+ int protocol = iph.protocol;
|
|
|
+ const union nf_inet_addr *vaddr = &iph.daddr;
|
|
|
+ const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
|
|
|
+ __be16 vport = 0;
|
|
|
+
|
|
|
+ if (ports[1] == svc->port) {
|
|
|
+ /* non-FTP template:
|
|
|
+ * <protocol, caddr, 0, vaddr, vport, daddr, dport>
|
|
|
+ * FTP template:
|
|
|
+ * <protocol, caddr, 0, vaddr, 0, daddr, 0>
|
|
|
*/
|
|
|
if (svc->port != FTPPORT)
|
|
|
- ct = ip_vs_conn_new(svc->af, iph.protocol,
|
|
|
- &snet, 0,
|
|
|
- &iph.daddr,
|
|
|
- ports[1],
|
|
|
- &dest->addr, dest->port,
|
|
|
- IP_VS_CONN_F_TEMPLATE,
|
|
|
- dest);
|
|
|
- else
|
|
|
- ct = ip_vs_conn_new(svc->af, iph.protocol,
|
|
|
- &snet, 0,
|
|
|
- &iph.daddr, 0,
|
|
|
- &dest->addr, 0,
|
|
|
- IP_VS_CONN_F_TEMPLATE,
|
|
|
- dest);
|
|
|
- if (ct == NULL)
|
|
|
- return NULL;
|
|
|
-
|
|
|
- ct->timeout = svc->timeout;
|
|
|
+ vport = ports[1];
|
|
|
} else {
|
|
|
- /* set destination with the found template */
|
|
|
- dest = ct->dest;
|
|
|
- }
|
|
|
- dport = dest->port;
|
|
|
- } else {
|
|
|
- /*
|
|
|
- * Note: persistent fwmark-based services and persistent
|
|
|
- * port zero service are handled here.
|
|
|
- * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
|
|
|
- * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
|
|
|
- */
|
|
|
- if (svc->fwmark) {
|
|
|
- union nf_inet_addr fwmark = {
|
|
|
- .ip = htonl(svc->fwmark)
|
|
|
- };
|
|
|
-
|
|
|
- ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
|
|
|
- &fwmark, 0);
|
|
|
- } else
|
|
|
- ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
|
|
|
- &iph.daddr, 0);
|
|
|
-
|
|
|
- if (!ct || !ip_vs_check_template(ct)) {
|
|
|
- /*
|
|
|
- * If it is not persistent port zero, return NULL,
|
|
|
- * otherwise create a connection template.
|
|
|
+ /* Note: persistent fwmark-based services and
|
|
|
+ * persistent port zero service are handled here.
|
|
|
+ * fwmark template:
|
|
|
+ * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
|
|
|
+ * port zero template:
|
|
|
+ * <protocol,caddr,0,vaddr,0,daddr,0>
|
|
|
*/
|
|
|
- if (svc->port)
|
|
|
- return NULL;
|
|
|
-
|
|
|
- dest = svc->scheduler->schedule(svc, skb);
|
|
|
- if (dest == NULL) {
|
|
|
- IP_VS_DBG(1, "p-schedule: no dest found.\n");
|
|
|
- return NULL;
|
|
|
+ if (svc->fwmark) {
|
|
|
+ protocol = IPPROTO_IP;
|
|
|
+ vaddr = &fwmark;
|
|
|
}
|
|
|
+ }
|
|
|
+ ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
|
|
|
+ vaddr, vport, &param);
|
|
|
+ }
|
|
|
|
|
|
- /*
|
|
|
- * Create a template according to the service
|
|
|
- */
|
|
|
- if (svc->fwmark) {
|
|
|
- union nf_inet_addr fwmark = {
|
|
|
- .ip = htonl(svc->fwmark)
|
|
|
- };
|
|
|
-
|
|
|
- ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
|
|
|
- &snet, 0,
|
|
|
- &fwmark, 0,
|
|
|
- &dest->addr, 0,
|
|
|
- IP_VS_CONN_F_TEMPLATE,
|
|
|
- dest);
|
|
|
- } else
|
|
|
- ct = ip_vs_conn_new(svc->af, iph.protocol,
|
|
|
- &snet, 0,
|
|
|
- &iph.daddr, 0,
|
|
|
- &dest->addr, 0,
|
|
|
- IP_VS_CONN_F_TEMPLATE,
|
|
|
- dest);
|
|
|
- if (ct == NULL)
|
|
|
- return NULL;
|
|
|
-
|
|
|
- ct->timeout = svc->timeout;
|
|
|
- } else {
|
|
|
- /* set destination with the found template */
|
|
|
- dest = ct->dest;
|
|
|
+ /* Check if a template already exists */
|
|
|
+ ct = ip_vs_ct_in_get(&param);
|
|
|
+ if (!ct || !ip_vs_check_template(ct)) {
|
|
|
+ /* No template found or the dest of the connection
|
|
|
+ * template is not available.
|
|
|
+ */
|
|
|
+ dest = svc->scheduler->schedule(svc, skb);
|
|
|
+ if (!dest) {
|
|
|
+ IP_VS_DBG(1, "p-schedule: no dest found.\n");
|
|
|
+ kfree(param.pe_data);
|
|
|
+ return NULL;
|
|
|
}
|
|
|
- dport = ports[1];
|
|
|
+
|
|
|
+ if (ports[1] == svc->port && svc->port != FTPPORT)
|
|
|
+ dport = dest->port;
|
|
|
+
|
|
|
+ /* Create a template
|
|
|
+ * This adds param.pe_data to the template,
|
|
|
+ * and thus param.pe_data will be destroyed
|
|
|
+ * when the template expires */
|
|
|
+ ct = ip_vs_conn_new(&param, &dest->addr, dport,
|
|
|
+ IP_VS_CONN_F_TEMPLATE, dest);
|
|
|
+ if (ct == NULL) {
|
|
|
+ kfree(param.pe_data);
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ ct->timeout = svc->timeout;
|
|
|
+ } else {
|
|
|
+ /* set destination with the found template */
|
|
|
+ dest = ct->dest;
|
|
|
+ kfree(param.pe_data);
|
|
|
}
|
|
|
|
|
|
+ dport = ports[1];
|
|
|
+ if (dport == svc->port && dest->port)
|
|
|
+ dport = dest->port;
|
|
|
+
|
|
|
flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
|
|
|
&& iph.protocol == IPPROTO_UDP)?
|
|
|
IP_VS_CONN_F_ONE_PACKET : 0;
|
|
|
@@ -348,12 +317,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
|
|
|
/*
|
|
|
* Create a new connection according to the template
|
|
|
*/
|
|
|
- cp = ip_vs_conn_new(svc->af, iph.protocol,
|
|
|
- &iph.saddr, ports[0],
|
|
|
- &iph.daddr, ports[1],
|
|
|
- &dest->addr, dport,
|
|
|
- flags,
|
|
|
- dest);
|
|
|
+ ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
|
|
|
+ &iph.daddr, ports[1], &param);
|
|
|
+ cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
|
|
|
if (cp == NULL) {
|
|
|
ip_vs_conn_put(ct);
|
|
|
return NULL;
|
|
|
@@ -377,23 +343,53 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
|
|
|
* Protocols supported: TCP, UDP
|
|
|
*/
|
|
|
struct ip_vs_conn *
|
|
|
-ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
|
|
+ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
+ struct ip_vs_protocol *pp, int *ignored)
|
|
|
{
|
|
|
struct ip_vs_conn *cp = NULL;
|
|
|
struct ip_vs_iphdr iph;
|
|
|
struct ip_vs_dest *dest;
|
|
|
- __be16 _ports[2], *pptr, flags;
|
|
|
+ __be16 _ports[2], *pptr;
|
|
|
+ unsigned int flags;
|
|
|
|
|
|
+ *ignored = 1;
|
|
|
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
|
|
|
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
|
|
|
if (pptr == NULL)
|
|
|
return NULL;
|
|
|
|
|
|
+ /*
|
|
|
+ * FTPDATA needs this check when using local real server.
|
|
|
+ * Never schedule Active FTPDATA connections from real server.
|
|
|
+ * For LVS-NAT they must be already created. For other methods
|
|
|
+ * with persistence the connection is created on SYN+ACK.
|
|
|
+ */
|
|
|
+ if (pptr[0] == FTPDATA) {
|
|
|
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
|
|
|
+ "Not scheduling FTPDATA");
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Do not schedule replies from local real server. It is risky
|
|
|
+ * for fwmark services but mostly for persistent services.
|
|
|
+ */
|
|
|
+ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
|
|
|
+ (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
|
|
|
+ (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
|
|
|
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
|
|
|
+ "Not scheduling reply for existing connection");
|
|
|
+ __ip_vs_conn_put(cp);
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
/*
|
|
|
* Persistent service
|
|
|
*/
|
|
|
- if (svc->flags & IP_VS_SVC_F_PERSISTENT)
|
|
|
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
|
|
|
+ *ignored = 0;
|
|
|
return ip_vs_sched_persist(svc, skb, pptr);
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Non-persistent service
|
|
|
@@ -406,6 +402,8 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
+ *ignored = 0;
|
|
|
+
|
|
|
dest = svc->scheduler->schedule(svc, skb);
|
|
|
if (dest == NULL) {
|
|
|
IP_VS_DBG(1, "Schedule: no dest found.\n");
|
|
|
@@ -419,14 +417,16 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
|
|
/*
|
|
|
* Create a connection entry.
|
|
|
*/
|
|
|
- cp = ip_vs_conn_new(svc->af, iph.protocol,
|
|
|
- &iph.saddr, pptr[0],
|
|
|
- &iph.daddr, pptr[1],
|
|
|
- &dest->addr, dest->port ? dest->port : pptr[1],
|
|
|
- flags,
|
|
|
- dest);
|
|
|
- if (cp == NULL)
|
|
|
- return NULL;
|
|
|
+ {
|
|
|
+ struct ip_vs_conn_param p;
|
|
|
+ ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
|
|
|
+ pptr[0], &iph.daddr, pptr[1], &p);
|
|
|
+ cp = ip_vs_conn_new(&p, &dest->addr,
|
|
|
+ dest->port ? dest->port : pptr[1],
|
|
|
+ flags, dest);
|
|
|
+ if (!cp)
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
|
|
|
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
|
|
|
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
|
|
|
@@ -473,23 +473,26 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
|
|
|
int ret, cs;
|
|
|
struct ip_vs_conn *cp;
|
|
|
- __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
|
|
|
- iph.protocol == IPPROTO_UDP)?
|
|
|
- IP_VS_CONN_F_ONE_PACKET : 0;
|
|
|
+ unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
|
|
|
+ iph.protocol == IPPROTO_UDP)?
|
|
|
+ IP_VS_CONN_F_ONE_PACKET : 0;
|
|
|
union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
|
|
|
|
|
|
ip_vs_service_put(svc);
|
|
|
|
|
|
/* create a new connection entry */
|
|
|
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
|
|
|
- cp = ip_vs_conn_new(svc->af, iph.protocol,
|
|
|
- &iph.saddr, pptr[0],
|
|
|
- &iph.daddr, pptr[1],
|
|
|
- &daddr, 0,
|
|
|
- IP_VS_CONN_F_BYPASS | flags,
|
|
|
- NULL);
|
|
|
- if (cp == NULL)
|
|
|
- return NF_DROP;
|
|
|
+ {
|
|
|
+ struct ip_vs_conn_param p;
|
|
|
+ ip_vs_conn_fill_param(svc->af, iph.protocol,
|
|
|
+ &iph.saddr, pptr[0],
|
|
|
+ &iph.daddr, pptr[1], &p);
|
|
|
+ cp = ip_vs_conn_new(&p, &daddr, 0,
|
|
|
+ IP_VS_CONN_F_BYPASS | flags,
|
|
|
+ NULL);
|
|
|
+ if (!cp)
|
|
|
+ return NF_DROP;
|
|
|
+ }
|
|
|
|
|
|
/* statistics */
|
|
|
ip_vs_in_stats(cp, skb);
|
|
|
@@ -527,9 +530,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
* ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
|
|
|
*/
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
- if (svc->af == AF_INET6)
|
|
|
+ if (svc->af == AF_INET6) {
|
|
|
+ if (!skb->dev) {
|
|
|
+ struct net *net = dev_net(skb_dst(skb)->dev);
|
|
|
+
|
|
|
+ skb->dev = net->loopback_dev;
|
|
|
+ }
|
|
|
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
|
|
|
- else
|
|
|
+ } else
|
|
|
#endif
|
|
|
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
|
|
|
|
|
|
@@ -541,6 +549,15 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
|
|
|
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
|
|
|
}
|
|
|
|
|
|
+static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
|
|
|
+{
|
|
|
+ if (NF_INET_LOCAL_IN == hooknum)
|
|
|
+ return IP_DEFRAG_VS_IN;
|
|
|
+ if (NF_INET_FORWARD == hooknum)
|
|
|
+ return IP_DEFRAG_VS_FWD;
|
|
|
+ return IP_DEFRAG_VS_OUT;
|
|
|
+}
|
|
|
+
|
|
|
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
|
|
|
{
|
|
|
int err = ip_defrag(skb, user);
|
|
|
@@ -601,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
|
|
|
|
|
if (inout)
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
|
|
|
"Forwarding altered outgoing ICMP");
|
|
|
else
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
|
|
|
"Forwarding altered incoming ICMP");
|
|
|
}
|
|
|
|
|
|
@@ -646,11 +663,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
skb->ip_summed = CHECKSUM_PARTIAL;
|
|
|
|
|
|
if (inout)
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
|
|
|
- "Forwarding altered outgoing ICMPv6");
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
|
|
|
+ (void *)ciph - (void *)iph,
|
|
|
+ "Forwarding altered outgoing ICMPv6");
|
|
|
else
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
|
|
|
- "Forwarding altered incoming ICMPv6");
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
|
|
|
+ (void *)ciph - (void *)iph,
|
|
|
+ "Forwarding altered incoming ICMPv6");
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
@@ -691,10 +710,25 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
|
|
|
#endif
|
|
|
ip_vs_nat_icmp(skb, pp, cp, 1);
|
|
|
|
|
|
+#ifdef CONFIG_IP_VS_IPV6
|
|
|
+ if (af == AF_INET6) {
|
|
|
+ if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
|
|
|
+ goto out;
|
|
|
+ } else
|
|
|
+#endif
|
|
|
+ if ((sysctl_ip_vs_snat_reroute ||
|
|
|
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
|
|
|
+ ip_route_me_harder(skb, RTN_LOCAL) != 0)
|
|
|
+ goto out;
|
|
|
+
|
|
|
/* do the statistics and put it back */
|
|
|
ip_vs_out_stats(cp, skb);
|
|
|
|
|
|
skb->ipvs_property = 1;
|
|
|
+ if (!(cp->flags & IP_VS_CONN_F_NFCT))
|
|
|
+ ip_vs_notrack(skb);
|
|
|
+ else
|
|
|
+ ip_vs_update_conntrack(skb, cp, 0);
|
|
|
verdict = NF_ACCEPT;
|
|
|
|
|
|
out:
|
|
|
@@ -708,7 +742,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
|
|
|
* Find any that might be relevant, check against existing connections.
|
|
|
* Currently handles error types - unreachable, quench, ttl exceeded.
|
|
|
*/
|
|
|
-static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
|
|
|
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
|
|
|
+ unsigned int hooknum)
|
|
|
{
|
|
|
struct iphdr *iph;
|
|
|
struct icmphdr _icmph, *ic;
|
|
|
@@ -723,7 +758,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
|
|
|
|
|
|
/* reassemble IP fragments */
|
|
|
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
|
|
|
- if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
|
|
|
+ if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
|
|
|
return NF_STOLEN;
|
|
|
}
|
|
|
|
|
|
@@ -766,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
|
|
|
pp->dont_defrag))
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
|
|
|
+ "Checking outgoing ICMP for");
|
|
|
|
|
|
offset += cih->ihl * 4;
|
|
|
|
|
|
@@ -782,7 +818,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
|
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
-static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
|
|
|
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
|
|
|
+ unsigned int hooknum)
|
|
|
{
|
|
|
struct ipv6hdr *iph;
|
|
|
struct icmp6hdr _icmph, *ic;
|
|
|
@@ -798,7 +835,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
|
|
|
|
|
|
/* reassemble IP fragments */
|
|
|
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
|
|
|
- if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
|
|
|
+ if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
|
|
|
return NF_STOLEN;
|
|
|
}
|
|
|
|
|
|
@@ -841,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
|
|
|
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
|
|
|
+ "Checking outgoing ICMPv6 for");
|
|
|
|
|
|
offset += sizeof(struct ipv6hdr);
|
|
|
|
|
|
@@ -889,7 +927,7 @@ static unsigned int
|
|
|
handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
struct ip_vs_conn *cp, int ihl)
|
|
|
{
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
|
|
|
+ IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
|
|
|
|
|
|
if (!skb_make_writable(skb, ihl))
|
|
|
goto drop;
|
|
|
@@ -908,6 +946,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
ip_send_check(ip_hdr(skb));
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * nf_iterate does not expect change in the skb->dst->dev.
|
|
|
+ * It looks like it is not fatal to enable this code for hooks
|
|
|
+ * where our handlers are at the end of the chain list and
|
|
|
+ * when all next handlers use skb->dst->dev and not outdev.
|
|
|
+ * It will definitely route properly the inout NAT traffic
|
|
|
+ * when multiple paths are used.
|
|
|
+ */
|
|
|
+
|
|
|
/* For policy routing, packets originating from this
|
|
|
* machine itself may be routed differently to packets
|
|
|
* passing through. We want this packet to be routed as
|
|
|
@@ -916,21 +963,25 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
*/
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
if (af == AF_INET6) {
|
|
|
- if (ip6_route_me_harder(skb) != 0)
|
|
|
+ if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
|
|
|
goto drop;
|
|
|
} else
|
|
|
#endif
|
|
|
- if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
|
|
|
+ if ((sysctl_ip_vs_snat_reroute ||
|
|
|
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
|
|
|
+ ip_route_me_harder(skb, RTN_LOCAL) != 0)
|
|
|
goto drop;
|
|
|
|
|
|
- IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
|
|
|
+ IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
|
|
|
|
|
|
ip_vs_out_stats(cp, skb);
|
|
|
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
|
|
|
- ip_vs_update_conntrack(skb, cp, 0);
|
|
|
- ip_vs_conn_put(cp);
|
|
|
-
|
|
|
skb->ipvs_property = 1;
|
|
|
+ if (!(cp->flags & IP_VS_CONN_F_NFCT))
|
|
|
+ ip_vs_notrack(skb);
|
|
|
+ else
|
|
|
+ ip_vs_update_conntrack(skb, cp, 0);
|
|
|
+ ip_vs_conn_put(cp);
|
|
|
|
|
|
LeaveFunction(11);
|
|
|
return NF_ACCEPT;
|
|
|
@@ -938,35 +989,46 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
|
drop:
|
|
|
ip_vs_conn_put(cp);
|
|
|
kfree_skb(skb);
|
|
|
+ LeaveFunction(11);
|
|
|
return NF_STOLEN;
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
|
|
|
* Check if outgoing packet belongs to the established ip_vs_conn.
|
|
|
*/
|
|
|
static unsigned int
|
|
|
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
|
|
|
- const struct net_device *in, const struct net_device *out,
|
|
|
- int (*okfn)(struct sk_buff *))
|
|
|
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
{
|
|
|
struct ip_vs_iphdr iph;
|
|
|
struct ip_vs_protocol *pp;
|
|
|
struct ip_vs_conn *cp;
|
|
|
- int af;
|
|
|
|
|
|
EnterFunction(11);
|
|
|
|
|
|
- af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
|
|
|
-
|
|
|
+ /* Already marked as IPVS request or reply? */
|
|
|
if (skb->ipvs_property)
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
+ /* Bad... Do not break raw sockets */
|
|
|
+ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
|
|
|
+ af == AF_INET)) {
|
|
|
+ struct sock *sk = skb->sk;
|
|
|
+ struct inet_sock *inet = inet_sk(skb->sk);
|
|
|
+
|
|
|
+ if (inet && sk->sk_family == PF_INET && inet->nodefrag)
|
|
|
+ return NF_ACCEPT;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (unlikely(!skb_dst(skb)))
|
|
|
+ return NF_ACCEPT;
|
|
|
+
|
|
|
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
if (af == AF_INET6) {
|
|
|
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
|
|
|
- int related, verdict = ip_vs_out_icmp_v6(skb, &related);
|
|
|
+ int related;
|
|
|
+ int verdict = ip_vs_out_icmp_v6(skb, &related,
|
|
|
+ hooknum);
|
|
|
|
|
|
if (related)
|
|
|
return verdict;
|
|
|
@@ -975,7 +1037,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
|
|
|
} else
|
|
|
#endif
|
|
|
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
|
|
|
- int related, verdict = ip_vs_out_icmp(skb, &related);
|
|
|
+ int related;
|
|
|
+ int verdict = ip_vs_out_icmp(skb, &related, hooknum);
|
|
|
|
|
|
if (related)
|
|
|
return verdict;
|
|
|
@@ -989,19 +1052,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
|
|
|
/* reassemble IP fragments */
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
if (af == AF_INET6) {
|
|
|
- if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
|
|
|
- int related, verdict = ip_vs_out_icmp_v6(skb, &related);
|
|
|
-
|
|
|
- if (related)
|
|
|
- return verdict;
|
|
|
-
|
|
|
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
|
|
|
+ if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
|
|
|
+ if (ip_vs_gather_frags_v6(skb,
|
|
|
+ ip_vs_defrag_user(hooknum)))
|
|
|
+ return NF_STOLEN;
|
|
|
}
|
|
|
+
|
|
|
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
|
|
|
} else
|
|
|
#endif
|
|
|
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
|
|
|
!pp->dont_defrag)) {
|
|
|
- if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
|
|
|
+ if (ip_vs_gather_frags(skb,
|
|
|
+ ip_vs_defrag_user(hooknum)))
|
|
|
return NF_STOLEN;
|
|
|
|
|
|
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
|
|
|
@@ -1012,55 +1075,123 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
|
|
|
*/
|
|
|
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
|
|
|
|
|
|
- if (unlikely(!cp)) {
|
|
|
- if (sysctl_ip_vs_nat_icmp_send &&
|
|
|
- (pp->protocol == IPPROTO_TCP ||
|
|
|
- pp->protocol == IPPROTO_UDP ||
|
|
|
- pp->protocol == IPPROTO_SCTP)) {
|
|
|
- __be16 _ports[2], *pptr;
|
|
|
-
|
|
|
- pptr = skb_header_pointer(skb, iph.len,
|
|
|
- sizeof(_ports), _ports);
|
|
|
- if (pptr == NULL)
|
|
|
- return NF_ACCEPT; /* Not for me */
|
|
|
- if (ip_vs_lookup_real_service(af, iph.protocol,
|
|
|
- &iph.saddr,
|
|
|
- pptr[0])) {
|
|
|
- /*
|
|
|
- * Notify the real server: there is no
|
|
|
- * existing entry if it is not RST
|
|
|
- * packet or not TCP packet.
|
|
|
- */
|
|
|
- if ((iph.protocol != IPPROTO_TCP &&
|
|
|
- iph.protocol != IPPROTO_SCTP)
|
|
|
- || ((iph.protocol == IPPROTO_TCP
|
|
|
- && !is_tcp_reset(skb, iph.len))
|
|
|
- || (iph.protocol == IPPROTO_SCTP
|
|
|
- && !is_sctp_abort(skb,
|
|
|
- iph.len)))) {
|
|
|
+ if (likely(cp))
|
|
|
+ return handle_response(af, skb, pp, cp, iph.len);
|
|
|
+ if (sysctl_ip_vs_nat_icmp_send &&
|
|
|
+ (pp->protocol == IPPROTO_TCP ||
|
|
|
+ pp->protocol == IPPROTO_UDP ||
|
|
|
+ pp->protocol == IPPROTO_SCTP)) {
|
|
|
+ __be16 _ports[2], *pptr;
|
|
|
+
|
|
|
+ pptr = skb_header_pointer(skb, iph.len,
|
|
|
+ sizeof(_ports), _ports);
|
|
|
+ if (pptr == NULL)
|
|
|
+ return NF_ACCEPT; /* Not for me */
|
|
|
+ if (ip_vs_lookup_real_service(af, iph.protocol,
|
|
|
+ &iph.saddr,
|
|
|
+ pptr[0])) {
|
|
|
+ /*
|
|
|
+ * Notify the real server: there is no
|
|
|
+ * existing entry if it is not RST
|
|
|
+ * packet or not TCP packet.
|
|
|
+ */
|
|
|
+ if ((iph.protocol != IPPROTO_TCP &&
|
|
|
+ iph.protocol != IPPROTO_SCTP)
|
|
|
+ || ((iph.protocol == IPPROTO_TCP
|
|
|
+ && !is_tcp_reset(skb, iph.len))
|
|
|
+ || (iph.protocol == IPPROTO_SCTP
|
|
|
+ && !is_sctp_abort(skb,
|
|
|
+ iph.len)))) {
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
- if (af == AF_INET6)
|
|
|
- icmpv6_send(skb,
|
|
|
- ICMPV6_DEST_UNREACH,
|
|
|
- ICMPV6_PORT_UNREACH,
|
|
|
- 0);
|
|
|
- else
|
|
|
+ if (af == AF_INET6) {
|
|
|
+ struct net *net =
|
|
|
+ dev_net(skb_dst(skb)->dev);
|
|
|
+
|
|
|
+ if (!skb->dev)
|
|
|
+ skb->dev = net->loopback_dev;
|
|
|
+ icmpv6_send(skb,
|
|
|
+ ICMPV6_DEST_UNREACH,
|
|
|
+ ICMPV6_PORT_UNREACH,
|
|
|
+ 0);
|
|
|
+ } else
|
|
|
#endif
|
|
|
- icmp_send(skb,
|
|
|
- ICMP_DEST_UNREACH,
|
|
|
- ICMP_PORT_UNREACH, 0);
|
|
|
- return NF_DROP;
|
|
|
- }
|
|
|
+ icmp_send(skb,
|
|
|
+ ICMP_DEST_UNREACH,
|
|
|
+ ICMP_PORT_UNREACH, 0);
|
|
|
+ return NF_DROP;
|
|
|
}
|
|
|
}
|
|
|
- IP_VS_DBG_PKT(12, pp, skb, 0,
|
|
|
- "packet continues traversal as normal");
|
|
|
- return NF_ACCEPT;
|
|
|
}
|
|
|
+ IP_VS_DBG_PKT(12, af, pp, skb, 0,
|
|
|
+ "ip_vs_out: packet continues traversal as normal");
|
|
|
+ return NF_ACCEPT;
|
|
|
+}
|
|
|
|
|
|
- return handle_response(af, skb, pp, cp, iph.len);
|
|
|
+/*
|
|
|
+ * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
|
|
|
+ * used only for VS/NAT.
|
|
|
+ * Check if packet is reply for established ip_vs_conn.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in, const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ return ip_vs_out(hooknum, skb, AF_INET);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
|
|
|
+ * Check if packet is reply for established ip_vs_conn.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in, const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ unsigned int verdict;
|
|
|
+
|
|
|
+ /* Disable BH in LOCAL_OUT until all places are fixed */
|
|
|
+ local_bh_disable();
|
|
|
+ verdict = ip_vs_out(hooknum, skb, AF_INET);
|
|
|
+ local_bh_enable();
|
|
|
+ return verdict;
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef CONFIG_IP_VS_IPV6
|
|
|
+
|
|
|
+/*
|
|
|
+ * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
|
|
|
+ * used only for VS/NAT.
|
|
|
+ * Check if packet is reply for established ip_vs_conn.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in, const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ return ip_vs_out(hooknum, skb, AF_INET6);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
|
|
|
+ * Check if packet is reply for established ip_vs_conn.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in, const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ unsigned int verdict;
|
|
|
+
|
|
|
+ /* Disable BH in LOCAL_OUT until all places are fixed */
|
|
|
+ local_bh_disable();
|
|
|
+ verdict = ip_vs_out(hooknum, skb, AF_INET6);
|
|
|
+ local_bh_enable();
|
|
|
+ return verdict;
|
|
|
+}
|
|
|
+
|
|
|
+#endif
|
|
|
|
|
|
/*
|
|
|
* Handle ICMP messages in the outside-to-inside direction (incoming).
|
|
|
@@ -1084,8 +1215,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
|
|
|
/* reassemble IP fragments */
|
|
|
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
|
|
|
- if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
|
|
|
- IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
|
|
|
+ if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
|
|
|
return NF_STOLEN;
|
|
|
}
|
|
|
|
|
|
@@ -1128,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
pp->dont_defrag))
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
|
|
|
+ "Checking incoming ICMP for");
|
|
|
|
|
|
offset += cih->ihl * 4;
|
|
|
|
|
|
@@ -1162,7 +1293,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
|
|
|
offset += 2 * sizeof(__u16);
|
|
|
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
|
|
|
- /* do not touch skb anymore */
|
|
|
+ /* LOCALNODE from FORWARD hook is not supported */
|
|
|
+ if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
|
|
|
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) {
|
|
|
+ IP_VS_DBG(1, "%s(): "
|
|
|
+ "local delivery to %pI4 but in FORWARD\n",
|
|
|
+ __func__, &skb_rtable(skb)->rt_dst);
|
|
|
+ verdict = NF_DROP;
|
|
|
+ }
|
|
|
|
|
|
out:
|
|
|
__ip_vs_conn_put(cp);
|
|
|
@@ -1183,14 +1321,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
struct ip_vs_protocol *pp;
|
|
|
unsigned int offset, verdict;
|
|
|
union nf_inet_addr snet;
|
|
|
+ struct rt6_info *rt;
|
|
|
|
|
|
*related = 1;
|
|
|
|
|
|
/* reassemble IP fragments */
|
|
|
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
|
|
|
- if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
|
|
|
- IP_DEFRAG_VS_IN :
|
|
|
- IP_DEFRAG_VS_FWD))
|
|
|
+ if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
|
|
|
return NF_STOLEN;
|
|
|
}
|
|
|
|
|
|
@@ -1233,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
|
|
|
+ "Checking incoming ICMPv6 for");
|
|
|
|
|
|
offset += sizeof(struct ipv6hdr);
|
|
|
|
|
|
@@ -1261,7 +1399,15 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
IPPROTO_SCTP == cih->nexthdr)
|
|
|
offset += 2 * sizeof(__u16);
|
|
|
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
|
|
|
- /* do not touch skb anymore */
|
|
|
+ /* LOCALNODE from FORWARD hook is not supported */
|
|
|
+ if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
|
|
|
+ (rt = (struct rt6_info *) skb_dst(skb)) &&
|
|
|
+ rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) {
|
|
|
+ IP_VS_DBG(1, "%s(): "
|
|
|
+ "local delivery to %pI6 but in FORWARD\n",
|
|
|
+ __func__, &rt->rt6i_dst);
|
|
|
+ verdict = NF_DROP;
|
|
|
+ }
|
|
|
|
|
|
__ip_vs_conn_put(cp);
|
|
|
|
|
|
@@ -1275,35 +1421,49 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
* and send it on its way...
|
|
|
*/
|
|
|
static unsigned int
|
|
|
-ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
|
|
|
- const struct net_device *in, const struct net_device *out,
|
|
|
- int (*okfn)(struct sk_buff *))
|
|
|
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
{
|
|
|
struct ip_vs_iphdr iph;
|
|
|
struct ip_vs_protocol *pp;
|
|
|
struct ip_vs_conn *cp;
|
|
|
- int ret, restart, af, pkts;
|
|
|
+ int ret, restart, pkts;
|
|
|
|
|
|
- af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
|
|
|
-
|
|
|
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
|
|
|
+ /* Already marked as IPVS request or reply? */
|
|
|
+ if (skb->ipvs_property)
|
|
|
+ return NF_ACCEPT;
|
|
|
|
|
|
/*
|
|
|
- * Big tappo: only PACKET_HOST, including loopback for local client
|
|
|
- * Don't handle local packets on IPv6 for now
|
|
|
+ * Big tappo:
|
|
|
+ * - remote client: only PACKET_HOST
|
|
|
+ * - route: used for struct net when skb->dev is unset
|
|
|
*/
|
|
|
- if (unlikely(skb->pkt_type != PACKET_HOST)) {
|
|
|
- IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
|
|
|
- skb->pkt_type,
|
|
|
- iph.protocol,
|
|
|
- IP_VS_DBG_ADDR(af, &iph.daddr));
|
|
|
+ if (unlikely((skb->pkt_type != PACKET_HOST &&
|
|
|
+ hooknum != NF_INET_LOCAL_OUT) ||
|
|
|
+ !skb_dst(skb))) {
|
|
|
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
|
|
|
+ IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
|
|
|
+ " ignored in hook %u\n",
|
|
|
+ skb->pkt_type, iph.protocol,
|
|
|
+ IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
|
|
|
return NF_ACCEPT;
|
|
|
}
|
|
|
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
|
|
|
+
|
|
|
+ /* Bad... Do not break raw sockets */
|
|
|
+ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
|
|
|
+ af == AF_INET)) {
|
|
|
+ struct sock *sk = skb->sk;
|
|
|
+ struct inet_sock *inet = inet_sk(skb->sk);
|
|
|
+
|
|
|
+ if (inet && sk->sk_family == PF_INET && inet->nodefrag)
|
|
|
+ return NF_ACCEPT;
|
|
|
+ }
|
|
|
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
if (af == AF_INET6) {
|
|
|
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
|
|
|
- int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
|
|
|
+ int related;
|
|
|
+ int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
|
|
|
|
|
|
if (related)
|
|
|
return verdict;
|
|
|
@@ -1312,7 +1472,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
|
|
|
} else
|
|
|
#endif
|
|
|
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
|
|
|
- int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
|
|
|
+ int related;
|
|
|
+ int verdict = ip_vs_in_icmp(skb, &related, hooknum);
|
|
|
|
|
|
if (related)
|
|
|
return verdict;
|
|
|
@@ -1332,23 +1493,18 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
|
|
|
if (unlikely(!cp)) {
|
|
|
int v;
|
|
|
|
|
|
- /* For local client packets, it could be a response */
|
|
|
- cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
|
|
|
- if (cp)
|
|
|
- return handle_response(af, skb, pp, cp, iph.len);
|
|
|
-
|
|
|
if (!pp->conn_schedule(af, skb, pp, &v, &cp))
|
|
|
return v;
|
|
|
}
|
|
|
|
|
|
if (unlikely(!cp)) {
|
|
|
/* sorry, all this trouble for a no-hit :) */
|
|
|
- IP_VS_DBG_PKT(12, pp, skb, 0,
|
|
|
- "packet continues traversal as normal");
|
|
|
+ IP_VS_DBG_PKT(12, af, pp, skb, 0,
|
|
|
+ "ip_vs_in: packet continues traversal as normal");
|
|
|
return NF_ACCEPT;
|
|
|
}
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
|
|
|
+ IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
|
|
|
|
|
|
/* Check the server status */
|
|
|
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
|
|
|
@@ -1415,6 +1571,72 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * AF_INET handler in NF_INET_LOCAL_IN chain
|
|
|
+ * Schedule and forward packets from remote clients
|
|
|
+ * Thin wrapper: passes hooknum and skb to ip_vs_in() with af fixed to
|
|
|
+ * AF_INET and returns its verdict; in, out and okfn are not used.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in,
|
|
|
+ const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ return ip_vs_in(hooknum, skb, AF_INET);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * AF_INET handler in NF_INET_LOCAL_OUT chain
|
|
|
+ * Schedule and forward packets from local clients
|
|
|
+ * Calls ip_vs_in() with af fixed to AF_INET while bottom halves are
|
|
|
+ * disabled and returns its verdict; in, out and okfn are not used.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in, const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ unsigned int verdict;
|
|
|
+
|
|
|
+ /* Disable BH in LOCAL_OUT until all places are fixed */
|
|
|
+ local_bh_disable();
|
|
|
+ verdict = ip_vs_in(hooknum, skb, AF_INET);
|
|
|
+ local_bh_enable();
|
|
|
+ return verdict;
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef CONFIG_IP_VS_IPV6
|
|
|
+
|
|
|
+/*
|
|
|
+ * AF_INET6 handler in NF_INET_LOCAL_IN chain
|
|
|
+ * Schedule and forward packets from remote clients
|
|
|
+ * Thin wrapper: passes hooknum and skb to ip_vs_in() with af fixed to
|
|
|
+ * AF_INET6 and returns its verdict; in, out and okfn are not used.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in,
|
|
|
+ const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ return ip_vs_in(hooknum, skb, AF_INET6);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * AF_INET6 handler in NF_INET_LOCAL_OUT chain
|
|
|
+ * Schedule and forward packets from local clients
|
|
|
+ * Calls ip_vs_in() with af fixed to AF_INET6 while bottom halves are
|
|
|
+ * disabled and returns its verdict; in, out and okfn are not used.
|
|
|
+ */
|
|
|
+static unsigned int
|
|
|
+ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
|
|
|
+ const struct net_device *in, const struct net_device *out,
|
|
|
+ int (*okfn)(struct sk_buff *))
|
|
|
+{
|
|
|
+ unsigned int verdict;
|
|
|
+
|
|
|
+ /* Disable BH in LOCAL_OUT until all places are fixed */
|
|
|
+ local_bh_disable();
|
|
|
+ verdict = ip_vs_in(hooknum, skb, AF_INET6);
|
|
|
+ local_bh_enable();
|
|
|
+ return verdict;
|
|
|
+}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
|
|
|
/*
|
|
|
* It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
|
|
|
@@ -1455,23 +1677,39 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
|
|
|
|
|
|
|
|
|
static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
|
|
|
+ /* After packet filtering, change source only for VS/NAT */
|
|
|
+ {
|
|
|
+ .hook = ip_vs_reply4,
|
|
|
+ .owner = THIS_MODULE,
|
|
|
+ .pf = PF_INET,
|
|
|
+ .hooknum = NF_INET_LOCAL_IN,
|
|
|
+ .priority = 99,
|
|
|
+ },
|
|
|
/* After packet filtering, forward packet through VS/DR, VS/TUN,
|
|
|
* or VS/NAT(change destination), so that filtering rules can be
|
|
|
* applied to IPVS. */
|
|
|
{
|
|
|
- .hook = ip_vs_in,
|
|
|
+ .hook = ip_vs_remote_request4,
|
|
|
.owner = THIS_MODULE,
|
|
|
.pf = PF_INET,
|
|
|
- .hooknum = NF_INET_LOCAL_IN,
|
|
|
- .priority = 100,
|
|
|
+ .hooknum = NF_INET_LOCAL_IN,
|
|
|
+ .priority = 101,
|
|
|
},
|
|
|
- /* After packet filtering, change source only for VS/NAT */
|
|
|
+ /* Before ip_vs_in, change source only for VS/NAT */
|
|
|
{
|
|
|
- .hook = ip_vs_out,
|
|
|
+ .hook = ip_vs_local_reply4,
|
|
|
.owner = THIS_MODULE,
|
|
|
.pf = PF_INET,
|
|
|
- .hooknum = NF_INET_FORWARD,
|
|
|
- .priority = 100,
|
|
|
+ .hooknum = NF_INET_LOCAL_OUT,
|
|
|
+ .priority = -99,
|
|
|
+ },
|
|
|
+ /* After mangle, schedule and forward local requests */
|
|
|
+ {
|
|
|
+ .hook = ip_vs_local_request4,
|
|
|
+ .owner = THIS_MODULE,
|
|
|
+ .pf = PF_INET,
|
|
|
+ .hooknum = NF_INET_LOCAL_OUT,
|
|
|
+ .priority = -98,
|
|
|
},
|
|
|
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
|
|
|
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
|
|
|
@@ -1479,27 +1717,51 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
|
|
|
.hook = ip_vs_forward_icmp,
|
|
|
.owner = THIS_MODULE,
|
|
|
.pf = PF_INET,
|
|
|
- .hooknum = NF_INET_FORWARD,
|
|
|
- .priority = 99,
|
|
|
+ .hooknum = NF_INET_FORWARD,
|
|
|
+ .priority = 99,
|
|
|
+ },
|
|
|
+ /* After packet filtering, change source only for VS/NAT */
|
|
|
+ {
|
|
|
+ .hook = ip_vs_reply4,
|
|
|
+ .owner = THIS_MODULE,
|
|
|
+ .pf = PF_INET,
|
|
|
+ .hooknum = NF_INET_FORWARD,
|
|
|
+ .priority = 100,
|
|
|
},
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
+ /* After packet filtering, change source only for VS/NAT */
|
|
|
+ {
|
|
|
+ .hook = ip_vs_reply6,
|
|
|
+ .owner = THIS_MODULE,
|
|
|
+ .pf = PF_INET6,
|
|
|
+ .hooknum = NF_INET_LOCAL_IN,
|
|
|
+ .priority = 99,
|
|
|
+ },
|
|
|
/* After packet filtering, forward packet through VS/DR, VS/TUN,
|
|
|
* or VS/NAT(change destination), so that filtering rules can be
|
|
|
* applied to IPVS. */
|
|
|
{
|
|
|
- .hook = ip_vs_in,
|
|
|
+ .hook = ip_vs_remote_request6,
|
|
|
.owner = THIS_MODULE,
|
|
|
.pf = PF_INET6,
|
|
|
- .hooknum = NF_INET_LOCAL_IN,
|
|
|
- .priority = 100,
|
|
|
+ .hooknum = NF_INET_LOCAL_IN,
|
|
|
+ .priority = 101,
|
|
|
},
|
|
|
- /* After packet filtering, change source only for VS/NAT */
|
|
|
+ /* Before ip_vs_in, change source only for VS/NAT.
|
|
|
+ * IPv6 handler, so it is registered for PF_INET6. */
|
|
|
+ {
|
|
|
+ .hook = ip_vs_local_reply6,
|
|
|
+ .owner = THIS_MODULE,
|
|
|
+ .pf = PF_INET6,
|
|
|
+ .hooknum = NF_INET_LOCAL_OUT,
|
|
|
+ .priority = -99,
|
|
|
+ },
|
|
|
+ /* After mangle, schedule and forward local requests */
|
|
|
{
|
|
|
- .hook = ip_vs_out,
|
|
|
+ .hook = ip_vs_local_request6,
|
|
|
.owner = THIS_MODULE,
|
|
|
.pf = PF_INET6,
|
|
|
- .hooknum = NF_INET_FORWARD,
|
|
|
- .priority = 100,
|
|
|
+ .hooknum = NF_INET_LOCAL_OUT,
|
|
|
+ .priority = -98,
|
|
|
},
|
|
|
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
|
|
|
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
|
|
|
@@ -1507,8 +1769,16 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
|
|
|
.hook = ip_vs_forward_icmp_v6,
|
|
|
.owner = THIS_MODULE,
|
|
|
.pf = PF_INET6,
|
|
|
- .hooknum = NF_INET_FORWARD,
|
|
|
- .priority = 99,
|
|
|
+ .hooknum = NF_INET_FORWARD,
|
|
|
+ .priority = 99,
|
|
|
+ },
|
|
|
+ /* After packet filtering, change source only for VS/NAT */
|
|
|
+ {
|
|
|
+ .hook = ip_vs_reply6,
|
|
|
+ .owner = THIS_MODULE,
|
|
|
+ .pf = PF_INET6,
|
|
|
+ .hooknum = NF_INET_FORWARD,
|
|
|
+ .priority = 100,
|
|
|
},
|
|
|
#endif
|
|
|
};
|