|
@@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
|
|
|
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
|
|
|
union nf_inet_addr snet; /* source network of the client,
|
|
|
after masking */
|
|
|
+ const union nf_inet_addr *src_addr, *dst_addr;
|
|
|
+
|
|
|
+ if (likely(!ip_vs_iph_inverse(iph))) {
|
|
|
+ src_addr = &iph->saddr;
|
|
|
+ dst_addr = &iph->daddr;
|
|
|
+ } else {
|
|
|
+ src_addr = &iph->daddr;
|
|
|
+ dst_addr = &iph->saddr;
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
/* Mask saddr with the netmask to adjust template granularity */
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
if (svc->af == AF_INET6)
|
|
|
- ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
|
|
|
+ ipv6_addr_prefix(&snet.in6, &src_addr->in6,
|
|
|
(__force __u32) svc->netmask);
|
|
|
else
|
|
|
#endif
|
|
|
- snet.ip = iph->saddr.ip & svc->netmask;
|
|
|
+ snet.ip = src_addr->ip & svc->netmask;
|
|
|
|
|
|
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
|
|
|
"mnet %s\n",
|
|
|
- IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
|
|
|
- IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
|
|
|
+ IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port),
|
|
|
+ IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port),
|
|
|
IP_VS_DBG_ADDR(svc->af, &snet));
|
|
|
|
|
|
/*
|
|
@@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
|
|
|
*/
|
|
|
{
|
|
|
int protocol = iph->protocol;
|
|
|
- const union nf_inet_addr *vaddr = &iph->daddr;
|
|
|
+ const union nf_inet_addr *vaddr = dst_addr;
|
|
|
__be16 vport = 0;
|
|
|
|
|
|
if (dst_port == svc->port) {
|
|
@@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
|
|
|
/*
|
|
|
* Create a new connection according to the template
|
|
|
*/
|
|
|
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
|
|
|
- src_port, &iph->daddr, dst_port, ¶m);
|
|
|
+ ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, src_addr,
|
|
|
+ src_port, dst_addr, dst_port, ¶m);
|
|
|
|
|
|
cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest,
|
|
|
skb->mark);
|
|
@@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
struct ip_vs_conn *cp = NULL;
|
|
|
struct ip_vs_scheduler *sched;
|
|
|
struct ip_vs_dest *dest;
|
|
|
- __be16 _ports[2], *pptr;
|
|
|
+ __be16 _ports[2], *pptr, cport, vport;
|
|
|
+ const void *caddr, *vaddr;
|
|
|
unsigned int flags;
|
|
|
|
|
|
*ignored = 1;
|
|
@@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
if (pptr == NULL)
|
|
|
return NULL;
|
|
|
|
|
|
+ if (likely(!ip_vs_iph_inverse(iph))) {
|
|
|
+ cport = pptr[0];
|
|
|
+ caddr = &iph->saddr;
|
|
|
+ vport = pptr[1];
|
|
|
+ vaddr = &iph->daddr;
|
|
|
+ } else {
|
|
|
+ cport = pptr[1];
|
|
|
+ caddr = &iph->daddr;
|
|
|
+ vport = pptr[0];
|
|
|
+ vaddr = &iph->saddr;
|
|
|
+ }
|
|
|
+
|
|
|
/*
|
|
|
* FTPDATA needs this check when using local real server.
|
|
|
* Never schedule Active FTPDATA connections from real server.
|
|
|
* For LVS-NAT they must be already created. For other methods
|
|
|
* with persistence the connection is created on SYN+ACK.
|
|
|
*/
|
|
|
- if (pptr[0] == FTPDATA) {
|
|
|
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
|
|
|
+ if (cport == FTPDATA) {
|
|
|
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
|
|
|
"Not scheduling FTPDATA");
|
|
|
return NULL;
|
|
|
}
|
|
@@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
/*
|
|
|
* Do not schedule replies from local real server.
|
|
|
*/
|
|
|
- if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
|
|
|
- (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
|
|
|
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
|
|
|
- "Not scheduling reply for existing connection");
|
|
|
- __ip_vs_conn_put(cp);
|
|
|
- return NULL;
|
|
|
+ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
|
|
|
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
|
|
|
+ cp = pp->conn_in_get(svc->af, skb, iph);
|
|
|
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
|
|
|
+
|
|
|
+ if (cp) {
|
|
|
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
|
|
|
+ "Not scheduling reply for existing"
|
|
|
+ " connection");
|
|
|
+ __ip_vs_conn_put(cp);
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
* Persistent service
|
|
|
*/
|
|
|
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
|
|
|
- return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
|
|
|
+ return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
|
|
|
iph);
|
|
|
|
|
|
*ignored = 0;
|
|
@@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
/*
|
|
|
* Non-persistent service
|
|
|
*/
|
|
|
- if (!svc->fwmark && pptr[1] != svc->port) {
|
|
|
+ if (!svc->fwmark && vport != svc->port) {
|
|
|
if (!svc->port)
|
|
|
pr_err("Schedule: port zero only supported "
|
|
|
"in persistent services, "
|
|
@@ -496,10 +525,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
struct ip_vs_conn_param p;
|
|
|
|
|
|
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
|
|
|
- &iph->saddr, pptr[0], &iph->daddr,
|
|
|
- pptr[1], &p);
|
|
|
+ caddr, cport, vaddr, vport, &p);
|
|
|
cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
|
|
|
- dest->port ? dest->port : pptr[1],
|
|
|
+ dest->port ? dest->port : vport,
|
|
|
flags, dest, skb->mark);
|
|
|
if (!cp) {
|
|
|
*ignored = -1;
|
|
@@ -519,6 +547,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
return cp;
|
|
|
}
|
|
|
|
|
|
+#ifdef CONFIG_SYSCTL
|
|
|
+static inline int ip_vs_addr_is_unicast(struct net *net, int af,
|
|
|
+ union nf_inet_addr *addr)
|
|
|
+{
|
|
|
+#ifdef CONFIG_IP_VS_IPV6
|
|
|
+ if (af == AF_INET6)
|
|
|
+ return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST;
|
|
|
+#endif
|
|
|
+ return (inet_addr_type(net, addr->ip) == RTN_UNICAST);
|
|
|
+}
|
|
|
+#endif
|
|
|
|
|
|
/*
|
|
|
* Pass or drop the packet.
|
|
@@ -528,33 +567,28 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
|
|
|
{
|
|
|
- __be16 _ports[2], *pptr;
|
|
|
+ __be16 _ports[2], *pptr, dport;
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
struct net *net;
|
|
|
struct netns_ipvs *ipvs;
|
|
|
- int unicast;
|
|
|
#endif
|
|
|
|
|
|
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
|
|
|
- if (pptr == NULL) {
|
|
|
+ if (!pptr)
|
|
|
return NF_DROP;
|
|
|
- }
|
|
|
+ dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
|
|
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
net = skb_net(skb);
|
|
|
|
|
|
-#ifdef CONFIG_IP_VS_IPV6
|
|
|
- if (svc->af == AF_INET6)
|
|
|
- unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
|
|
|
- else
|
|
|
-#endif
|
|
|
- unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
|
|
|
|
|
|
/* if it is fwmark-based service, the cache_bypass sysctl is up
|
|
|
and the destination is a non-local unicast, then create
|
|
|
a cache_bypass connection entry */
|
|
|
ipvs = net_ipvs(net);
|
|
|
- if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
|
|
|
+ if (ipvs->sysctl_cache_bypass && svc->fwmark &&
|
|
|
+ !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) &&
|
|
|
+ ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) {
|
|
|
int ret;
|
|
|
struct ip_vs_conn *cp;
|
|
|
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
|
|
@@ -598,9 +632,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
|
|
|
* listed in the ipvs table), pass the packets, because it is
|
|
|
* not ipvs job to decide to drop the packets.
|
|
|
*/
|
|
|
- if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
|
|
|
+ if (svc->port == FTPPORT && dport != FTPPORT)
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
+ if (unlikely(ip_vs_iph_icmp(iph)))
|
|
|
+ return NF_DROP;
|
|
|
+
|
|
|
/*
|
|
|
* Notify the client that the destination is unreachable, and
|
|
|
* release the socket buffer.
|
|
@@ -934,10 +971,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
|
|
|
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
|
|
|
"Checking outgoing ICMP for");
|
|
|
|
|
|
- ip_vs_fill_ip4hdr(cih, &ciph);
|
|
|
- ciph.len += offset;
|
|
|
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
|
|
|
+
|
|
|
/* The embedded headers contain source and dest in reverse order */
|
|
|
- cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
|
|
|
+ cp = pp->conn_out_get(AF_INET, skb, &ciph);
|
|
|
if (!cp)
|
|
|
return NF_ACCEPT;
|
|
|
|
|
@@ -951,12 +988,11 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
|
|
|
unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
|
|
|
{
|
|
|
struct icmp6hdr _icmph, *ic;
|
|
|
- struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
|
|
|
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
|
|
|
struct ip_vs_conn *cp;
|
|
|
struct ip_vs_protocol *pp;
|
|
|
union nf_inet_addr snet;
|
|
|
- unsigned int writable;
|
|
|
+ unsigned int offset;
|
|
|
|
|
|
*related = 1;
|
|
|
ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
|
|
@@ -984,31 +1020,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
|
|
|
ic->icmp6_type, ntohs(icmpv6_id(ic)),
|
|
|
&ipvsh->saddr, &ipvsh->daddr);
|
|
|
|
|
|
- /* Now find the contained IP header */
|
|
|
- ciph.len = ipvsh->len + sizeof(_icmph);
|
|
|
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
|
|
|
- if (ip6h == NULL)
|
|
|
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph),
|
|
|
+ true, &ciph))
|
|
|
return NF_ACCEPT; /* The packet looks wrong, ignore */
|
|
|
- ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
|
|
|
- ciph.daddr.in6 = ip6h->daddr;
|
|
|
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
|
|
|
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
|
|
|
- if (ciph.protocol < 0)
|
|
|
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
|
|
|
|
|
|
pp = ip_vs_proto_get(ciph.protocol);
|
|
|
if (!pp)
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
/* The embedded headers contain source and dest in reverse order */
|
|
|
- cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
|
|
|
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph);
|
|
|
if (!cp)
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
snet.in6 = ciph.saddr.in6;
|
|
|
- writable = ciph.len;
|
|
|
+ offset = ciph.len;
|
|
|
return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
|
|
|
- pp, writable, sizeof(struct ipv6hdr),
|
|
|
+ pp, offset, sizeof(struct ipv6hdr),
|
|
|
hooknum);
|
|
|
}
|
|
|
#endif
|
|
@@ -1093,7 +1121,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
|
|
{
|
|
|
struct ip_vs_protocol *pp = pd->pp;
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
|
|
|
+ IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
|
|
|
|
|
|
if (!skb_make_writable(skb, iph->len))
|
|
|
goto drop;
|
|
@@ -1130,7 +1158,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
|
|
if (ip_vs_route_me_harder(af, skb, hooknum))
|
|
|
goto drop;
|
|
|
|
|
|
- IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
|
|
|
+ IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");
|
|
|
|
|
|
ip_vs_out_stats(cp, skb);
|
|
|
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
|
|
@@ -1186,7 +1214,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
if (!net_ipvs(net)->enable)
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
- ip_vs_fill_iph_skb(af, skb, &iph);
|
|
|
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
|
|
|
#ifdef CONFIG_IP_VS_IPV6
|
|
|
if (af == AF_INET6) {
|
|
|
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
|
|
@@ -1221,13 +1249,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
ip_vs_defrag_user(hooknum)))
|
|
|
return NF_STOLEN;
|
|
|
|
|
|
- ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
|
|
|
+ ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
* Check if the packet belongs to an existing entry
|
|
|
*/
|
|
|
- cp = pp->conn_out_get(af, skb, &iph, 0);
|
|
|
+ cp = pp->conn_out_get(af, skb, &iph);
|
|
|
|
|
|
if (likely(cp))
|
|
|
return handle_response(af, skb, pd, cp, &iph, hooknum);
|
|
@@ -1272,7 +1300,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
|
|
|
+ IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
|
|
|
"ip_vs_out: packet continues traversal as normal");
|
|
|
return NF_ACCEPT;
|
|
|
}
|
|
@@ -1327,6 +1355,42 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
|
|
|
|
|
|
#endif
|
|
|
|
|
|
+static unsigned int
|
|
|
+ip_vs_try_to_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
|
|
+ int *verdict, struct ip_vs_conn **cpp,
|
|
|
+ struct ip_vs_iphdr *iph)
|
|
|
+{
|
|
|
+ struct ip_vs_protocol *pp = pd->pp;
|
|
|
+
|
|
|
+ if (!iph->fragoffs) {
|
|
|
+ /* No (second) fragments need to enter here, as nf_defrag_ipv6
|
|
|
+ * replayed fragment zero will already have created the cp
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Schedule and create new connection entry into cpp */
|
|
|
+ if (!pp->conn_schedule(af, skb, pd, verdict, cpp, iph))
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (unlikely(!*cpp)) {
|
|
|
+ /* sorry, all this trouble for a no-hit :) */
|
|
|
+ IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
|
|
|
+ "ip_vs_in: packet continues traversal as normal");
|
|
|
+ if (iph->fragoffs) {
|
|
|
+ /* Fragment that couldn't be mapped to a conn entry
|
|
|
+ * is missing module nf_defrag_ipv6
|
|
|
+ */
|
|
|
+ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
|
|
|
+ IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
|
|
|
+ "unhandled fragment");
|
|
|
+ }
|
|
|
+ *verdict = NF_ACCEPT;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ return 1;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Handle ICMP messages in the outside-to-inside direction (incoming).
|
|
|
* Find any that might be relevant, check against existing connections,
|
|
@@ -1345,7 +1409,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
struct ip_vs_protocol *pp;
|
|
|
struct ip_vs_proto_data *pd;
|
|
|
unsigned int offset, offset2, ihl, verdict;
|
|
|
- bool ipip;
|
|
|
+ bool ipip, new_cp = false;
|
|
|
|
|
|
*related = 1;
|
|
|
|
|
@@ -1416,15 +1480,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
|
|
|
"Checking incoming ICMP for");
|
|
|
|
|
|
offset2 = offset;
|
|
|
- ip_vs_fill_ip4hdr(cih, &ciph);
|
|
|
- ciph.len += offset;
|
|
|
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph);
|
|
|
offset = ciph.len;
|
|
|
+
|
|
|
/* The embedded headers contain source and dest in reverse order.
|
|
|
* For IPIP this is error for request, not for reply.
|
|
|
*/
|
|
|
- cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
|
|
|
- if (!cp)
|
|
|
- return NF_ACCEPT;
|
|
|
+ cp = pp->conn_in_get(AF_INET, skb, &ciph);
|
|
|
+
|
|
|
+ if (!cp) {
|
|
|
+ int v;
|
|
|
+
|
|
|
+ if (!sysctl_schedule_icmp(net_ipvs(net)))
|
|
|
+ return NF_ACCEPT;
|
|
|
+
|
|
|
+ if (!ip_vs_try_to_schedule(AF_INET, skb, pd, &v, &cp, &ciph))
|
|
|
+ return v;
|
|
|
+ new_cp = true;
|
|
|
+ }
|
|
|
|
|
|
verdict = NF_DROP;
|
|
|
|
|
@@ -1501,7 +1574,10 @@ ignore_ipip:
|
|
|
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
|
|
|
|
|
|
out:
|
|
|
- __ip_vs_conn_put(cp);
|
|
|
+ if (likely(!new_cp))
|
|
|
+ __ip_vs_conn_put(cp);
|
|
|
+ else
|
|
|
+ ip_vs_conn_put(cp);
|
|
|
|
|
|
return verdict;
|
|
|
}
|
|
@@ -1511,13 +1587,13 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
|
|
|
unsigned int hooknum, struct ip_vs_iphdr *iph)
|
|
|
{
|
|
|
struct net *net = NULL;
|
|
|
- struct ipv6hdr _ip6h, *ip6h;
|
|
|
struct icmp6hdr _icmph, *ic;
|
|
|
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
|
|
|
struct ip_vs_conn *cp;
|
|
|
struct ip_vs_protocol *pp;
|
|
|
struct ip_vs_proto_data *pd;
|
|
|
- unsigned int offs_ciph, writable, verdict;
|
|
|
+ unsigned int offset, verdict;
|
|
|
+ bool new_cp = false;
|
|
|
|
|
|
*related = 1;
|
|
|
|
|
@@ -1546,18 +1622,9 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
|
|
|
ic->icmp6_type, ntohs(icmpv6_id(ic)),
|
|
|
&iph->saddr, &iph->daddr);
|
|
|
|
|
|
- /* Now find the contained IP header */
|
|
|
- ciph.len = iph->len + sizeof(_icmph);
|
|
|
- offs_ciph = ciph.len; /* Save ip header offset */
|
|
|
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
|
|
|
- if (ip6h == NULL)
|
|
|
- return NF_ACCEPT; /* The packet looks wrong, ignore */
|
|
|
- ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
|
|
|
- ciph.daddr.in6 = ip6h->daddr;
|
|
|
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
|
|
|
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
|
|
|
- if (ciph.protocol < 0)
|
|
|
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
|
|
|
+ offset = iph->len + sizeof(_icmph);
|
|
|
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
|
|
|
+ return NF_ACCEPT;
|
|
|
|
|
|
net = skb_net(skb);
|
|
|
pd = ip_vs_proto_data_get(net, ciph.protocol);
|
|
@@ -1569,36 +1636,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
|
|
|
if (ciph.fragoffs)
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
|
|
|
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
|
|
|
"Checking incoming ICMPv6 for");
|
|
|
|
|
|
/* The embedded headers contain source and dest in reverse order
|
|
|
* if not from localhost
|
|
|
*/
|
|
|
- cp = pp->conn_in_get(AF_INET6, skb, &ciph,
|
|
|
- (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
|
|
|
+ cp = pp->conn_in_get(AF_INET6, skb, &ciph);
|
|
|
+
|
|
|
+ if (!cp) {
|
|
|
+ int v;
|
|
|
+
|
|
|
+ if (!sysctl_schedule_icmp(net_ipvs(net)))
|
|
|
+ return NF_ACCEPT;
|
|
|
+
|
|
|
+ if (!ip_vs_try_to_schedule(AF_INET6, skb, pd, &v, &cp, &ciph))
|
|
|
+ return v;
|
|
|
+
|
|
|
+ new_cp = true;
|
|
|
+ }
|
|
|
|
|
|
- if (!cp)
|
|
|
- return NF_ACCEPT;
|
|
|
/* VS/TUN, VS/DR and LOCALNODE just let it go */
|
|
|
if ((hooknum == NF_INET_LOCAL_OUT) &&
|
|
|
(IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
|
|
|
- __ip_vs_conn_put(cp);
|
|
|
- return NF_ACCEPT;
|
|
|
+ verdict = NF_ACCEPT;
|
|
|
+ goto out;
|
|
|
}
|
|
|
|
|
|
/* do the statistics and put it back */
|
|
|
ip_vs_in_stats(cp, skb);
|
|
|
|
|
|
/* Need to mangle contained IPv6 header in ICMPv6 packet */
|
|
|
- writable = ciph.len;
|
|
|
+ offset = ciph.len;
|
|
|
if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
|
|
|
IPPROTO_SCTP == ciph.protocol)
|
|
|
- writable += 2 * sizeof(__u16); /* Also mangle ports */
|
|
|
+ offset += 2 * sizeof(__u16); /* Also mangle ports */
|
|
|
|
|
|
- verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
|
|
|
+ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);
|
|
|
|
|
|
- __ip_vs_conn_put(cp);
|
|
|
+out:
|
|
|
+ if (likely(!new_cp))
|
|
|
+ __ip_vs_conn_put(cp);
|
|
|
+ else
|
|
|
+ ip_vs_conn_put(cp);
|
|
|
|
|
|
return verdict;
|
|
|
}
|
|
@@ -1633,7 +1713,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
if (unlikely((skb->pkt_type != PACKET_HOST &&
|
|
|
hooknum != NF_INET_LOCAL_OUT) ||
|
|
|
!skb_dst(skb))) {
|
|
|
- ip_vs_fill_iph_skb(af, skb, &iph);
|
|
|
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
|
|
|
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
|
|
|
" ignored in hook %u\n",
|
|
|
skb->pkt_type, iph.protocol,
|
|
@@ -1646,7 +1726,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
|
|
|
return NF_ACCEPT;
|
|
|
|
|
|
- ip_vs_fill_iph_skb(af, skb, &iph);
|
|
|
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
|
|
|
|
|
|
/* Bad... Do not break raw sockets */
|
|
|
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
|
|
@@ -1680,13 +1760,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
|
|
|
/* Protocol supported? */
|
|
|
pd = ip_vs_proto_data_get(net, iph.protocol);
|
|
|
- if (unlikely(!pd))
|
|
|
+ if (unlikely(!pd)) {
|
|
|
+ /* The only way we'll see this packet again is if it's
|
|
|
+ * encapsulated, so mark it with ipvs_property=1 so we
|
|
|
+ * skip it if we're ignoring tunneled packets
|
|
|
+ */
|
|
|
+ if (sysctl_ignore_tunneled(ipvs))
|
|
|
+ skb->ipvs_property = 1;
|
|
|
+
|
|
|
return NF_ACCEPT;
|
|
|
+ }
|
|
|
pp = pd->pp;
|
|
|
/*
|
|
|
* Check if the packet belongs to an existing connection entry
|
|
|
*/
|
|
|
- cp = pp->conn_in_get(af, skb, &iph, 0);
|
|
|
+ cp = pp->conn_in_get(af, skb, &iph);
|
|
|
|
|
|
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
|
|
|
if (conn_reuse_mode && !iph.fragoffs &&
|
|
@@ -1700,32 +1788,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
|
|
|
cp = NULL;
|
|
|
}
|
|
|
|
|
|
- if (unlikely(!cp) && !iph.fragoffs) {
|
|
|
- /* No (second) fragments need to enter here, as nf_defrag_ipv6
|
|
|
- * replayed fragment zero will already have created the cp
|
|
|
- */
|
|
|
+ if (unlikely(!cp)) {
|
|
|
int v;
|
|
|
|
|
|
- /* Schedule and create new connection entry into &cp */
|
|
|
- if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
|
|
|
+ if (!ip_vs_try_to_schedule(af, skb, pd, &v, &cp, &iph))
|
|
|
return v;
|
|
|
}
|
|
|
|
|
|
- if (unlikely(!cp)) {
|
|
|
- /* sorry, all this trouble for a no-hit :) */
|
|
|
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
|
|
|
- "ip_vs_in: packet continues traversal as normal");
|
|
|
- if (iph.fragoffs) {
|
|
|
- /* Fragment that couldn't be mapped to a conn entry
|
|
|
- * is missing module nf_defrag_ipv6
|
|
|
- */
|
|
|
- IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
|
|
|
- IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
|
|
|
- }
|
|
|
- return NF_ACCEPT;
|
|
|
- }
|
|
|
+ IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
|
|
|
|
|
|
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
|
|
|
/* Check the server status */
|
|
|
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
|
|
|
/* the destination server is not available */
|
|
@@ -1859,7 +1930,7 @@ ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
|
|
|
struct netns_ipvs *ipvs;
|
|
|
struct ip_vs_iphdr iphdr;
|
|
|
|
|
|
- ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
|
|
|
+ ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
|
|
|
if (iphdr.protocol != IPPROTO_ICMPV6)
|
|
|
return NF_ACCEPT;
|
|
|
|