@@ -1,7 +1,7 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -13,39 +13,106 @@
 #include <linux/timer.h>
 #include <linux/skbuff.h>
 #include <linux/gfp.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>  /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
+#include <net/xfrm.h>
 #include <linux/jhash.h>
+#include <linux/rtnetlink.h>

-#include <linux/netfilter_ipv4.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_nat.h>

 static DEFINE_SPINLOCK(nf_nat_lock);

-static struct nf_conntrack_l3proto *l3proto __read_mostly;
-
-#define MAX_IP_NAT_PROTO 256
-static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
+static DEFINE_MUTEX(nf_nat_proto_mutex);
+static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
+						__read_mostly;
+static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
 						__read_mostly;

-static inline const struct nf_nat_protocol *
-__nf_nat_proto_find(u_int8_t protonum)
+
+inline const struct nf_nat_l3proto *
+__nf_nat_l3proto_find(u8 family)
 {
-	return rcu_dereference(nf_nat_protos[protonum]);
+	return rcu_dereference(nf_nat_l3protos[family]);
 }

+inline const struct nf_nat_l4proto *
+__nf_nat_l4proto_find(u8 family, u8 protonum)
+{
+	return rcu_dereference(nf_nat_l4protos[family][protonum]);
+}
+EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find);
+
+#ifdef CONFIG_XFRM
+static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	const struct nf_nat_l3proto *l3proto;
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	enum ip_conntrack_dir dir;
+	unsigned long statusbit;
+	u8 family;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return;
+
+	family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	rcu_read_lock();
+	l3proto = __nf_nat_l3proto_find(family);
+	if (l3proto == NULL)
+		goto out;
+
+	dir = CTINFO2DIR(ctinfo);
+	if (dir == IP_CT_DIR_ORIGINAL)
+		statusbit = IPS_DST_NAT;
+	else
+		statusbit = IPS_SRC_NAT;
+
+	l3proto->decode_session(skb, ct, dir, statusbit, fl);
+out:
+	rcu_read_unlock();
+}
+
+int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
+{
+	struct flowi fl;
+	unsigned int hh_len;
+	struct dst_entry *dst;
+
+	if (xfrm_decode_session(skb, &fl, family) < 0)
+		return -1;
+
+	dst = skb_dst(skb);
+	if (dst->xfrm)
+		dst = ((struct xfrm_dst *)dst)->route;
+	dst_hold(dst);
+
+	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
+	if (IS_ERR(dst))
+		return -1;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = skb_dst(skb)->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -1;
+	return 0;
+}
+EXPORT_SYMBOL(nf_xfrm_me_harder);
+#endif /* CONFIG_XFRM */
+
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
 hash_by_src(const struct net *net, u16 zone,
@@ -54,10 +121,9 @@ hash_by_src(const struct net *net, u16 zone,
 	unsigned int hash;

 	/* Original src, to ensure we map it consistently if poss. */
-	hash = jhash_3words((__force u32)tuple->src.u3.ip,
-			    (__force u32)tuple->src.u.all ^ zone,
-			    tuple->dst.protonum, nf_conntrack_hash_rnd);
-	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
+	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+		      tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
+	return ((u64)hash * net->ct.nat_htable_size) >> 32;
 }

 /* Is this tuple already taken? (not by us) */
@@ -66,10 +132,11 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 		  const struct nf_conn *ignored_conntrack)
 {
 	/* Conntrack tracking doesn't keep track of outgoing tuples; only
-	   incoming ones. NAT means they don't have a fixed mapping,
-	   so we invert the tuple and look for the incoming reply.
-
-	   We could keep a separate hash if this proves too slow. */
+	 * incoming ones. NAT means they don't have a fixed mapping,
+	 * so we invert the tuple and look for the incoming reply.
+	 *
+	 * We could keep a separate hash if this proves too slow.
+	 */
 	struct nf_conntrack_tuple reply;

 	nf_ct_invert_tuplepr(&reply, tuple);
@@ -78,31 +145,26 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 EXPORT_SYMBOL(nf_nat_used_tuple);

 /* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct nf_conntrack_tuple *tuple,
-	 const struct nf_nat_ipv4_range *range)
+ * that meet the constraints of range.
+ */
+static int in_range(const struct nf_nat_l3proto *l3proto,
+		    const struct nf_nat_l4proto *l4proto,
+		    const struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range)
 {
-	const struct nf_nat_protocol *proto;
-	int ret = 0;
-
 	/* If we are supposed to map IPs, then we must be in the
-	   range specified, otherwise let this drag us onto a new src IP. */
-	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
-		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
-		    ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
-			return 0;
-	}
+	 * range specified, otherwise let this drag us onto a new src IP.
+	 */
+	if (range->flags & NF_NAT_RANGE_MAP_IPS &&
+	    !l3proto->in_range(tuple, range))
+		return 0;

-	rcu_read_lock();
-	proto = __nf_nat_proto_find(tuple->dst.protonum);
 	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
-	    proto->in_range(tuple, NF_NAT_MANIP_SRC,
-			    &range->min, &range->max))
-		ret = 1;
-	rcu_read_unlock();
+	    l4proto->in_range(tuple, NF_NAT_MANIP_SRC,
+			      &range->min_proto, &range->max_proto))
+		return 1;

-	return ret;
+	return 0;
 }

 static inline int
@@ -113,24 +175,25 @@ same_src(const struct nf_conn *ct,

 	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 	return (t->dst.protonum == tuple->dst.protonum &&
-		t->src.u3.ip == tuple->src.u3.ip &&
+		nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) &&
 		t->src.u.all == tuple->src.u.all);
 }

 /* Only called for SRC manip */
 static int
 find_appropriate_src(struct net *net, u16 zone,
+		     const struct nf_nat_l3proto *l3proto,
+		     const struct nf_nat_l4proto *l4proto,
 		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
-		     const struct nf_nat_ipv4_range *range)
+		     const struct nf_nat_range *range)
 {
 	unsigned int h = hash_by_src(net, zone, tuple);
 	const struct nf_conn_nat *nat;
 	const struct nf_conn *ct;
 	const struct hlist_node *n;

-	rcu_read_lock();
-	hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
+	hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) {
 		ct = nat->ct;
 		if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
 			/* Copy source part from reply tuple. */
@@ -138,119 +201,150 @@ find_appropriate_src(struct net *net, u16 zone,
 					     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 			result->dst = tuple->dst;

-			if (in_range(result, range)) {
+			if (in_range(l3proto, l4proto, result, range)) {
 				rcu_read_unlock();
 				return 1;
 			}
 		}
 	}
-	rcu_read_unlock();
 	return 0;
 }

 /* For [FUTURE] fragmentation handling, we want the least-used
-   src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
-   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
-   1-65535, we don't do pro-rata allocation based on ports; we choose
-   the ip with the lowest src-ip/dst-ip/proto usage.
-*/
+ * src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
+ * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+ * 1-65535, we don't do pro-rata allocation based on ports; we choose
+ * the ip with the lowest src-ip/dst-ip/proto usage.
+ */
 static void
 find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_ipv4_range *range,
+		    const struct nf_nat_range *range,
 		    const struct nf_conn *ct,
 		    enum nf_nat_manip_type maniptype)
 {
-	__be32 *var_ipp;
+	union nf_inet_addr *var_ipp;
+	unsigned int i, max;
 	/* Host order */
-	u_int32_t minip, maxip, j;
+	u32 minip, maxip, j, dist;
+	bool full_range;

 	/* No IP mapping? Do nothing. */
 	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
 		return;

 	if (maniptype == NF_NAT_MANIP_SRC)
-		var_ipp = &tuple->src.u3.ip;
+		var_ipp = &tuple->src.u3;
 	else
-		var_ipp = &tuple->dst.u3.ip;
+		var_ipp = &tuple->dst.u3;

 	/* Fast path: only one choice. */
-	if (range->min_ip == range->max_ip) {
-		*var_ipp = range->min_ip;
+	if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
+		*var_ipp = range->min_addr;
 		return;
 	}

+	if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+		max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
+	else
+		max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;
+
 	/* Hashing source and destination IPs gives a fairly even
 	 * spread in practice (if there are a small number of IPs
 	 * involved, there usually aren't that many connections
 	 * anyway). The consistency means that servers see the same
 	 * client coming from the same IP (some Internet Banking sites
-	 * like this), even across reboots. */
-	minip = ntohl(range->min_ip);
-	maxip = ntohl(range->max_ip);
-	j = jhash_2words((__force u32)tuple->src.u3.ip,
-			 range->flags & NF_NAT_RANGE_PERSISTENT ?
-				0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
-	j = ((u64)j * (maxip - minip + 1)) >> 32;
-	*var_ipp = htonl(minip + j);
+	 * like this), even across reboots.
+	 */
+	j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3),
+		   range->flags & NF_NAT_RANGE_PERSISTENT ?
+			0 : (__force u32)tuple->dst.u3.all[max] ^ zone);
+
+	full_range = false;
+	for (i = 0; i <= max; i++) {
+		/* If first bytes of the address are at the maximum, use the
+		 * distance. Otherwise use the full range.
+		 */
+		if (!full_range) {
+			minip = ntohl((__force __be32)range->min_addr.all[i]);
+			maxip = ntohl((__force __be32)range->max_addr.all[i]);
+			dist = maxip - minip + 1;
+		} else {
+			minip = 0;
+			dist = ~0;
+		}
+
+		var_ipp->all[i] = (__force __u32)
+			htonl(minip + (((u64)j * dist) >> 32));
+		if (var_ipp->all[i] != range->max_addr.all[i])
+			full_range = true;
+
+		if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
+			j ^= (__force u32)tuple->dst.u3.all[i];
+	}
 }

-/* Manipulate the tuple into the range given.  For NF_INET_POST_ROUTING,
- * we change the source to map into the range.  For NF_INET_PRE_ROUTING
+/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
+ * we change the source to map into the range. For NF_INET_PRE_ROUTING
  * and NF_INET_LOCAL_OUT, we change the destination to map into the
- * range.  It might not be possible to get a unique tuple, but we try.
+ * range. It might not be possible to get a unique tuple, but we try.
  * At worst (or if we race), we will end up with a final duplicate in
  * __ip_conntrack_confirm and drop the packet. */
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
-		 const struct nf_nat_ipv4_range *range,
+		 const struct nf_nat_range *range,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
+	const struct nf_nat_l3proto *l3proto;
+	const struct nf_nat_l4proto *l4proto;
 	struct net *net = nf_ct_net(ct);
-	const struct nf_nat_protocol *proto;
 	u16 zone = nf_ct_zone(ct);

-	/* 1) If this srcip/proto/src-proto-part is currently mapped,
-	   and that same mapping gives a unique tuple within the given
-	   range, use that.
+	rcu_read_lock();
+	l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
+	l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
+					orig_tuple->dst.protonum);

-	   This is only required for source (ie. NAT/masq) mappings.
-	   So far, we don't do local source mappings, so multiple
-	   manips not an issue. */
+	/* 1) If this srcip/proto/src-proto-part is currently mapped,
+	 * and that same mapping gives a unique tuple within the given
+	 * range, use that.
+	 *
+	 * This is only required for source (ie. NAT/masq) mappings.
+	 * So far, we don't do local source mappings, so multiple
+	 * manips not an issue.
+	 */
 	if (maniptype == NF_NAT_MANIP_SRC &&
 	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
 		/* try the original tuple first */
-		if (in_range(orig_tuple, range)) {
+		if (in_range(l3proto, l4proto, orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
 				*tuple = *orig_tuple;
-				return;
+				goto out;
 			}
-		} else if (find_appropriate_src(net, zone, orig_tuple, tuple,
-						range)) {
+		} else if (find_appropriate_src(net, zone, l3proto, l4proto,
+						orig_tuple, tuple, range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!nf_nat_used_tuple(tuple, ct))
-				return;
+				goto out;
 		}
 	}

-	/* 2) Select the least-used IP/proto combination in the given
-	   range. */
+	/* 2) Select the least-used IP/proto combination in the given range */
 	*tuple = *orig_tuple;
 	find_best_ips_proto(zone, tuple, range, ct, maniptype);

 	/* 3) The per-protocol part of the manip is made to map into
-	   the range to make a unique tuple. */
-
-	rcu_read_lock();
-	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
+	 * the range to make a unique tuple.
+	 */

 	/* Only bother mapping if it's not already in range and unique */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-			if (proto->in_range(tuple, maniptype, &range->min,
-					    &range->max) &&
-			    (range->min.all == range->max.all ||
+			if (l4proto->in_range(tuple, maniptype,
+					      &range->min_proto,
+					      &range->max_proto) &&
+			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				goto out;
 		} else if (!nf_nat_used_tuple(tuple, ct)) {
@@ -259,14 +353,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	}

 	/* Last change: get protocol to try to obtain unique tuple. */
-	proto->unique_tuple(tuple, range, maniptype, ct);
+	l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
 out:
 	rcu_read_unlock();
 }

 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
-		  const struct nf_nat_ipv4_range *range,
+		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
 	struct net *net = nf_ct_net(ct);
@@ -288,10 +382,10 @@ nf_nat_setup_info(struct nf_conn *ct,
 	BUG_ON(nf_nat_initialized(ct, maniptype));

 	/* What we've got will look like inverse of reply. Normally
-	   this is what is in the conntrack, except for prior
-	   manipulations (future optimization: if num_manips == 0,
-	   orig_tp =
-	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
+	 * this is what is in the conntrack, except for prior
+	 * manipulations (future optimization: if num_manips == 0,
+	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
+	 */
 	nf_ct_invert_tuplepr(&curr_tuple,
 			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

@@ -317,11 +411,11 @@ nf_nat_setup_info(struct nf_conn *ct,
 		srchash = hash_by_src(net, nf_ct_zone(ct),
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		spin_lock_bh(&nf_nat_lock);
-		/* nf_conntrack_alter_reply might re-allocate extension area */
+		/* nf_conntrack_alter_reply might re-allocate extension area */
 		nat = nfct_nat(ct);
 		nat->ct = ct;
 		hlist_add_head_rcu(&nat->bysource,
-				   &net->ipv4.nat_bysource[srchash]);
+				   &net->ct.nat_bysource[srchash]);
 		spin_unlock_bh(&nf_nat_lock);
 	}

@@ -335,47 +429,14 @@ nf_nat_setup_info(struct nf_conn *ct,
 }
 EXPORT_SYMBOL(nf_nat_setup_info);

-/* Returns true if succeeded. */
-static bool
-manip_pkt(u_int16_t proto,
-	  struct sk_buff *skb,
-	  unsigned int iphdroff,
-	  const struct nf_conntrack_tuple *target,
-	  enum nf_nat_manip_type maniptype)
-{
-	struct iphdr *iph;
-	const struct nf_nat_protocol *p;
-
-	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
-		return false;
-
-	iph = (void *)skb->data + iphdroff;
-
-	/* Manipulate protcol part. */
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	p = __nf_nat_proto_find(proto);
-	if (!p->manip_pkt(skb, iphdroff, target, maniptype))
-		return false;
-
-	iph = (void *)skb->data + iphdroff;
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
-		iph->saddr = target->src.u3.ip;
-	} else {
-		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
-		iph->daddr = target->dst.u3.ip;
-	}
-	return true;
-}
-
 /* Do packet manipulations according to nf_nat_setup_info. */
 unsigned int nf_nat_packet(struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned int hooknum,
 			   struct sk_buff *skb)
 {
+	const struct nf_nat_l3proto *l3proto;
+	const struct nf_nat_l4proto *l4proto;
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
 	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
@@ -396,129 +457,174 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
 		/* We are aiming to look like inverse of other direction. */
 		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

-		if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
+		l3proto = __nf_nat_l3proto_find(target.src.l3num);
+		l4proto = __nf_nat_l4proto_find(target.src.l3num,
+						target.dst.protonum);
+		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
 			return NF_DROP;
 	}
 	return NF_ACCEPT;
 }
 EXPORT_SYMBOL_GPL(nf_nat_packet);

-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int nf_nat_icmp_reply_translation(struct nf_conn *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned int hooknum,
-				  struct sk_buff *skb)
+struct nf_nat_proto_clean {
+	u8	l3proto;
+	u8	l4proto;
+	bool	hash;
+};
+
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int nf_nat_proto_clean(struct nf_conn *i, void *data)
 {
-	struct {
-		struct icmphdr icmp;
-		struct iphdr ip;
-	} *inside;
-	struct nf_conntrack_tuple target;
-	int hdrlen = ip_hdrlen(skb);
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned long statusbit;
-	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+	const struct nf_nat_proto_clean *clean = data;
+	struct nf_conn_nat *nat = nfct_nat(i);

-	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+	if (!nat)
 		return 0;
-
-	inside = (void *)skb->data + hdrlen;
-
-	/* We're actually going to mangle it beyond trivial checksum
-	   adjustment, so make sure the current checksum is correct. */
-	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
+	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
 		return 0;

-	/* Must be RELATED */
-	NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
-		     skb->nfctinfo == IP_CT_RELATED_REPLY);
-
-	/* Redirects on non-null nats must be dropped, else they'll
-	   start talking to each other without our translation, and be
-	   confused... --RR */
-	if (inside->icmp.type == ICMP_REDIRECT) {
-		/* If NAT isn't finished, assume it and drop. */
-		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-			return 0;
-
-		if (ct->status & IPS_NAT_MASK)
-			return 0;
+	if (clean->hash) {
+		spin_lock_bh(&nf_nat_lock);
+		hlist_del_rcu(&nat->bysource);
+		spin_unlock_bh(&nf_nat_lock);
+	} else {
+		memset(nat, 0, sizeof(*nat));
+		i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK |
+			       IPS_SEQ_ADJUST);
 	}
+	return 0;
+}

-	if (manip == NF_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (!(ct->status & statusbit))
-		return 1;
-
-	pr_debug("icmp_reply_translation: translating error %p manip %u "
-		 "dir %s\n", skb, manip,
-		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
-	/* Change inner back to look like incoming packet. We do the
-	   opposite manip on this hook to normal, because it might not
-	   pass all hooks (locally-generated ICMP). Consider incoming
-	   packet: PREROUTING (DST manip), routing produces ICMP, goes
-	   through POSTROUTING (which must correct the DST manip). */
-	if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
-		       &ct->tuplehash[!dir].tuple, !manip))
-		return 0;
+static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
+{
+	struct nf_nat_proto_clean clean = {
+		.l3proto = l3proto,
+		.l4proto = l4proto,
+	};
+	struct net *net;
+
+	rtnl_lock();
+	/* Step 1 - remove from bysource hash */
+	clean.hash = true;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	synchronize_rcu();

-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)skb->data + hdrlen;
-		inside->icmp.checksum = 0;
-		inside->icmp.checksum =
-			csum_fold(skb_checksum(skb, hdrlen,
-					       skb->len - hdrlen, 0));
-	}
+	/* Step 2 - clean NAT section */
+	clean.hash = false;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	rtnl_unlock();
+}

-	/* Change outer to look the reply to an incoming packet
-	 * (proto 0 means don't invert per-proto part). */
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-	if (!manip_pkt(0, skb, 0, &target, manip))
-		return 0;
+static void nf_nat_l3proto_clean(u8 l3proto)
+{
+	struct nf_nat_proto_clean clean = {
+		.l3proto = l3proto,
+	};
+	struct net *net;
+
+	rtnl_lock();
+	/* Step 1 - remove from bysource hash */
+	clean.hash = true;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	synchronize_rcu();

-	return 1;
+	/* Step 2 - clean NAT section */
+	clean.hash = false;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	rtnl_unlock();
 }
-EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);

 /* Protocol registration. */
-int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
+int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
 {
+	const struct nf_nat_l4proto **l4protos;
+	unsigned int i;
 	int ret = 0;

-	spin_lock_bh(&nf_nat_lock);
+	mutex_lock(&nf_nat_proto_mutex);
+	if (nf_nat_l4protos[l3proto] == NULL) {
+		l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
+				   GFP_KERNEL);
+		if (l4protos == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		for (i = 0; i < IPPROTO_MAX; i++)
+			RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);
+
+		/* Before making proto_array visible to lockless readers,
+		 * we must make sure its content is committed to memory.
+		 */
+		smp_wmb();
+
+		nf_nat_l4protos[l3proto] = l4protos;
+	}
+
 	if (rcu_dereference_protected(
-			nf_nat_protos[proto->protonum],
-			lockdep_is_held(&nf_nat_lock)
-			) != &nf_nat_unknown_protocol) {
+			nf_nat_l4protos[l3proto][l4proto->l4proto],
+			lockdep_is_held(&nf_nat_proto_mutex)
+			) != &nf_nat_l4proto_unknown) {
 		ret = -EBUSY;
 		goto out;
 	}
-	RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto);
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
 out:
-	spin_unlock_bh(&nf_nat_lock);
+	mutex_unlock(&nf_nat_proto_mutex);
 	return ret;
 }
-EXPORT_SYMBOL(nf_nat_protocol_register);
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);

 /* No one stores the protocol anywhere; simply delete it. */
-void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
+void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
 {
-	spin_lock_bh(&nf_nat_lock);
-	RCU_INIT_POINTER(nf_nat_protos[proto->protonum],
-			 &nf_nat_unknown_protocol);
-	spin_unlock_bh(&nf_nat_lock);
+	mutex_lock(&nf_nat_proto_mutex);
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
+			 &nf_nat_l4proto_unknown);
+	mutex_unlock(&nf_nat_proto_mutex);
 	synchronize_rcu();
+
+	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
 }
-EXPORT_SYMBOL(nf_nat_protocol_unregister);
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
+
+int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
+{
+	int err;
+
+	err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&nf_nat_proto_mutex);
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
+			 &nf_nat_l4proto_tcp);
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
+			 &nf_nat_l4proto_udp);
+	mutex_unlock(&nf_nat_proto_mutex);
+
+	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_register);
+
+void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
+{
+	mutex_lock(&nf_nat_proto_mutex);
+	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL);
+	mutex_unlock(&nf_nat_proto_mutex);
+	synchronize_rcu();
+
+	nf_nat_l3proto_clean(l3proto->l3proto);
+	nf_ct_l3proto_module_put(l3proto->l3proto);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);

 /* No one using conntrack by the time this called. */
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
@@ -570,34 +676,34 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {

 static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 				     const struct nf_conn *ct,
-				     struct nf_nat_ipv4_range *range)
+				     struct nf_nat_range *range)
 {
 	struct nlattr *tb[CTA_PROTONAT_MAX+1];
-	const struct nf_nat_protocol *npt;
+	const struct nf_nat_l4proto *l4proto;
 	int err;

 	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
 	if (err < 0)
 		return err;

-	rcu_read_lock();
-	npt = __nf_nat_proto_find(nf_ct_protonum(ct));
-	if (npt->nlattr_to_range)
-		err = npt->nlattr_to_range(tb, range);
-	rcu_read_unlock();
+	l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	if (l4proto->nlattr_to_range)
+		err = l4proto->nlattr_to_range(tb, range);
+
 	return err;
 }

 static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
-	[CTA_NAT_MINIP]		= { .type = NLA_U32 },
-	[CTA_NAT_MAXIP]		= { .type = NLA_U32 },
+	[CTA_NAT_V4_MINIP]	= { .type = NLA_U32 },
+	[CTA_NAT_V4_MAXIP]	= { .type = NLA_U32 },
 	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
 };

 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
+		    const struct nf_conn *ct, struct nf_nat_range *range)
 {
+	const struct nf_nat_l3proto *l3proto;
 	struct nlattr *tb[CTA_NAT_MAX+1];
 	int err;

@@ -607,25 +713,23 @@ nfnetlink_parse_nat(const struct nlattr *nat,
 	if (err < 0)
 		return err;

-	if (tb[CTA_NAT_MINIP])
-		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
-	if (!tb[CTA_NAT_MAXIP])
-		range->max_ip = range->min_ip;
-	else
-		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
-	if (range->min_ip)
-		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	rcu_read_lock();
+	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+	if (l3proto == NULL) {
+		err = -EAGAIN;
+		goto out;
+	}
+	err = l3proto->nlattr_to_range(tb, range);
+	if (err < 0)
+		goto out;

 	if (!tb[CTA_NAT_PROTO])
-		return 0;
+		goto out;

 	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
-	if (err < 0)
-		return err;
-
-	return 0;
+out:
+	rcu_read_unlock();
+	return err;
 }

 static int
@@ -633,10 +737,12 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
 			  const struct nlattr *attr)
 {
-	struct nf_nat_ipv4_range range;
+	struct nf_nat_range range;
+	int err;

-	if (nfnetlink_parse_nat(attr, ct, &range) < 0)
-		return -EINVAL;
+	err = nfnetlink_parse_nat(attr, ct, &range);
+	if (err < 0)
+		return err;
 	if (nf_nat_initialized(ct, manip))
 		return -EEXIST;

@@ -655,30 +761,20 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 static int __net_init nf_nat_net_init(struct net *net)
 {
 	/* Leave them the same for the moment. */
-	net->ipv4.nat_htable_size = net->ct.htable_size;
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
-	if (!net->ipv4.nat_bysource)
+	net->ct.nat_htable_size = net->ct.htable_size;
+	net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
+	if (!net->ct.nat_bysource)
 		return -ENOMEM;
 	return 0;
 }

-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct nf_conn *i, void *data)
-{
-	struct nf_conn_nat *nat = nfct_nat(i);
-
-	if (!nat)
-		return 0;
-	memset(nat, 0, sizeof(*nat));
-	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
-	return 0;
-}
-
 static void __net_exit nf_nat_net_exit(struct net *net)
 {
-	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
+	struct nf_nat_proto_clean clean = {};
+
+	nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean);
 	synchronize_rcu();
-	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
+	nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
 }

 static struct pernet_operations nf_nat_net_ops = {
@@ -697,11 +793,8 @@ static struct nfq_ct_nat_hook nfq_ct_nat = {

 static int __init nf_nat_init(void)
 {
-	size_t i;
 	int ret;

-	need_ipv4_conntrack();
-
 	ret = nf_ct_extend_register(&nat_extend);
 	if (ret < 0) {
 		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
@@ -712,22 +805,11 @@ static int __init nf_nat_init(void)
 	if (ret < 0)
 		goto cleanup_extend;

-	/* Sew in builtin protocols. */
-	spin_lock_bh(&nf_nat_lock);
-	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol);
-	RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
-	RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
-	RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
-	spin_unlock_bh(&nf_nat_lock);
+	nf_ct_helper_expectfn_register(&follow_master_nat);

 	/* Initialize fake conntrack so that NAT will skip it */
 	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);

-	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
-
-	nf_ct_helper_expectfn_register(&follow_master_nat);
-
 	BUG_ON(nf_nat_seq_adjust_hook != NULL);
 	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -736,6 +818,10 @@ static int __init nf_nat_init(void)
 	BUG_ON(nf_ct_nat_offset != NULL);
 	RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
 	RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
+#ifdef CONFIG_XFRM
+	BUG_ON(nf_nat_decode_session_hook != NULL);
+	RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
+#endif
 	return 0;

 cleanup_extend:
@@ -745,19 +831,24 @@ static int __init nf_nat_init(void)

 static void __exit nf_nat_cleanup(void)
 {
+	unsigned int i;
+
 	unregister_pernet_subsys(&nf_nat_net_ops);
-	nf_ct_l3proto_put(l3proto);
 	nf_ct_extend_unregister(&nat_extend);
 	nf_ct_helper_expectfn_unregister(&follow_master_nat);
 	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
 	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
 	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
 	RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
+#ifdef CONFIG_XFRM
+	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
+#endif
+	for (i = 0; i < NFPROTO_NUMPROTO; i++)
+		kfree(nf_nat_l4protos[i]);
 	synchronize_net();
 }

 MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-ipv4");

 module_init(nf_nat_init);
 module_exit(nf_nat_cleanup);