@@ -54,6 +54,7 @@
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <net/netns/hash.h>
 
 #define NF_CONNTRACK_VERSION "0.5.0"
 
@@ -68,7 +69,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+
+static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly seqcount_t nf_conntrack_generation;
 static __read_mostly bool nf_conntrack_locks_all;
 
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -107,7 +113,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
 		spin_lock_nested(&nf_conntrack_locks[h1],
 				 SINGLE_DEPTH_NESTING);
 	}
-	if (read_seqcount_retry(&net->ct.generation, sequence)) {
+	if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
 		nf_conntrack_double_unlock(h1, h2);
 		return true;
 	}
@@ -141,43 +147,43 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
-unsigned int nf_conntrack_hash_rnd __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
+static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
-static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+			      const struct net *net)
 {
 	unsigned int n;
+	u32 seed;
+
+	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
 
 	/* The direction must be ignored, so we hash everything up to the
 	 * destination ports (which is a multiple of 4) and treat the last
 	 * three bytes manually.
 	 */
+	seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
 	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
-	return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
+	return jhash2((u32 *)tuple, n, seed ^
 		      (((__force __u16)tuple->dst.u.all << 16) |
 		       tuple->dst.protonum));
 }
 
-static u32 __hash_bucket(u32 hash, unsigned int size)
-{
-	return reciprocal_scale(hash, size);
-}
-
-static u32 hash_bucket(u32 hash, const struct net *net)
+static u32 scale_hash(u32 hash)
 {
-	return __hash_bucket(hash, net->ct.htable_size);
+	return reciprocal_scale(hash, nf_conntrack_htable_size);
 }
 
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-				  unsigned int size)
+static u32 __hash_conntrack(const struct net *net,
+			    const struct nf_conntrack_tuple *tuple,
+			    unsigned int size)
 {
-	return __hash_bucket(hash_conntrack_raw(tuple), size);
+	return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
 }
 
-static inline u_int32_t hash_conntrack(const struct net *net,
-				       const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack(const struct net *net,
+			  const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, net->ct.htable_size);
+	return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
 bool
@@ -358,7 +364,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	}
 	rcu_read_lock();
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	if (l4proto && l4proto->destroy)
+	if (l4proto->destroy)
 		l4proto->destroy(ct);
 
 	rcu_read_unlock();
@@ -393,7 +399,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 
 	local_bh_disable();
 	do {
-		sequence = read_seqcount_begin(&net->ct.generation);
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		hash = hash_conntrack(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		reply_hash = hash_conntrack(net,
@@ -445,7 +451,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
 static inline bool
 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 		const struct nf_conntrack_tuple *tuple,
-		const struct nf_conntrack_zone *zone)
+		const struct nf_conntrack_zone *zone,
+		const struct net *net)
 {
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 
@@ -454,7 +461,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 	 */
 	return nf_ct_tuple_equal(tuple, &h->tuple) &&
 	       nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
-	       nf_ct_is_confirmed(ct);
+	       nf_ct_is_confirmed(ct) &&
+	       net_eq(net, nf_ct_net(ct));
 }
 
 /*
@@ -467,21 +475,23 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
 		      const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
+	struct hlist_nulls_head *ct_hash;
 	struct hlist_nulls_node *n;
-	unsigned int bucket = hash_bucket(hash, net);
+	unsigned int bucket, sequence;
 
-	/* Disable BHs the entire time since we normally need to disable them
-	 * at least once for the stats anyway.
-	 */
-	local_bh_disable();
 begin:
-	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
-		if (nf_ct_key_equal(h, tuple, zone)) {
-			NF_CT_STAT_INC(net, found);
-			local_bh_enable();
+	do {
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
+		bucket = scale_hash(hash);
+		ct_hash = nf_conntrack_hash;
+	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+		if (nf_ct_key_equal(h, tuple, zone, net)) {
+			NF_CT_STAT_INC_ATOMIC(net, found);
 			return h;
 		}
-		NF_CT_STAT_INC(net, searched);
+		NF_CT_STAT_INC_ATOMIC(net, searched);
 	}
 	/*
 	 * if the nulls value we got at the end of this lookup is
@@ -489,10 +499,9 @@ begin:
 	 * We probably met an item that was moved to another chain.
 	 */
 	if (get_nulls_value(n) != bucket) {
-		NF_CT_STAT_INC(net, search_restart);
+		NF_CT_STAT_INC_ATOMIC(net, search_restart);
 		goto begin;
 	}
-	local_bh_enable();
 
 	return NULL;
 }
@@ -514,7 +523,7 @@ begin:
 		     !atomic_inc_not_zero(&ct->ct_general.use)))
 			h = NULL;
 		else {
-			if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
+			if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
 				nf_ct_put(ct);
 				goto begin;
 			}
@@ -530,7 +539,7 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
 		      const struct nf_conntrack_tuple *tuple)
 {
 	return __nf_conntrack_find_get(net, zone, tuple,
-				       hash_conntrack_raw(tuple));
+				       hash_conntrack_raw(tuple, net));
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
@@ -538,12 +547,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int hash,
 				       unsigned int reply_hash)
 {
-	struct net *net = nf_ct_net(ct);
-
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-			   &net->ct.hash[hash]);
+			   &nf_conntrack_hash[hash]);
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
-			   &net->ct.hash[reply_hash]);
+			   &nf_conntrack_hash[reply_hash]);
 }
 
 int
@@ -560,7 +567,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 
 	local_bh_disable();
 	do {
-		sequence = read_seqcount_begin(&net->ct.generation);
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		hash = hash_conntrack(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		reply_hash = hash_conntrack(net,
@@ -568,17 +575,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
 	/* See if there's one in the list already, including reverse */
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				    zone, net))
 			goto out;
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				    zone, net))
 			goto out;
 
 	add_timer(&ct->timeout);
@@ -599,6 +603,62 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
+static inline void nf_ct_acct_update(struct nf_conn *ct,
+				     enum ip_conntrack_info ctinfo,
+				     unsigned int len)
+{
+	struct nf_conn_acct *acct;
+
+	acct = nf_conn_acct_find(ct);
+	if (acct) {
+		struct nf_conn_counter *counter = acct->counter;
+
+		atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+		atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
+	}
+}
+
+static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			     const struct nf_conn *loser_ct)
+{
+	struct nf_conn_acct *acct;
+
+	acct = nf_conn_acct_find(loser_ct);
+	if (acct) {
+		struct nf_conn_counter *counter = acct->counter;
+		unsigned int bytes;
+
+		/* u32 should be fine since we must have seen one packet. */
+		bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+		nf_ct_acct_update(ct, ctinfo, bytes);
+	}
+}
+
+/* Resolve race on insertion if this protocol allows this. */
+static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
+			       enum ip_conntrack_info ctinfo,
+			       struct nf_conntrack_tuple_hash *h)
+{
+	/* This is the conntrack entry already in hashes that won race. */
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+	struct nf_conntrack_l4proto *l4proto;
+
+	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	if (l4proto->allow_clash &&
+	    !nf_ct_is_dying(ct) &&
+	    atomic_inc_not_zero(&ct->ct_general.use)) {
+		nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
+		nf_conntrack_put(skb->nfct);
+		/* Assign conntrack already in hashes to this skbuff. Don't
+		 * modify skb->nfctinfo to ensure consistent stateful filtering.
+		 */
+		skb->nfct = &ct->ct_general;
+		return NF_ACCEPT;
+	}
+	NF_CT_STAT_INC(net, drop);
+	return NF_DROP;
+}
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
@@ -613,6 +674,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
 	unsigned int sequence;
+	int ret = NF_DROP;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	net = nf_ct_net(ct);
@@ -628,10 +690,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	local_bh_disable();
 
 	do {
-		sequence = read_seqcount_begin(&net->ct.generation);
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		/* reuse the hash saved before */
 		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
-		hash = hash_bucket(hash, net);
+		hash = scale_hash(hash);
 		reply_hash = hash_conntrack(net,
 					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
@@ -655,23 +717,22 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	 */
 	nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
-	if (unlikely(nf_ct_is_dying(ct)))
-		goto out;
+	if (unlikely(nf_ct_is_dying(ct))) {
+		nf_ct_add_to_dying_list(ct);
+		goto dying;
+	}
 
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash. If there is, we lost race. */
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				    zone, net))
 			goto out;
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				    zone, net))
 			goto out;
 
 	/* Timer relative to confirmation time, not original
@@ -710,10 +771,12 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 
 out:
 	nf_ct_add_to_dying_list(ct);
+	ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+dying:
 	nf_conntrack_double_unlock(hash, reply_hash);
 	NF_CT_STAT_INC(net, insert_failed);
 	local_bh_enable();
-	return NF_DROP;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
 
@@ -726,29 +789,31 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	struct net *net = nf_ct_net(ignored_conntrack);
 	const struct nf_conntrack_zone *zone;
 	struct nf_conntrack_tuple_hash *h;
+	struct hlist_nulls_head *ct_hash;
+	unsigned int hash, sequence;
 	struct hlist_nulls_node *n;
 	struct nf_conn *ct;
-	unsigned int hash;
 
 	zone = nf_ct_zone(ignored_conntrack);
-	hash = hash_conntrack(net, tuple);
 
-	/* Disable BHs the entire time since we need to disable them at
-	 * least once for the stats anyway.
-	 */
-	rcu_read_lock_bh();
-	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+	rcu_read_lock();
+	do {
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
+		hash = hash_conntrack(net, tuple);
+		ct_hash = nf_conntrack_hash;
+	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (ct != ignored_conntrack &&
-		    nf_ct_tuple_equal(tuple, &h->tuple) &&
-		    nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
-			NF_CT_STAT_INC(net, found);
-			rcu_read_unlock_bh();
+		    nf_ct_key_equal(h, tuple, zone, net)) {
+			NF_CT_STAT_INC_ATOMIC(net, found);
+			rcu_read_unlock();
 			return 1;
 		}
-		NF_CT_STAT_INC(net, searched);
+		NF_CT_STAT_INC_ATOMIC(net, searched);
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -762,71 +827,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 {
 	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
-	struct nf_conn *ct = NULL, *tmp;
+	struct nf_conn *tmp;
 	struct hlist_nulls_node *n;
-	unsigned int i = 0, cnt = 0;
-	int dropped = 0;
-	unsigned int hash, sequence;
+	unsigned int i, hash, sequence;
+	struct nf_conn *ct = NULL;
 	spinlock_t *lockp;
+	bool ret = false;
+
+	i = 0;
 
 	local_bh_disable();
 restart:
-	sequence = read_seqcount_begin(&net->ct.generation);
-	hash = hash_bucket(_hash, net);
-	for (; i < net->ct.htable_size; i++) {
+	sequence = read_seqcount_begin(&nf_conntrack_generation);
+	for (; i < NF_CT_EVICTION_RANGE; i++) {
+		hash = scale_hash(_hash++);
 		lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
 		nf_conntrack_lock(lockp);
-		if (read_seqcount_retry(&net->ct.generation, sequence)) {
+		if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
 			spin_unlock(lockp);
 			goto restart;
 		}
-		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
-					       hnnode) {
+		hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
+					       hnnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
-			if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
-			    !nf_ct_is_dying(tmp) &&
-			    atomic_inc_not_zero(&tmp->ct_general.use)) {
+
+			if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+			    !net_eq(nf_ct_net(tmp), net) ||
+			    nf_ct_is_dying(tmp))
+				continue;
+
+			if (atomic_inc_not_zero(&tmp->ct_general.use)) {
 				ct = tmp;
 				break;
 			}
-			cnt++;
 		}
 
-		hash = (hash + 1) % net->ct.htable_size;
 		spin_unlock(lockp);
-
-		if (ct || cnt >= NF_CT_EVICTION_RANGE)
+		if (ct)
 			break;
-
 	}
+
 	local_bh_enable();
 
 	if (!ct)
-		return dropped;
+		return false;
 
-	if (del_timer(&ct->timeout)) {
+	/* kill only if in same netns -- might have moved due to
+	 * SLAB_DESTROY_BY_RCU rules
+	 */
+	if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
 		if (nf_ct_delete(ct, 0, 0)) {
-			dropped = 1;
 			NF_CT_STAT_INC_ATOMIC(net, early_drop);
+			ret = true;
 		}
 	}
-	nf_ct_put(ct);
-	return dropped;
-}
-
-void init_nf_conntrack_hash_rnd(void)
-{
-	unsigned int rand;
-
-	/*
-	 * Why not initialize nf_conntrack_rnd in a "init()" function ?
-	 * Because there isn't enough entropy when system initializing,
-	 * and we initialize it as late as possible.
-	 */
-	do {
-		get_random_bytes(&rand, sizeof(rand));
-	} while (!rand);
-	cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+
+	nf_ct_put(ct);
+	return ret;
 }
 
 static struct nf_conn *
@@ -838,12 +895,6 @@ __nf_conntrack_alloc(struct net *net,
 {
 	struct nf_conn *ct;
 
-	if (unlikely(!nf_conntrack_hash_rnd)) {
-		init_nf_conntrack_hash_rnd();
-		/* recompute the hash as nf_conntrack_hash_rnd is initialized */
-		hash = hash_conntrack_raw(orig);
-	}
-
 	/* We don't want any race condition at early drop stage */
 	atomic_inc(&net->ct.count);
 
@@ -860,7 +911,7 @@ __nf_conntrack_alloc(struct net *net,
 	 * Do not use kmem_cache_zalloc(), as this cache uses
 	 * SLAB_DESTROY_BY_RCU.
 	 */
-	ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
+	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL)
 		goto out;
 
@@ -887,7 +938,7 @@ __nf_conntrack_alloc(struct net *net,
 	atomic_set(&ct->ct_general.use, 0);
 	return ct;
 out_free:
-	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+	kmem_cache_free(nf_conntrack_cachep, ct);
 out:
 	atomic_dec(&net->ct.count);
 	return ERR_PTR(-ENOMEM);
@@ -914,7 +965,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 
 	nf_ct_ext_destroy(ct);
 	nf_ct_ext_free(ct);
-	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+	kmem_cache_free(nf_conntrack_cachep, ct);
 	smp_mb__before_atomic();
 	atomic_dec(&net->ct.count);
 }
@@ -1061,7 +1112,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 
 	/* look for tuple match */
 	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-	hash = hash_conntrack_raw(&tuple);
+	hash = hash_conntrack_raw(&tuple, net);
 	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
 	if (!h) {
 		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1270,17 +1321,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	}
 
 acct:
-	if (do_acct) {
-		struct nf_conn_acct *acct;
-
-		acct = nf_conn_acct_find(ct);
-		if (acct) {
-			struct nf_conn_counter *counter = acct->counter;
-
-			atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
-			atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
-		}
-	}
+	if (do_acct)
+		nf_ct_acct_update(ct, ctinfo, skb->len);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
@@ -1289,18 +1331,8 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       int do_acct)
 {
-	if (do_acct) {
-		struct nf_conn_acct *acct;
-
-		acct = nf_conn_acct_find(ct);
-		if (acct) {
-			struct nf_conn_counter *counter = acct->counter;
-
-			atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
-			atomic64_add(skb->len - skb_network_offset(skb),
-				     &counter[CTINFO2DIR(ctinfo)].bytes);
-		}
-	}
+	if (do_acct)
+		nf_ct_acct_update(ct, ctinfo, skb->len);
 
 	if (del_timer(&ct->timeout)) {
 		ct->timeout.function((unsigned long)ct);
@@ -1396,16 +1428,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	int cpu;
 	spinlock_t *lockp;
 
-	for (; *bucket < net->ct.htable_size; (*bucket)++) {
+	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
 		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
 		local_bh_disable();
 		nf_conntrack_lock(lockp);
-		if (*bucket < net->ct.htable_size) {
-			hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+		if (*bucket < nf_conntrack_htable_size) {
+			hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
 				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 					continue;
 				ct = nf_ct_tuplehash_to_ctrack(h);
-				if (iter(ct, data))
+				if (net_eq(nf_ct_net(ct), net) &&
+				    iter(ct, data))
 					goto found;
 			}
 		}
@@ -1443,6 +1476,9 @@ void nf_ct_iterate_cleanup(struct net *net,
 
 	might_sleep();
 
+	if (atomic_read(&net->ct.count) == 0)
+		return;
+
 	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
@@ -1494,6 +1530,8 @@ void nf_conntrack_cleanup_end(void)
 	while (untrack_refs() > 0)
 		schedule();
 
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
@@ -1544,15 +1582,12 @@ i_see_dead_people:
 	}
 
 	list_for_each_entry(net, net_exit_list, exit_list) {
-		nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 		nf_conntrack_proto_pernet_fini(net);
 		nf_conntrack_helper_pernet_fini(net);
 		nf_conntrack_ecache_pernet_fini(net);
 		nf_conntrack_tstamp_pernet_fini(net);
 		nf_conntrack_acct_pernet_fini(net);
 		nf_conntrack_expect_pernet_fini(net);
-		kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-		kfree(net->ct.slabname);
 		free_percpu(net->ct.stat);
 		free_percpu(net->ct.pcpu_lists);
 	}
@@ -1607,7 +1642,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 
 	local_bh_disable();
 	nf_conntrack_all_lock();
-	write_seqcount_begin(&init_net.ct.generation);
+	write_seqcount_begin(&nf_conntrack_generation);
 
 	/* Lookups in the old hash might happen in parallel, which means we
 	 * might get false negatives during connection lookup. New connections
@@ -1615,26 +1650,28 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	 * though since that required taking the locks.
 	 */
 
-	for (i = 0; i < init_net.ct.htable_size; i++) {
-		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
-			h = hlist_nulls_entry(init_net.ct.hash[i].first,
-					struct nf_conntrack_tuple_hash, hnnode);
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
+		while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+			h = hlist_nulls_entry(nf_conntrack_hash[i].first,
+					      struct nf_conntrack_tuple_hash, hnnode);
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
-			bucket = __hash_conntrack(&h->tuple, hashsize);
+			bucket = __hash_conntrack(nf_ct_net(ct),
+						  &h->tuple, hashsize);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
-	old_size = init_net.ct.htable_size;
-	old_hash = init_net.ct.hash;
+	old_size = nf_conntrack_htable_size;
+	old_hash = nf_conntrack_hash;
 
-	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-	init_net.ct.hash = hash;
+	nf_conntrack_hash = hash;
+	nf_conntrack_htable_size = hashsize;
 
-	write_seqcount_end(&init_net.ct.generation);
+	write_seqcount_end(&nf_conntrack_generation);
 	nf_conntrack_all_unlock();
 	local_bh_enable();
 
+	synchronize_net();
 	nf_ct_free_hashtable(old_hash, old_size);
 	return 0;
 }
@@ -1655,7 +1692,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
 int nf_conntrack_init_start(void)
 {
 	int max_factor = 8;
-	int i, ret, cpu;
+	int ret = -ENOMEM;
+	int i, cpu;
+
+	seqcount_init(&nf_conntrack_generation);
 
 	for (i = 0; i < CONNTRACK_LOCKS; i++)
 		spin_lock_init(&nf_conntrack_locks[i]);
@@ -1682,8 +1722,19 @@ int nf_conntrack_init_start(void)
 		 * entries. */
 		max_factor = 4;
 	}
+
+	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+	if (!nf_conntrack_hash)
+		return -ENOMEM;
+
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
+	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+						sizeof(struct nf_conn), 0,
+						SLAB_DESTROY_BY_RCU, NULL);
+	if (!nf_conntrack_cachep)
+		goto err_cachep;
+
 	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
@@ -1760,6 +1811,9 @@ err_tstamp:
 err_acct:
 	nf_conntrack_expect_fini();
 err_expect:
+	kmem_cache_destroy(nf_conntrack_cachep);
+err_cachep:
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
 	return ret;
 }
 
@@ -1783,7 +1837,6 @@ int nf_conntrack_init_net(struct net *net)
 	int cpu;
 
 	atomic_set(&net->ct.count, 0);
-	seqcount_init(&net->ct.generation);
 
 	net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
 	if (!net->ct.pcpu_lists)
@@ -1801,24 +1854,6 @@ int nf_conntrack_init_net(struct net *net)
 	if (!net->ct.stat)
 		goto err_pcpu_lists;
 
-	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
-	if (!net->ct.slabname)
-		goto err_slabname;
-
-	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
-							sizeof(struct nf_conn), 0,
-							SLAB_DESTROY_BY_RCU, NULL);
-	if (!net->ct.nf_conntrack_cachep) {
-		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		goto err_cache;
-	}
-
-	net->ct.htable_size = nf_conntrack_htable_size;
-	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
-	if (!net->ct.hash) {
-		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
-		goto err_hash;
-	}
-
 	ret = nf_conntrack_expect_pernet_init(net);
 	if (ret < 0)
 		goto err_expect;
@@ -1850,12 +1885,6 @@ err_tstamp:
 err_acct:
 	nf_conntrack_expect_pernet_fini(net);
 err_expect:
-	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-err_hash:
-	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-err_cache:
-	kfree(net->ct.slabname);
-err_slabname:
 	free_percpu(net->ct.stat);
 err_pcpu_lists:
 	free_percpu(net->ct.pcpu_lists);