
Merge branch 'rhashtable-next'

Herbert Xu says:

====================
rhashtable: Fixes + cleanups + preparation for multiple rehash

Patch 1 fixes the walker so that it behaves properly even during
a resize (a caller-side sketch of the walker API follows the
changed-file summary below).

Patches 2-3 are cleanups.

Patches 4-6 lay some groundwork for the upcoming multiple rehashing.

This revision fixes the warning coming from the bucket_table->size
downsize and improves its changelog.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller, 10 years ago
commit 5a2f78dd51
3 changed files with 80 additions and 62 deletions
  1. include/linux/rhashtable.h (12 additions, 7 deletions)
  2. lib/rhashtable.c (67 additions, 54 deletions)
  3. lib/test_rhashtable.c (1 addition, 1 deletion)
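
Patch 1 changes how a resize is reported to a walker: the iterator now
carries a pointer to the bucket table it is walking, and
rhashtable_walk_start() or rhashtable_walk_next() return -EAGAIN once
that table has been rehashed away. The sketch below is not part of this
commit; it is a minimal caller written against the walker functions
exported in lib/rhashtable.c as modified here, and the pr_info() output
is purely illustrative.

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/rhashtable.h>

/* Walk every element while tolerating concurrent resizes.  -EAGAIN from
 * rhashtable_walk_start()/rhashtable_walk_next() only means the iteration
 * restarted from the beginning of the (new) table, so an element may be
 * seen more than once.
 */
static void walk_all(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	void *obj;

	if (rhashtable_walk_init(ht, &iter))
		return;				/* -ENOMEM */

	rhashtable_walk_start(&iter);		/* -EAGAIN here just flags a resize */

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* rehash: walk restarted */
			break;
		}
		pr_info("visited %p\n", obj);
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}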

+ 12 - 7
include/linux/rhashtable.h

@@ -49,18 +49,27 @@ struct rhash_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @shift: Current size (1 << shift)
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
+ * @walkers: List of active walkers
+ * @rcu: RCU structure for freeing the table
+ * @future_tbl: Table under construction during rehashing
  * @buckets: size * hash buckets
  */
 struct bucket_table {
-	size_t			size;
+	unsigned int		size;
+	unsigned int		rehash;
 	u32			hash_rnd;
 	u32			shift;
 	unsigned int		locks_mask;
 	spinlock_t		*locks;
+	struct list_head	walkers;
+	struct rcu_head		rcu;
+
+	struct bucket_table __rcu *future_tbl;
 
 	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
@@ -99,33 +108,29 @@ struct rhashtable_params {
 /**
  * struct rhashtable - Hash table handle
  * @tbl: Bucket table
- * @future_tbl: Table under construction during expansion/shrinking
  * @nelems: Number of elements in table
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
- * @walkers: List of active walkers
  * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
 	struct bucket_table __rcu	*tbl;
-	struct bucket_table __rcu       *future_tbl;
 	atomic_t			nelems;
 	bool                            being_destroyed;
 	struct rhashtable_params	p;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
-	struct list_head		walkers;
 };
 
 /**
  * struct rhashtable_walker - Hash table walker
  * @list: List entry on list of walkers
- * @resize: Resize event occured
+ * @tbl: The table that we were walking over
  */
 struct rhashtable_walker {
 	struct list_head list;
-	bool resize;
+	struct bucket_table *tbl;
 };
 
 /**

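The include/linux/rhashtable.h changes above move @future_tbl from
struct rhashtable into struct bucket_table and give each table its own
walker list, RCU head and @rehash progress counter, so a table under
rehash points at the table that is replacing it. A minimal reader-side
sketch of that invariant, not part of the patch (the helper name is made
up): it uses only the rht_dereference_rcu() accessor seen in the diffs
and must be called under rcu_read_lock(). In this series the chain has
at most one future table, but the layout no longer limits it to one.

/* Hypothetical helper: follow the future_tbl chain to the newest table. */
static struct bucket_table *newest_table(struct rhashtable *ht)
{
	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
	struct bucket_table *next;

	/* Each table being rehashed points at its replacement. */
	while ((next = rht_dereference_rcu(tbl->future_tbl, ht)))
		tbl = next;

	return tbl;
}
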
+ 67 - 54
lib/rhashtable.c

@@ -33,11 +33,6 @@
 /* Base bits plus 1 bit for nulls marker */
 #define HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
 
-enum {
-	RHT_LOCK_NORMAL,
-	RHT_LOCK_NESTED,
-};
-
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
@@ -146,8 +141,13 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	kvfree(tbl);
 }
 
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+	bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
-					       size_t nbuckets, u32 hash_rnd)
+					       size_t nbuckets)
 {
 	struct bucket_table *tbl = NULL;
 	size_t size;
@@ -163,13 +163,16 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 
 	tbl->size = nbuckets;
 	tbl->shift = ilog2(nbuckets);
-	tbl->hash_rnd = hash_rnd;
 
 	if (alloc_bucket_locks(ht, tbl) < 0) {
 		bucket_table_free(tbl);
 		return NULL;
 	}
 
+	INIT_LIST_HEAD(&tbl->walkers);
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
 	for (i = 0; i < nbuckets; i++)
 		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
 
@@ -204,8 +207,9 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
 
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 {
-	struct bucket_table *new_tbl = rht_dereference(ht->future_tbl, ht);
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl =
+		rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
 	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
@@ -229,7 +233,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 
 	new_bucket_lock = bucket_lock(new_tbl, new_hash);
 
-	spin_lock_nested(new_bucket_lock, RHT_LOCK_NESTED);
+	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
 	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
 				      new_tbl, new_hash);
 
@@ -257,6 +261,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
 	spin_lock_bh(old_bucket_lock);
 	while (!rhashtable_rehash_one(ht, old_hash))
 		;
+	old_tbl->rehash++;
 	spin_unlock_bh(old_bucket_lock);
 }
 
@@ -264,16 +269,13 @@ static void rhashtable_rehash(struct rhashtable *ht,
 			      struct bucket_table *new_tbl)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct rhashtable_walker *walker;
 	unsigned old_hash;
 
-	get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd));
-
 	/* Make insertions go into the new, empty table right away. Deletions
 	 * and lookups will be attempted in both tables until we synchronize.
-	 * The synchronize_rcu() guarantees for the new table to be picked up
-	 * so no new additions go into the old table while we relink.
 	 */
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
+	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 
 	/* Ensure the new table is visible to readers. */
 	smp_wmb();
@@ -284,13 +286,14 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
 
+	list_for_each_entry(walker, &old_tbl->walkers, list)
+		walker->tbl = NULL;
+
 	/* Wait for readers. All new readers will see the new
 	 * table, and thus no references to the old table will
 	 * remain.
 	 */
-	synchronize_rcu();
-
-	bucket_table_free(old_tbl);
+	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
 }
 
 /**
@@ -314,7 +317,7 @@ int rhashtable_expand(struct rhashtable *ht)
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, old_tbl->hash_rnd);
+	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -345,7 +348,7 @@ int rhashtable_shrink(struct rhashtable *ht)
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size / 2, old_tbl->hash_rnd);
+	new_tbl = bucket_table_alloc(ht, old_tbl->size / 2);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -358,7 +361,6 @@ static void rht_deferred_worker(struct work_struct *work)
 {
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
-	struct rhashtable_walker *walker;
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
@@ -367,9 +369,6 @@ static void rht_deferred_worker(struct work_struct *work)
 
 	tbl = rht_dereference(ht->tbl, ht);
 
-	list_for_each_entry(walker, &ht->walkers, list)
-		walker->resize = true;
-
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
 	else if (rht_shrink_below_30(ht, tbl))
@@ -400,10 +399,10 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	 * also grab the bucket lock in old_tbl because until the
 	 * rehash completes ht->tbl won't be changed.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (tbl != old_tbl) {
 		hash = head_hashfn(ht, tbl, obj);
-		spin_lock_nested(bucket_lock(tbl, hash), RHT_LOCK_NESTED);
+		spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 	}
 
 	if (compare &&
@@ -525,7 +524,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 	 * visible then that guarantees the entry to still be in
 	 * old_tbl if it exists.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (!ret && old_tbl != tbl)
 		ret = __rhashtable_remove(ht, tbl, obj);
 
@@ -599,7 +598,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup);
 void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
 				bool (*compare)(void *, void *), void *arg)
 {
-	const struct bucket_table *tbl, *old_tbl;
+	const struct bucket_table *tbl;
 	struct rhash_head *he;
 	u32 hash;
 
@@ -618,9 +617,8 @@ restart:
 	/* Ensure we see any new tables. */
 	smp_rmb();
 
-	old_tbl = tbl;
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	if (unlikely(tbl != old_tbl))
+	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (unlikely(tbl))
 		goto restart;
 	rcu_read_unlock();
 
@@ -725,11 +723,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
 	if (!iter->walker)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&iter->walker->list);
-	iter->walker->resize = false;
-
 	mutex_lock(&ht->mutex);
-	list_add(&iter->walker->list, &ht->walkers);
+	iter->walker->tbl = rht_dereference(ht->tbl, ht);
+	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
 	mutex_unlock(&ht->mutex);
 
 	return 0;
@@ -745,7 +741,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
 	mutex_lock(&iter->ht->mutex);
-	list_del(&iter->walker->list);
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
 	mutex_unlock(&iter->ht->mutex);
 	kfree(iter->walker);
 }
@@ -767,12 +764,19 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  */
 int rhashtable_walk_start(struct rhashtable_iter *iter)
 {
+	struct rhashtable *ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
+
 	rcu_read_lock();
 
-	if (iter->walker->resize) {
-		iter->slot = 0;
-		iter->skip = 0;
-		iter->walker->resize = false;
+	mutex_unlock(&ht->mutex);
+
+	if (!iter->walker->tbl) {
+		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
 
@@ -794,13 +798,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl = iter->walker->tbl;
 	struct rhashtable *ht = iter->ht;
 	struct rhash_head *p = iter->p;
 	void *obj = NULL;
 
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-
 	if (p) {
 		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
 		goto next;
@@ -826,17 +828,17 @@ next:
 		iter->skip = 0;
 	}
 
-	iter->p = NULL;
-
-out:
-	if (iter->walker->resize) {
-		iter->p = NULL;
+	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker->tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
-		iter->walker->resize = false;
 		return ERR_PTR(-EAGAIN);
 	}
 
+	iter->p = NULL;
+
+out:
+
 	return obj;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -849,7 +851,23 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
  */
 void rhashtable_walk_stop(struct rhashtable_iter *iter)
 {
+	struct rhashtable *ht;
+	struct bucket_table *tbl = iter->walker->tbl;
+
 	rcu_read_unlock();
+
+	if (!tbl)
+		return;
+
+	ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+	if (tbl->rehash < tbl->size)
+		list_add(&iter->walker->list, &tbl->walkers);
+	else
+		iter->walker->tbl = NULL;
+	mutex_unlock(&ht->mutex);
+
 	iter->p = NULL;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
@@ -907,7 +925,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 {
 	struct bucket_table *tbl;
 	size_t size;
-	u32 hash_rnd;
 
 	size = HASH_DEFAULT_SIZE;
 
@@ -927,23 +944,19 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
 	memcpy(&ht->p, params, sizeof(*params));
-	INIT_LIST_HEAD(&ht->walkers);
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
 	else
 		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
-	get_random_bytes(&hash_rnd, sizeof(hash_rnd));
-
-	tbl = bucket_table_alloc(ht, size, hash_rnd);
+	tbl = bucket_table_alloc(ht, size);
 	if (tbl == NULL)
 		return -ENOMEM;
 
 	atomic_set(&ht->nelems, 0);
 
 	RCU_INIT_POINTER(ht->tbl, tbl);
-	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
 	INIT_WORK(&ht->run_work, rht_deferred_worker);
 

+ 1 - 1
lib/test_rhashtable.c

@@ -80,7 +80,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
 		rcu_cnt = cnt = 0;
 
 		if (!quiet)
-			pr_info(" [%#4x/%zu]", i, tbl->size);
+			pr_info(" [%#4x/%u]", i, tbl->size);
 
 		rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
 			cnt++;