rhashtable.c

/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Based on the following paper:
 * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
 *
 * Code partially derived from nft_hash
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U
#define BUCKET_LOCKS_PER_CPU	128UL

/* Base bits plus 1 bit for nulls marker */
#define HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)

/* The bucket lock is selected based on the hash and protects mutations
 * on a group of hash buckets.
 *
 * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
 * a single lock always covers both buckets that may contain entries
 * linking to the same bucket of the old table during resizing.
 * This simplifies the locking: holding the bucket lock in both tables
 * during a resize always guarantees protection.
 *
 * IMPORTANT: When holding the bucket lock of both the old and new table
 * during expansions and shrinking, the old bucket lock must always be
 * acquired first.
 */

static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
{
	return &tbl->locks[hash & tbl->locks_mask];
}

static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
{
	return (void *) he - ht->p.head_offset;
}

static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
{
	return (hash >> HASH_RESERVED_SPACE) & (tbl->size - 1);
}

static u32 key_hashfn(struct rhashtable *ht, const struct bucket_table *tbl,
		      const void *key)
{
	return rht_bucket_index(tbl, ht->p.hashfn(key, ht->p.key_len,
						  tbl->hash_rnd));
}

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	const char *ptr = rht_obj(ht, he);

	return likely(ht->p.key_len) ?
	       key_hashfn(ht, tbl, ptr + ht->p.key_offset) :
	       rht_bucket_index(tbl, ht->p.obj_hashfn(ptr, tbl->hash_rnd));
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	spinlock_t *lock = bucket_lock(tbl, hash);

	return (debug_locks) ? lockdep_is_held(lock) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif

static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
{
	unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

	/* Never allocate more than 0.5 locks per bucket */
	size = min_t(unsigned int, size, tbl->size >> 1);

	if (sizeof(spinlock_t) != 0) {
#ifdef CONFIG_NUMA
		if (size * sizeof(spinlock_t) > PAGE_SIZE)
			tbl->locks = vmalloc(size * sizeof(spinlock_t));
		else
#endif
		tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
					   GFP_KERNEL);
		if (!tbl->locks)
			return -ENOMEM;
		for (i = 0; i < size; i++)
			spin_lock_init(&tbl->locks[i]);
	}
	tbl->locks_mask = size - 1;

	return 0;
}

static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl)
		kvfree(tbl->locks);

	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
		tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (tbl == NULL)
		tbl = vzalloc(size);
	if (tbl == NULL)
		return NULL;

	tbl->size = nbuckets;

	if (alloc_bucket_locks(ht, tbl) < 0) {
		bucket_table_free(tbl);
		return NULL;
	}

	INIT_LIST_HEAD(&tbl->walkers);

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

	return tbl;
}

/**
 * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
 * @ht: hash table
 * @tbl: current table
 */
static bool rht_grow_above_75(const struct rhashtable *ht,
			      const struct bucket_table *tbl)
{
	/* Expand table when exceeding 75% load */
	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
	       (!ht->p.max_size || tbl->size < ht->p.max_size);
}

/**
 * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
 * @ht: hash table
 * @tbl: current table
 */
static bool rht_shrink_below_30(const struct rhashtable *ht,
				const struct bucket_table *tbl)
{
	/* Shrink table beneath 30% load */
	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
	       tbl->size > ht->p.min_size;
}

static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl =
		rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
	int err = -ENOENT;
	struct rhash_head *head, *next, *entry;
	spinlock_t *new_bucket_lock;
	unsigned new_hash;

	rht_for_each(entry, old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	new_bucket_lock = bucket_lock(new_tbl, new_hash);

	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
				      new_tbl, new_hash);

	if (rht_is_a_nulls(head))
		INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash);
	else
		RCU_INIT_POINTER(entry->next, head);

	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
	spin_unlock(new_bucket_lock);

	rcu_assign_pointer(*pprev, next);

out:
	return err;
}

static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	spinlock_t *old_bucket_lock;

	old_bucket_lock = bucket_lock(old_tbl, old_hash);

	spin_lock_bh(old_bucket_lock);
	while (!rhashtable_rehash_one(ht, old_hash))
		;
	old_tbl->rehash++;
	spin_unlock_bh(old_bucket_lock);
}

static void rhashtable_rehash(struct rhashtable *ht,
			      struct bucket_table *new_tbl)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct rhashtable_walker *walker;
	unsigned old_hash;

	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 */
	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);

	/* Ensure the new table is visible to readers. */
	smp_wmb();

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
		rhashtable_rehash_chain(ht, old_hash);

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
}

/**
 * rhashtable_expand - Expand hash table while allowing concurrent lookups
 * @ht: the hash table to expand
 *
 * A secondary bucket array is allocated and the hash entries are migrated.
 *
 * This function may only be called in a context where it is safe to call
 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
int rhashtable_expand(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
	if (new_tbl == NULL)
		return -ENOMEM;

	rhashtable_rehash(ht, new_tbl);
	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_expand);

/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht: the hash table to shrink
 *
 * This function may only be called in a context where it is safe to call
 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, old_tbl->size / 2);
	if (new_tbl == NULL)
		return -ENOMEM;

	rhashtable_rehash(ht, new_tbl);
	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_shrink);

static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);
	if (ht->being_destroyed)
		goto unlock;

	tbl = rht_dereference(ht->tbl, ht);

	if (rht_grow_above_75(ht, tbl))
		rhashtable_expand(ht);
	else if (rht_shrink_below_30(ht, tbl))
		rhashtable_shrink(ht);
unlock:
	mutex_unlock(&ht->mutex);
}

static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
				bool (*compare)(void *, void *), void *arg)
{
	struct bucket_table *tbl, *old_tbl;
	struct rhash_head *head;
	bool no_resize_running;
	unsigned hash;
	spinlock_t *old_lock;
	bool success = true;

	rcu_read_lock();

	old_tbl = rht_dereference_rcu(ht->tbl, ht);
	hash = head_hashfn(ht, old_tbl, obj);
	old_lock = bucket_lock(old_tbl, hash);

	spin_lock_bh(old_lock);

	/* Because we have already taken the bucket lock in old_tbl,
	 * if we find that future_tbl is not yet visible then that
	 * guarantees all other insertions of the same entry will
	 * also grab the bucket lock in old_tbl because until the
	 * rehash completes ht->tbl won't be changed.
	 */
	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
	if (tbl != old_tbl) {
		hash = head_hashfn(ht, tbl, obj);
		spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
	}

	if (compare &&
	    rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
				      compare, arg)) {
		success = false;
		goto exit;
	}

	no_resize_running = tbl == old_tbl;

	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);

	if (rht_is_a_nulls(head))
		INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
	else
		RCU_INIT_POINTER(obj->next, head);

	rcu_assign_pointer(tbl->buckets[hash], obj);

	atomic_inc(&ht->nelems);
	if (no_resize_running && rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

exit:
	if (tbl != old_tbl)
		spin_unlock(bucket_lock(tbl, hash));

	spin_unlock_bh(old_lock);

	rcu_read_unlock();

	return success;
}

/**
 * rhashtable_insert - insert object into hash table
 * @ht: hash table
 * @obj: pointer to hash head inside object
 *
 * Will take a per bucket spinlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket lock.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the size grows
 * beyond the watermark indicated by grow_decision() which can be passed
 * to rhashtable_init().
 */
void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
{
	__rhashtable_insert(ht, obj, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_insert);
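
/* Example usage (a minimal sketch, not taken from a real caller): insert an
 * object whose embedded rhash_head is declared as in the rhashtable_init()
 * kerneldoc further below. The names my_ht and struct test_obj are
 * assumptions for illustration only.
 *
 *	struct test_obj *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
 *
 *	if (!obj)
 *		return -ENOMEM;
 *	obj->key = 42;
 *	rhashtable_insert(&my_ht, &obj->node);
 */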

static bool __rhashtable_remove(struct rhashtable *ht,
				struct bucket_table *tbl,
				struct rhash_head *obj)
{
	struct rhash_head __rcu **pprev;
	struct rhash_head *he;
	spinlock_t *lock;
	unsigned hash;
	bool ret = false;

	hash = head_hashfn(ht, tbl, obj);
	lock = bucket_lock(tbl, hash);

	spin_lock_bh(lock);

	pprev = &tbl->buckets[hash];
	rht_for_each(he, tbl, hash) {
		if (he != obj) {
			pprev = &he->next;
			continue;
		}

		rcu_assign_pointer(*pprev, obj->next);
		ret = true;
		break;
	}

	spin_unlock_bh(lock);

	return ret;
}

/**
 * rhashtable_remove - remove object from hash table
 * @ht: hash table
 * @obj: pointer to hash head inside object
 *
 * Since the hash chain is singly linked, the removal operation needs to
 * walk the bucket chain upon removal. The removal operation is thus
 * considerably slower if the hash table is not correctly sized.
 *
 * Will automatically shrink the table via rhashtable_shrink() if the
 * shrink_decision function specified at rhashtable_init() returns true.
 *
 * The caller must ensure that no concurrent table mutations occur. It is
 * however valid to have concurrent lookups if they are RCU protected.
 */
bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
{
	struct bucket_table *tbl;
	bool ret;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);

	/* Because we have already taken (and released) the bucket
	 * lock in old_tbl, if we find that future_tbl is not yet
	 * visible then that guarantees the entry to still be in
	 * the old tbl if it exists.
	 */
	while (!(ret = __rhashtable_remove(ht, tbl, obj)) &&
	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
		;

	if (ret) {
		atomic_dec(&ht->nelems);
		if (rht_shrink_below_30(ht, tbl))
			schedule_work(&ht->run_work);
	}

	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(rhashtable_remove);
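
/* Example usage (a minimal sketch, not taken from a real caller): remove a
 * previously inserted object and free it only after an RCU grace period, so
 * that concurrent RCU readers still holding a pointer to it remain safe.
 * The names my_ht, obj and its rcu member are assumptions for illustration.
 *
 *	if (rhashtable_remove(&my_ht, &obj->node))
 *		kfree_rcu(obj, rcu);
 */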

struct rhashtable_compare_arg {
	struct rhashtable *ht;
	const void *key;
};

static bool rhashtable_compare(void *ptr, void *arg)
{
	struct rhashtable_compare_arg *x = arg;
	struct rhashtable *ht = x->ht;

	return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
}

/**
 * rhashtable_lookup - lookup key in hash table
 * @ht: hash table
 * @key: pointer to key
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key. The first matching entry is returned.
 *
 * This lookup function may only be used for fixed key hash tables (key_len
 * parameter set). It will BUG() if used inappropriately.
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 */
void *rhashtable_lookup(struct rhashtable *ht, const void *key)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};

	BUG_ON(!ht->p.key_len);

	return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
}
EXPORT_SYMBOL_GPL(rhashtable_lookup);
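
/* Example usage (a minimal sketch, not taken from a real caller): look up an
 * object by its fixed-length key and use it under RCU protection so that a
 * concurrent removal cannot free it underneath us. The names my_ht, key and
 * struct test_obj are assumptions for illustration only.
 *
 *	struct test_obj *obj;
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup(&my_ht, &key);
 *	if (obj)
 *		pr_info("found key %d\n", obj->key);
 *	rcu_read_unlock();
 */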

/**
 * rhashtable_lookup_compare - search hash table with compare function
 * @ht: hash table
 * @key: the pointer to the key
 * @compare: compare function, must return true on match
 * @arg: argument passed on to compare function
 *
 * Traverses the bucket chain behind the provided hash value and calls the
 * specified compare function for each entry.
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 *
 * Returns the first entry on which the compare function returned true.
 */
void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
				bool (*compare)(void *, void *), void *arg)
{
	const struct bucket_table *tbl;
	struct rhash_head *he;
	u32 hash;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
	hash = key_hashfn(ht, tbl, key);
	rht_for_each_rcu(he, tbl, hash) {
		if (!compare(rht_obj(ht, he), arg))
			continue;
		rcu_read_unlock();
		return rht_obj(ht, he);
	}

	/* Ensure we see any new tables. */
	smp_rmb();

	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (unlikely(tbl))
		goto restart;
	rcu_read_unlock();

	return NULL;
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);

/**
 * rhashtable_lookup_insert - lookup and insert object into hash table
 * @ht: hash table
 * @obj: pointer to hash head inside object
 *
 * Locks down the bucket chain in both the old and new table if a resize
 * is in progress to ensure that writers can't remove from the old table
 * and can't insert to the new table during the atomic operation of search
 * and insertion. Searches for duplicates in both the old and new table if
 * a resize is in progress.
 *
 * This lookup function may only be used for fixed key hash tables (key_len
 * parameter set). It will BUG() if used inappropriately.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the size grows
 * beyond the watermark indicated by grow_decision() which can be passed
 * to rhashtable_init().
 */
bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = rht_obj(ht, obj) + ht->p.key_offset,
	};

	BUG_ON(!ht->p.key_len);

	return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
						&arg);
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);

/**
 * rhashtable_lookup_compare_insert - search and insert object to hash table
 *                                    with compare function
 * @ht: hash table
 * @obj: pointer to hash head inside object
 * @compare: compare function, must return true on match
 * @arg: argument passed on to compare function
 *
 * Locks down the bucket chain in both the old and new table if a resize
 * is in progress to ensure that writers can't remove from the old table
 * and can't insert to the new table during the atomic operation of search
 * and insertion. Searches for duplicates in both the old and new table if
 * a resize is in progress.
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 *
 * Will trigger an automatic deferred table resizing if the size grows
 * beyond the watermark indicated by grow_decision() which can be passed
 * to rhashtable_init().
 */
bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
				      struct rhash_head *obj,
				      bool (*compare)(void *, void *),
				      void *arg)
{
	BUG_ON(!ht->p.key_len);

	return __rhashtable_insert(ht, obj, compare, arg);
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);

/**
 * rhashtable_walk_init - Initialise an iterator
 * @ht: Table to walk over
 * @iter: Hash table iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice. Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may sleep so you must not call it from interrupt
 * context or with spin locks held.
 *
 * You must call rhashtable_walk_exit if this function returns
 * successfully.
 */
int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;

	iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL);
	if (!iter->walker)
		return -ENOMEM;

	mutex_lock(&ht->mutex);
	iter->walker->tbl = rht_dereference(ht->tbl, ht);
	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
	mutex_unlock(&ht->mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_init);

/**
 * rhashtable_walk_exit - Free an iterator
 * @iter: Hash table iterator
 *
 * This function frees resources allocated by rhashtable_walk_init.
 */
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	mutex_lock(&iter->ht->mutex);
	if (iter->walker->tbl)
		list_del(&iter->walker->list);
	mutex_unlock(&iter->ht->mutex);
	kfree(iter->walker);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);

/**
 * rhashtable_walk_start - Start a hash table walk
 * @iter: Hash table iterator
 *
 * Start a hash table walk. Note that we take the RCU lock in all
 * cases including when we return an error. So you must always call
 * rhashtable_walk_stop to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
 */
int rhashtable_walk_start(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;

	mutex_lock(&ht->mutex);

	if (iter->walker->tbl)
		list_del(&iter->walker->list);

	rcu_read_lock();

	mutex_unlock(&ht->mutex);

	if (!iter->walker->tbl) {
		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
		return -EAGAIN;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start);

/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter: Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker->tbl;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	void *obj = NULL;

	if (p) {
		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
		goto next;
	}

	for (; iter->slot < tbl->size; iter->slot++) {
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			obj = rht_obj(ht, p);
			goto out;
		}

		iter->skip = 0;
	}

	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker->tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	}

	iter->p = NULL;

out:
	return obj;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);

/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter: Hash table iterator
 *
 * Finish a hash table walk.
 */
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker->tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	mutex_lock(&ht->mutex);
	if (tbl->rehash < tbl->size)
		list_add(&iter->walker->list, &tbl->walkers);
	else
		iter->walker->tbl = NULL;
	mutex_unlock(&ht->mutex);

	iter->p = NULL;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
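
/* Example usage of the walker API (a minimal sketch, not taken from a real
 * caller): iterate over all entries, restarting on -EAGAIN when a resize
 * interrupts the walk. The names my_ht and struct test_obj are assumptions
 * for illustration only.
 *
 *	struct rhashtable_iter iter;
 *	struct test_obj *obj;
 *	int err;
 *
 *	err = rhashtable_walk_init(&my_ht, &iter);
 *	if (err)
 *		return err;
 *
 *	err = rhashtable_walk_start(&iter);
 *	if (err && err != -EAGAIN)
 *		goto out;
 *
 *	while ((obj = rhashtable_walk_next(&iter))) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		pr_info("key %d\n", obj->key);
 *	}
 *
 * out:
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */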

static size_t rounded_hashtable_size(struct rhashtable_params *params)
{
	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
		   (unsigned long)params->min_size);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht: hash table to be initialized
 * @params: configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void			*my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 *	.nulls_base = (1U << RHT_BASE_SHIFT),
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	size = HASH_DEFAULT_SIZE;

	if ((params->key_len && !params->hashfn) ||
	    (!params->key_len && !params->obj_hashfn))
		return -EINVAL;

	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
		return -EINVAL;

	if (params->nelem_hint)
		size = rounded_hashtable_size(params);

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	if (params->max_size)
		ht->p.max_size = rounddown_pow_of_two(params->max_size);

	ht->p.min_size = max(params->min_size, HASH_MIN_SIZE);

	if (params->locks_mul)
		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
	else
		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

	tbl = bucket_table_alloc(ht, size);
	if (tbl == NULL)
		return -ENOMEM;

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);
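
/* Example usage (a minimal sketch, not taken from a real caller): create and
 * tear down a table using the fixed-length-key configuration from Example 1
 * above. The names my_ht and params are assumptions for illustration only.
 *
 *	struct rhashtable my_ht;
 *	int err;
 *
 *	err = rhashtable_init(&my_ht, &params);
 *	if (err)
 *		return err;
 *
 *	[... insert, lookup and remove entries ...]
 *
 *	rhashtable_destroy(&my_ht);
 */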

/**
 * rhashtable_destroy - destroy hash table
 * @ht: the hash table to destroy
 *
 * Frees the bucket array. This function is not rcu safe, therefore the caller
 * has to make sure that no resizing may happen by unpublishing the hashtable
 * and waiting for the quiescent cycle before releasing the bucket array.
 */
void rhashtable_destroy(struct rhashtable *ht)
{
	ht->being_destroyed = true;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	bucket_table_free(rht_dereference(ht->tbl, ht));
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);