/*
 * linux/fs/mbcache.c
 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */

/*
 * Filesystem Meta Information Block Cache (mbcache)
 *
 * The mbcache caches blocks of block devices that need to be located
 * by their device/block number, as well as by other criteria (such
 * as the block's contents).
 *
 * There can only be one cache entry in a cache per device and block number.
 * Additional indexes need not be unique in this sense. The number of
 * additional indexes (=other criteria) can be hardwired at compile time
 * or specified at cache create time.
 *
 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
 * in the cache. A valid entry is in the main hash tables of the cache,
 * and may also be in the lru list. An invalid entry is not in any hashes
 * or lists.
 *
 * A valid cache entry is only in the lru list if no handles refer to it.
 * Invalid cache entries will be freed when the last handle to the cache
 * entry is released. Entries that cannot be freed immediately are put
 * back on the lru list.
 */

/*
 * Lock descriptions and usage:
 *
 * Each hash chain of both the block and index hash tables now contains
 * a built-in lock used to serialize accesses to the hash chain.
 *
 * Accesses to global data structures mb_cache_list and mb_cache_lru_list
 * are serialized via the global spinlock mb_cache_spinlock.
 *
 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
 * accesses to its local data, such as e_used and e_queued.
 *
 * Lock ordering:
 *
 * A block hash chain's lock has the highest lock order, followed by an
 * index hash chain's lock, mb_cache_bg_lock (used to implement
 * mb_cache_entry's lock), and mb_cache_spinlock, with the lowest order.
 * While holding either a block or index hash chain lock, a thread can
 * acquire mb_cache_bg_lock, and while holding that it can in turn
 * acquire mb_cache_spinlock.
 *
 * Synchronization:
 *
 * Since mb_cache_entry_get and mb_cache_entry_find scan the block and
 * index hash chains, they need to lock the corresponding hash chain. For
 * each mb_cache_entry within the chain, they also need to lock the entry
 * itself, to prevent a simultaneous release or free of the entry and to
 * serialize accesses to its e_used and e_queued members.
 *
 * To avoid a dangling reference to an already freed mb_cache_entry, an
 * mb_cache_entry is only freed when it is no longer on a block hash chain
 * and no longer referenced, i.e. both e_used and e_queued are 0. When an
 * mb_cache_entry is explicitly freed, it is first removed from its block
 * hash chain.
 */
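
/*
 * Typical usage, as an illustrative sketch only (the variables bdev, block
 * and hash below are hypothetical placeholders, and the cache name and
 * bucket count are arbitrary; the in-tree users of this API are the
 * extended attribute caches of ext2/ext3/ext4):
 *
 *	struct mb_cache *cache = mb_cache_create("example_cache", 6);
 *
 *	// Make a block findable by (bdev, block) and by an extra key:
 *	struct mb_cache_entry *ce = mb_cache_entry_alloc(cache, GFP_NOFS);
 *	if (ce) {
 *		if (mb_cache_entry_insert(ce, bdev, block, hash))
 *			mb_cache_entry_free(ce);	// lost the race (-EBUSY)
 *		else
 *			mb_cache_entry_release(ce);	// drop our handle
 *	}
 *
 *	// Iterate over all entries on bdev with a given key. The find
 *	// functions may also return ERR_PTR(-EAGAIN); a real caller checks
 *	// IS_ERR() and typically retries.
 *	ce = mb_cache_entry_find_first(cache, bdev, hash);
 *	while (ce && !IS_ERR(ce)) {
 *		// ... use ce->e_bdev / ce->e_block ...
 *		ce = mb_cache_entry_find_next(ce, bdev, hash);
 *	}
 */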

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/hash.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list_bl.h>
#include <linux/mbcache.h>
#include <linux/init.h>
#include <linux/blockgroup_lock.h>

#ifdef MB_CACHE_DEBUG
# define mb_debug(f...) do { \
                printk(KERN_DEBUG f); \
                printk("\n"); \
        } while (0)
# define mb_assert(c) do { if (!(c)) \
                printk(KERN_ERR "assertion " #c " failed\n"); \
        } while(0)
#else
# define mb_debug(f...) do { } while(0)
# define mb_assert(c) do { } while(0)
#endif
#define mb_error(f...) do { \
                printk(KERN_ERR f); \
                printk("\n"); \
        } while(0)

#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)

#define MB_CACHE_ENTRY_LOCK_BITS	__builtin_log2(NR_BG_LOCKS)
#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
        (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
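
/*
 * Per-entry locking is striped rather than embedded: e_entry_lock is
 * realized by hashing the entry's address into one of the NR_BG_LOCKS
 * spinlocks inside mb_cache_bg_lock (see __spin_lock_mb_cache_entry()
 * below), so all entries share a small, fixed pool of locks.
 */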

static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
static struct blockgroup_lock *mb_cache_bg_lock;
static struct kmem_cache *mb_cache_kmem_cache;

MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");

EXPORT_SYMBOL(mb_cache_create);
EXPORT_SYMBOL(mb_cache_shrink);
EXPORT_SYMBOL(mb_cache_destroy);
EXPORT_SYMBOL(mb_cache_entry_alloc);
EXPORT_SYMBOL(mb_cache_entry_insert);
EXPORT_SYMBOL(mb_cache_entry_release);
EXPORT_SYMBOL(mb_cache_entry_free);
EXPORT_SYMBOL(mb_cache_entry_get);
#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
EXPORT_SYMBOL(mb_cache_entry_find_first);
EXPORT_SYMBOL(mb_cache_entry_find_next);
#endif

/*
 * Global data: list of all mbcache's, lru list, and a spinlock for
 * accessing cache data structures on SMP machines. The lru list is
 * global across all mbcaches.
 */
static LIST_HEAD(mb_cache_list);
static LIST_HEAD(mb_cache_lru_list);
static DEFINE_SPINLOCK(mb_cache_spinlock);

static inline void
__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
{
        spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
                               MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline void
__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
{
        spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
                                 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline int
__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
{
        return !hlist_bl_unhashed(&ce->e_block_list);
}

static inline void
__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
{
        if (__mb_cache_entry_is_block_hashed(ce))
                hlist_bl_del_init(&ce->e_block_list);
}

static inline int
__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
{
        return !hlist_bl_unhashed(&ce->e_index.o_list);
}

static inline void
__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
{
        if (__mb_cache_entry_is_index_hashed(ce))
                hlist_bl_del_init(&ce->e_index.o_list);
}

/*
 * __mb_cache_entry_unhash_unlock()
 *
 * This function is called to remove the entry from both the block and
 * index hash chains.
 * It assumes both the block and index hash chains are locked upon entry.
 * It also unlocks both hash chains upon exit.
 */
static inline void
__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
{
        __mb_cache_entry_unhash_index(ce);
        hlist_bl_unlock(ce->e_index_hash_p);
        __mb_cache_entry_unhash_block(ce);
        hlist_bl_unlock(ce->e_block_hash_p);
}
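
/*
 * __mb_cache_entry_forget()
 *
 * Final teardown of an entry: return it to the slab cache and drop the
 * owning cache's entry count. The entry must already be unhashed and must
 * have no remaining references (e_used, e_queued and e_refcnt all 0).
 */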
static void
__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
{
        struct mb_cache *cache = ce->e_cache;

        mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
        kmem_cache_free(cache->c_entry_cache, ce);
        atomic_dec(&cache->c_entry_count);
}
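
/*
 * __mb_cache_entry_release()
 *
 * Drop one handle (and the writer's MB_CACHE_WRITER bias, if held) on an
 * entry. If the entry then has no users or waiters, it is either parked on
 * the global lru list (while still block hashed) or freed immediately
 * (when it has already been unhashed, i.e. invalidated).
 */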
static void
__mb_cache_entry_release(struct mb_cache_entry *ce)
{
        /* First lock the entry to serialize access to its local data. */
        __spin_lock_mb_cache_entry(ce);
        /* Wake up all processes queuing for this cache entry. */
        if (ce->e_queued)
                wake_up_all(&mb_cache_queue);
        if (ce->e_used >= MB_CACHE_WRITER)
                ce->e_used -= MB_CACHE_WRITER;
        /*
         * Make sure that all cache entries on lru_list have
         * both e_used and e_queued of 0s.
         */
        ce->e_used--;
        if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
                if (!__mb_cache_entry_is_block_hashed(ce)) {
                        __spin_unlock_mb_cache_entry(ce);
                        goto forget;
                }
                /*
                 * Need access to the lru list: mb_cache_spinlock ranks
                 * below the entry lock, so it can be acquired here.
                 */
                spin_lock(&mb_cache_spinlock);
                if (list_empty(&ce->e_lru_list))
                        list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
                spin_unlock(&mb_cache_spinlock);
        }
        __spin_unlock_mb_cache_entry(ce);
        return;
forget:
        mb_assert(list_empty(&ce->e_lru_list));
        __mb_cache_entry_forget(ce, GFP_KERNEL);
}

/*
 * mb_cache_shrink_scan()  memory pressure callback
 *
 * This function is called by the kernel memory management when memory
 * gets low.
 *
 * @shrink: (ignored)
 * @sc: shrink_control passed from reclaim
 *
 * Returns the number of objects freed.
 */
static unsigned long
mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
        LIST_HEAD(free_list);
        struct mb_cache_entry *entry, *tmp;
        int nr_to_scan = sc->nr_to_scan;
        gfp_t gfp_mask = sc->gfp_mask;
        unsigned long freed = 0;

        mb_debug("trying to free %d entries", nr_to_scan);
        spin_lock(&mb_cache_spinlock);
        while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
                struct mb_cache_entry *ce =
                        list_entry(mb_cache_lru_list.next,
                                   struct mb_cache_entry, e_lru_list);
                list_del_init(&ce->e_lru_list);
                if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
                        continue;
                spin_unlock(&mb_cache_spinlock);
                /* Prevent any find or get operation on the entry */
                hlist_bl_lock(ce->e_block_hash_p);
                hlist_bl_lock(ce->e_index_hash_p);
                /* Ignore if it is touched by a find/get */
                if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
                    !list_empty(&ce->e_lru_list)) {
                        hlist_bl_unlock(ce->e_index_hash_p);
                        hlist_bl_unlock(ce->e_block_hash_p);
                        spin_lock(&mb_cache_spinlock);
                        continue;
                }
                __mb_cache_entry_unhash_unlock(ce);
                list_add_tail(&ce->e_lru_list, &free_list);
                spin_lock(&mb_cache_spinlock);
        }
        spin_unlock(&mb_cache_spinlock);

        list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
                __mb_cache_entry_forget(entry, gfp_mask);
                freed++;
        }
        return freed;
}
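
/*
 * mb_cache_shrink_count()
 *
 * Shrinker "count" callback: reports the total number of entries across
 * all registered caches, scaled by vfs_pressure_ratio(), so that reclaim
 * knows how much work a scan could do.
 */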
static unsigned long
mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
        struct mb_cache *cache;
        unsigned long count = 0;

        spin_lock(&mb_cache_spinlock);
        list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
                mb_debug("cache %s (%d)", cache->c_name,
                         atomic_read(&cache->c_entry_count));
                count += atomic_read(&cache->c_entry_count);
        }
        spin_unlock(&mb_cache_spinlock);

        return vfs_pressure_ratio(count);
}

static struct shrinker mb_cache_shrinker = {
        .count_objects = mb_cache_shrink_count,
        .scan_objects = mb_cache_shrink_scan,
        .seeks = DEFAULT_SEEKS,
};

/*
 * mb_cache_create()  create a new cache
 *
 * All entries in one cache are equal size. Cache entries may be from
 * multiple devices. If this is the first mbcache created, registers
 * the cache with kernel memory management. Returns NULL if no more
 * memory was available.
 *
 * @name: name of the cache (informal)
 * @bucket_bits: log2(number of hash buckets)
 */
struct mb_cache *
mb_cache_create(const char *name, int bucket_bits)
{
        int n, bucket_count = 1 << bucket_bits;
        struct mb_cache *cache = NULL;

        if (!mb_cache_bg_lock) {
                mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
                                           GFP_KERNEL);
                if (!mb_cache_bg_lock)
                        return NULL;
                bgl_lock_init(mb_cache_bg_lock);
        }

        cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
        if (!cache)
                return NULL;
        cache->c_name = name;
        atomic_set(&cache->c_entry_count, 0);
        cache->c_bucket_bits = bucket_bits;
        cache->c_block_hash = kmalloc(bucket_count *
                sizeof(struct hlist_bl_head), GFP_KERNEL);
        if (!cache->c_block_hash)
                goto fail;
        for (n = 0; n < bucket_count; n++)
                INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
        cache->c_index_hash = kmalloc(bucket_count *
                sizeof(struct hlist_bl_head), GFP_KERNEL);
        if (!cache->c_index_hash)
                goto fail;
        for (n = 0; n < bucket_count; n++)
                INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
        if (!mb_cache_kmem_cache) {
                mb_cache_kmem_cache = kmem_cache_create(name,
                        sizeof(struct mb_cache_entry), 0,
                        SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
                if (!mb_cache_kmem_cache)
                        goto fail2;
        }
        cache->c_entry_cache = mb_cache_kmem_cache;

        /*
         * Set an upper limit on the number of cache entries so that the hash
         * chains won't grow too long.
         */
        cache->c_max_entries = bucket_count << 4;

        spin_lock(&mb_cache_spinlock);
        list_add(&cache->c_cache_list, &mb_cache_list);
        spin_unlock(&mb_cache_spinlock);
        return cache;

fail2:
        kfree(cache->c_index_hash);

fail:
        kfree(cache->c_block_hash);
        kfree(cache);
        return NULL;
}

/*
 * mb_cache_shrink()
 *
 * Removes all cache entries of a device from the cache. All cache entries
 * currently in use cannot be freed, and thus remain in the cache. All others
 * are freed.
 *
 * @bdev: which device's cache entries to shrink
 */
void
mb_cache_shrink(struct block_device *bdev)
{
        LIST_HEAD(free_list);
        struct list_head *l;
        struct mb_cache_entry *ce, *tmp;

        l = &mb_cache_lru_list;
        spin_lock(&mb_cache_spinlock);
        while (!list_is_last(l, &mb_cache_lru_list)) {
                l = l->next;
                ce = list_entry(l, struct mb_cache_entry, e_lru_list);
                if (ce->e_bdev == bdev) {
                        list_del_init(&ce->e_lru_list);
                        if (ce->e_used || ce->e_queued ||
                            atomic_read(&ce->e_refcnt))
                                continue;
                        spin_unlock(&mb_cache_spinlock);
                        /*
                         * Prevent any find or get operation on the entry.
                         */
                        hlist_bl_lock(ce->e_block_hash_p);
                        hlist_bl_lock(ce->e_index_hash_p);
                        /* Ignore if it is touched by a find/get */
                        if (ce->e_used || ce->e_queued ||
                            atomic_read(&ce->e_refcnt) ||
                            !list_empty(&ce->e_lru_list)) {
                                hlist_bl_unlock(ce->e_index_hash_p);
                                hlist_bl_unlock(ce->e_block_hash_p);
                                l = &mb_cache_lru_list;
                                spin_lock(&mb_cache_spinlock);
                                continue;
                        }
                        __mb_cache_entry_unhash_unlock(ce);
                        mb_assert(!(ce->e_used || ce->e_queued ||
                                    atomic_read(&ce->e_refcnt)));
                        list_add_tail(&ce->e_lru_list, &free_list);
                        l = &mb_cache_lru_list;
                        spin_lock(&mb_cache_spinlock);
                }
        }
        spin_unlock(&mb_cache_spinlock);

        list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
                __mb_cache_entry_forget(ce, GFP_KERNEL);
        }
}

/*
 * mb_cache_destroy()
 *
 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
 * and then destroys it. If this was the last mbcache, un-registers the
 * mbcache from kernel memory management.
 */
void
mb_cache_destroy(struct mb_cache *cache)
{
        LIST_HEAD(free_list);
        struct mb_cache_entry *ce, *tmp;

        spin_lock(&mb_cache_spinlock);
        list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
                if (ce->e_cache == cache)
                        list_move_tail(&ce->e_lru_list, &free_list);
        }
        list_del(&cache->c_cache_list);
        spin_unlock(&mb_cache_spinlock);

        list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
                list_del_init(&ce->e_lru_list);
                /*
                 * Prevent any find or get operation on the entry.
                 */
                hlist_bl_lock(ce->e_block_hash_p);
                hlist_bl_lock(ce->e_index_hash_p);
                mb_assert(!(ce->e_used || ce->e_queued ||
                            atomic_read(&ce->e_refcnt)));
                __mb_cache_entry_unhash_unlock(ce);
                __mb_cache_entry_forget(ce, GFP_KERNEL);
        }

        if (atomic_read(&cache->c_entry_count) > 0) {
                mb_error("cache %s: %d orphaned entries",
                         cache->c_name,
                         atomic_read(&cache->c_entry_count));
        }

        if (list_empty(&mb_cache_list)) {
                kmem_cache_destroy(mb_cache_kmem_cache);
                mb_cache_kmem_cache = NULL;
        }
        kfree(cache->c_index_hash);
        kfree(cache->c_block_hash);
        kfree(cache);
}

/*
 * mb_cache_entry_alloc()
 *
 * Allocates a new cache entry. The new entry will not be valid initially,
 * and thus cannot be looked up yet. It should be filled with data, and
 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
 * if no more memory was available.
 */
struct mb_cache_entry *
mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
{
        struct mb_cache_entry *ce;

        if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
                struct list_head *l;

                l = &mb_cache_lru_list;
                spin_lock(&mb_cache_spinlock);
                while (!list_is_last(l, &mb_cache_lru_list)) {
                        l = l->next;
                        ce = list_entry(l, struct mb_cache_entry, e_lru_list);
                        if (ce->e_cache == cache) {
                                list_del_init(&ce->e_lru_list);
                                if (ce->e_used || ce->e_queued ||
                                    atomic_read(&ce->e_refcnt))
                                        continue;
                                spin_unlock(&mb_cache_spinlock);
                                /*
                                 * Prevent any find or get operation on the
                                 * entry.
                                 */
                                hlist_bl_lock(ce->e_block_hash_p);
                                hlist_bl_lock(ce->e_index_hash_p);
                                /* Ignore if it is touched by a find/get */
                                if (ce->e_used || ce->e_queued ||
                                    atomic_read(&ce->e_refcnt) ||
                                    !list_empty(&ce->e_lru_list)) {
                                        hlist_bl_unlock(ce->e_index_hash_p);
                                        hlist_bl_unlock(ce->e_block_hash_p);
                                        l = &mb_cache_lru_list;
                                        spin_lock(&mb_cache_spinlock);
                                        continue;
                                }
                                mb_assert(list_empty(&ce->e_lru_list));
                                mb_assert(!(ce->e_used || ce->e_queued ||
                                            atomic_read(&ce->e_refcnt)));
                                __mb_cache_entry_unhash_unlock(ce);
                                goto found;
                        }
                }
                spin_unlock(&mb_cache_spinlock);
        }

        ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
        if (!ce)
                return NULL;
        atomic_inc(&cache->c_entry_count);
        INIT_LIST_HEAD(&ce->e_lru_list);
        INIT_HLIST_BL_NODE(&ce->e_block_list);
        INIT_HLIST_BL_NODE(&ce->e_index.o_list);
        ce->e_cache = cache;
        ce->e_queued = 0;
        atomic_set(&ce->e_refcnt, 0);
found:
        ce->e_block_hash_p = &cache->c_block_hash[0];
        ce->e_index_hash_p = &cache->c_index_hash[0];
        ce->e_used = 1 + MB_CACHE_WRITER;
        return ce;
}
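
/*
 * Note: the entry returned by mb_cache_entry_alloc() is already held for
 * exclusive access (e_used = 1 + MB_CACHE_WRITER), so after a successful
 * mb_cache_entry_insert() the caller still has to drop its handle, typically
 * with mb_cache_entry_release() (or mb_cache_entry_free() on failure).
 */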

/*
 * mb_cache_entry_insert()
 *
 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
 * the cache. After this, the cache entry can be looked up, but is not yet
 * in the lru list as the caller still holds a handle to it. Returns 0 on
 * success, or -EBUSY if a cache entry for that device + block exists
 * already (this may happen after a failed lookup, when another process
 * has inserted the same cache entry in the meantime).
 *
 * @bdev: device the cache entry belongs to
 * @block: block number
 * @key: lookup key
 */
int
mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
                      sector_t block, unsigned int key)
{
        struct mb_cache *cache = ce->e_cache;
        unsigned int bucket;
        struct hlist_bl_node *l;
        struct hlist_bl_head *block_hash_p;
        struct hlist_bl_head *index_hash_p;
        struct mb_cache_entry *lce;

        mb_assert(ce);
        bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
                           cache->c_bucket_bits);
        block_hash_p = &cache->c_block_hash[bucket];
        hlist_bl_lock(block_hash_p);
        hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
                if (lce->e_bdev == bdev && lce->e_block == block) {
                        hlist_bl_unlock(block_hash_p);
                        return -EBUSY;
                }
        }
        mb_assert(!__mb_cache_entry_is_block_hashed(ce));
        __mb_cache_entry_unhash_block(ce);
        __mb_cache_entry_unhash_index(ce);
        ce->e_bdev = bdev;
        ce->e_block = block;
        ce->e_block_hash_p = block_hash_p;
        ce->e_index.o_key = key;
        hlist_bl_add_head(&ce->e_block_list, block_hash_p);
        hlist_bl_unlock(block_hash_p);
        bucket = hash_long(key, cache->c_bucket_bits);
        index_hash_p = &cache->c_index_hash[bucket];
        hlist_bl_lock(index_hash_p);
        ce->e_index_hash_p = index_hash_p;
        hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
        hlist_bl_unlock(index_hash_p);
        return 0;
}

/*
 * mb_cache_entry_release()
 *
 * Release a handle to a cache entry. When the last handle to a cache entry
 * is released it is either freed (if it is invalid) or otherwise inserted
 * into the lru list.
 */
void
mb_cache_entry_release(struct mb_cache_entry *ce)
{
        __mb_cache_entry_release(ce);
}

/*
 * mb_cache_entry_free()
 *
 * Invalidate a cache entry and release the caller's handle: the entry is
 * removed from both hash chains, so it can no longer be looked up, and it
 * is freed once the last handle to it has been dropped.
 */
void
mb_cache_entry_free(struct mb_cache_entry *ce)
{
        mb_assert(ce);
        mb_assert(list_empty(&ce->e_lru_list));
        hlist_bl_lock(ce->e_index_hash_p);
        __mb_cache_entry_unhash_index(ce);
        hlist_bl_unlock(ce->e_index_hash_p);
        hlist_bl_lock(ce->e_block_hash_p);
        __mb_cache_entry_unhash_block(ce);
        hlist_bl_unlock(ce->e_block_hash_p);
        __mb_cache_entry_release(ce);
}

/*
 * mb_cache_entry_get()
 *
 * Get a cache entry by device / block number. (There can only be one entry
 * in the cache per device and block.) Returns NULL if no such cache entry
 * exists. The returned cache entry is locked for exclusive access ("single
 * writer").
 */
struct mb_cache_entry *
mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
                   sector_t block)
{
        unsigned int bucket;
        struct hlist_bl_node *l;
        struct mb_cache_entry *ce;
        struct hlist_bl_head *block_hash_p;

        bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
                           cache->c_bucket_bits);
        block_hash_p = &cache->c_block_hash[bucket];
        /* First serialize access to the corresponding block hash chain. */
        hlist_bl_lock(block_hash_p);
        hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
                mb_assert(ce->e_block_hash_p == block_hash_p);
                if (ce->e_bdev == bdev && ce->e_block == block) {
                        /*
                         * Prevent a free from removing the entry.
                         */
                        atomic_inc(&ce->e_refcnt);
                        hlist_bl_unlock(block_hash_p);
                        __spin_lock_mb_cache_entry(ce);
                        atomic_dec(&ce->e_refcnt);
                        if (ce->e_used > 0) {
                                DEFINE_WAIT(wait);

                                while (ce->e_used > 0) {
                                        ce->e_queued++;
                                        prepare_to_wait(&mb_cache_queue, &wait,
                                                        TASK_UNINTERRUPTIBLE);
                                        __spin_unlock_mb_cache_entry(ce);
                                        schedule();
                                        __spin_lock_mb_cache_entry(ce);
                                        ce->e_queued--;
                                }
                                finish_wait(&mb_cache_queue, &wait);
                        }
                        ce->e_used += 1 + MB_CACHE_WRITER;
                        __spin_unlock_mb_cache_entry(ce);

                        if (!list_empty(&ce->e_lru_list)) {
                                spin_lock(&mb_cache_spinlock);
                                list_del_init(&ce->e_lru_list);
                                spin_unlock(&mb_cache_spinlock);
                        }
                        if (!__mb_cache_entry_is_block_hashed(ce)) {
                                __mb_cache_entry_release(ce);
                                return NULL;
                        }
                        return ce;
                }
        }
        hlist_bl_unlock(block_hash_p);
        return NULL;
}
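
/*
 * Note: a successful mb_cache_entry_get() returns the entry held as a
 * writer, so the caller ends the exclusive access with either
 * mb_cache_entry_release() (keep the entry cached) or
 * mb_cache_entry_free() (invalidate it).
 */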

#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)

static struct mb_cache_entry *
__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
                      struct block_device *bdev, unsigned int key)
{
        /* The index hash chain is already locked by the caller. */
        while (l != NULL) {
                struct mb_cache_entry *ce =
                        hlist_bl_entry(l, struct mb_cache_entry,
                                       e_index.o_list);

                mb_assert(ce->e_index_hash_p == head);
                if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
                        /*
                         * Prevent a free from removing the entry.
                         */
                        atomic_inc(&ce->e_refcnt);
                        hlist_bl_unlock(head);
                        __spin_lock_mb_cache_entry(ce);
                        atomic_dec(&ce->e_refcnt);
                        ce->e_used++;
                        /*
                         * Incrementing before holding the lock gives readers
                         * priority over writers.
                         */
                        if (ce->e_used >= MB_CACHE_WRITER) {
                                DEFINE_WAIT(wait);

                                while (ce->e_used >= MB_CACHE_WRITER) {
                                        ce->e_queued++;
                                        prepare_to_wait(&mb_cache_queue, &wait,
                                                        TASK_UNINTERRUPTIBLE);
                                        __spin_unlock_mb_cache_entry(ce);
                                        schedule();
                                        __spin_lock_mb_cache_entry(ce);
                                        ce->e_queued--;
                                }
                                finish_wait(&mb_cache_queue, &wait);
                        }
                        __spin_unlock_mb_cache_entry(ce);
                        if (!list_empty(&ce->e_lru_list)) {
                                spin_lock(&mb_cache_spinlock);
                                list_del_init(&ce->e_lru_list);
                                spin_unlock(&mb_cache_spinlock);
                        }
                        if (!__mb_cache_entry_is_block_hashed(ce)) {
                                __mb_cache_entry_release(ce);
                                return ERR_PTR(-EAGAIN);
                        }
                        return ce;
                }
                l = l->next;
        }
        hlist_bl_unlock(head);
        return NULL;
}

/*
 * mb_cache_entry_find_first()
 *
 * Find the first cache entry on a given device with a certain key in
 * an additional index. Additional matches can be found with
 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
 * returned cache entry is locked for shared access ("multiple readers").
 *
 * @cache: the cache to search
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
struct mb_cache_entry *
mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
                          unsigned int key)
{
        unsigned int bucket = hash_long(key, cache->c_bucket_bits);
        struct hlist_bl_node *l;
        struct mb_cache_entry *ce = NULL;
        struct hlist_bl_head *index_hash_p;

        index_hash_p = &cache->c_index_hash[bucket];
        hlist_bl_lock(index_hash_p);
        if (!hlist_bl_empty(index_hash_p)) {
                l = hlist_bl_first(index_hash_p);
                ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
        } else
                hlist_bl_unlock(index_hash_p);
        return ce;
}

/*
 * mb_cache_entry_find_next()
 *
 * Find the next cache entry on a given device with a certain key in an
 * additional index. Returns NULL if no match could be found. The previous
 * entry is automatically released, so that mb_cache_entry_find_next() can
 * be called like this:
 *
 * entry = mb_cache_entry_find_first();
 * while (entry) {
 *	...
 *	entry = mb_cache_entry_find_next(entry, ...);
 * }
 *
 * @prev: The previous match
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
struct mb_cache_entry *
mb_cache_entry_find_next(struct mb_cache_entry *prev,
                         struct block_device *bdev, unsigned int key)
{
        struct mb_cache *cache = prev->e_cache;
        unsigned int bucket = hash_long(key, cache->c_bucket_bits);
        struct hlist_bl_node *l;
        struct mb_cache_entry *ce;
        struct hlist_bl_head *index_hash_p;

        index_hash_p = &cache->c_index_hash[bucket];
        mb_assert(prev->e_index_hash_p == index_hash_p);
        hlist_bl_lock(index_hash_p);
        mb_assert(!hlist_bl_empty(index_hash_p));
        l = prev->e_index.o_list.next;
        ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
        __mb_cache_entry_release(prev);
        return ce;
}

#endif  /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */

static int __init init_mbcache(void)
{
        register_shrinker(&mb_cache_shrinker);
        return 0;
}

static void __exit exit_mbcache(void)
{
        unregister_shrinker(&mb_cache_shrinker);
}

module_init(init_mbcache)
module_exit(exit_mbcache)