list_lru.c

/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
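
/*
 * A list_lru keeps one list_lru_node per NUMA node, each protected by
 * nlru->lock.  When CONFIG_MEMCG_KMEM is enabled and the LRU is memcg
 * aware, every node additionally carries an RCU-protected array of
 * per-memcg lists (struct list_lru_memcg), indexed by the kmem cache id
 * of the owning cgroup, so objects can be counted and reclaimed per cgroup.
 */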

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to always be present, even on systems
	 * supporting sparse numa ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */
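
/*
 * list_lru_add - add an object to the LRU list of the node (and memcg,
 * if the LRU is memcg aware) it was allocated from.  Returns true if the
 * item was added, false if it was already on a list.  The per-node lock
 * is taken internally, so callers must not hold it.
 */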
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);
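
/*
 * list_lru_del - remove an object from its LRU list.  Returns true if
 * the item was on a list and has been removed, false otherwise.  Like
 * list_lru_add(), it takes the per-node lock itself.
 */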
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
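
/*
 * list_lru_isolate() and list_lru_isolate_move() are meant to be called
 * from a walk callback with the LRU lock held (it is handed to the
 * callback by __list_lru_walk_one()), so they only adjust the list and
 * its item count and never touch the lock themselves.
 */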
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);
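
/*
 * list_lru_count_one() returns the number of items on the list of one
 * node/memcg pair, while list_lru_count_node() returns the per-node
 * total across all memcgs.  Neither takes the per-node spinlock, so the
 * returned values are only a snapshot.
 */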
unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
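
/*
 * The isolate callback passed to the walkers below is called with the
 * per-node lock held and must return one of the lru_status codes:
 * LRU_REMOVED (item was isolated), LRU_REMOVED_RETRY (isolated, but the
 * lock was dropped and reacquired, so the walk restarts), LRU_RETRY
 * (lock dropped and reacquired, restart without isolating), LRU_ROTATE
 * (move the item to the list tail) or LRU_SKIP (leave it in place).
 */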
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
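
/*
 * list_lru_walk_node() walks the global (non-memcg) list of a node first
 * and then, if the LRU is memcg aware and the walk budget is not yet
 * exhausted, every per-memcg list on that node.
 */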
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* Free only what was actually allocated, i.e. slots [begin, i). */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}
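
/*
 * When the memcg cache id space grows, the per-node array of per-memcg
 * lists has to be reallocated.  The old array stays visible to RCU
 * readers until a grace period has elapsed, hence the call_rcu() /
 * kvfree_rcu() pair below.
 */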
static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because
	 * we cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}
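
/*
 * memcg_update_all_list_lrus() resizes the per-memcg arrays of every
 * registered LRU from memcg_nr_cache_ids entries to new_size entries.
 * On failure, the LRUs that were already resized are rolled back, all
 * under list_lrus_mutex.
 */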
int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}
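
/*
 * The drain helpers below splice every object from the per-memcg lists
 * of src_idx onto the corresponding lists of dst_memcg, updating the
 * item counts and setting the destination's shrinker bit if its lists
 * were previously empty.  This is used to reparent list_lru entries
 * when the source cgroup goes away.
 */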
static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */
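
/*
 * Sketch of the typical call sequence: a cache embeds a struct list_lru,
 * sets it up with list_lru_init() or list_lru_init_memcg() (thin wrappers
 * around __list_lru_init() in <linux/list_lru.h>), feeds objects to it
 * with list_lru_add()/list_lru_del(), reclaims from a shrinker via
 * list_lru_shrink_count()/list_lru_shrink_walk(), and finally calls
 * list_lru_destroy() on teardown.
 */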
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
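
/*
 * list_lru_destroy() is safe to call on an LRU that was never
 * initialized or has already been destroyed: lru->node is set back to
 * NULL on teardown and checked here.
 */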
void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();
	list_lru_unregister(lru);
	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;
#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);