list_lru.c

/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
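
/*
 * Overview:
 *
 * A struct list_lru keeps one struct list_lru_node per NUMA node.  Each
 * node holds a plain struct list_lru_one and, when the LRU is memcg
 * aware, an RCU-protected struct list_lru_memcg array with one
 * list_lru_one per kmem cgroup, indexed by memcg_cache_id().  Items are
 * filed on the list matching their NUMA node and owning memcg, so
 * shrinkers can walk exactly the lists that belong to the node and
 * cgroup being reclaimed.
 */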
#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to always be present, even on systems
	 * supporting sparse NUMA ids.
	 */
	return !!lru->node[0].memcg_lrus;
}
static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
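
/*
 * Example (illustrative sketch, not part of this file): a cache that wants
 * its objects tracked on a list_lru embeds a list_head in each object and
 * calls list_lru_add()/list_lru_del().  The struct and function names below
 * are made up for the example.
 */
#if 0	/* usage sketch only */
struct my_cache_object {
	struct list_head lru;	/* initialized with INIT_LIST_HEAD() */
	/* ... payload ... */
};

/* Objects must come from kmalloc/slab so virt_to_page() on them is valid. */
static struct list_lru my_cache_lru;

static void my_cache_object_make_reclaimable(struct my_cache_object *obj)
{
	/* Returns true only if the item was not already on the list. */
	if (!list_lru_add(&my_cache_lru, &obj->lru))
		pr_debug("object already on the LRU\n");
}

static void my_cache_object_reuse(struct my_cache_object *obj)
{
	/* Returns true only if the item was actually removed. */
	list_lru_del(&my_cache_lru, &obj->lru);
}
#endif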
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * Decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items.
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
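
/*
 * Example isolate callback (illustrative sketch, not part of this file).
 * The callback is invoked with the per-node lru lock held; it should use
 * list_lru_isolate()/list_lru_isolate_move() rather than plain list_del()
 * so nr_items stays in sync, and it must return one of the enum lru_status
 * values handled in __list_lru_walk_one() above.  The object and helper
 * names below are made up for the example.
 */
#if 0	/* usage sketch only */
static enum lru_status my_cache_lru_isolate(struct list_head *item,
					    struct list_lru_one *list,
					    spinlock_t *lock, void *cb_arg)
{
	struct list_head *dispose = cb_arg;
	struct my_cache_object *obj = container_of(item,
					struct my_cache_object, lru);

	/* Busy objects stay on the LRU and are revisited later. */
	if (my_cache_object_in_use(obj))
		return LRU_ROTATE;

	/* Move the victim to a private list to be freed after the walk. */
	list_lru_isolate_move(list, item, dispose);
	return LRU_REMOVED;
}

/* Caller: walk up to 128 entries, then free whatever was isolated. */
static void my_cache_shrink(int nid)
{
	LIST_HEAD(dispose);
	unsigned long nr_to_walk = 128;

	list_lru_walk_node(&my_cache_lru, nid, my_cache_lru_isolate,
			   &dispose, &nr_to_walk);
	/* free the objects collected on &dispose here */
}
#endif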
static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* Entries in [begin, i) were allocated above and must all be freed. */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}
static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}
static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * so nobody can use it any more.  There is thus no need to use
	 * kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}
static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}
static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
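
/*
 * Example init/teardown (illustrative sketch, not part of this file).
 * Callers normally reach __list_lru_init() through the list_lru_init() /
 * list_lru_init_memcg() wrappers declared in <linux/list_lru.h>; the cache
 * and shrinker names below are made up for the example.
 */
#if 0	/* usage sketch only */
static struct list_lru my_cache_lru;
static struct shrinker my_cache_shrinker;	/* set up/registered elsewhere */

static int __init my_cache_init(void)
{
	/* memcg-aware LRU, tied to the shrinker id for the shrinker bitmap */
	return list_lru_init_memcg(&my_cache_lru, &my_cache_shrinker);
}

static void __exit my_cache_exit(void)
{
	list_lru_destroy(&my_cache_lru);
}
#endif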
void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();
	list_lru_unregister(lru);
	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);