slab_common.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Slab allocator functions that are independent of the allocator strategy
  4. *
  5. * (C) 2012 Christoph Lameter <cl@linux.com>
  6. */
  7. #include <linux/slab.h>
  8. #include <linux/mm.h>
  9. #include <linux/poison.h>
  10. #include <linux/interrupt.h>
  11. #include <linux/memory.h>
  12. #include <linux/cache.h>
  13. #include <linux/compiler.h>
  14. #include <linux/module.h>
  15. #include <linux/cpu.h>
  16. #include <linux/uaccess.h>
  17. #include <linux/seq_file.h>
  18. #include <linux/proc_fs.h>
  19. #include <asm/cacheflush.h>
  20. #include <asm/tlbflush.h>
  21. #include <asm/page.h>
  22. #include <linux/memcontrol.h>
  23. #define CREATE_TRACE_POINTS
  24. #include <trace/events/kmem.h>
  25. #include "slab.h"
  26. enum slab_state slab_state;
  27. LIST_HEAD(slab_caches);
  28. DEFINE_MUTEX(slab_mutex);
  29. struct kmem_cache *kmem_cache;
  30. #ifdef CONFIG_HARDENED_USERCOPY
  31. bool usercopy_fallback __ro_after_init =
  32. IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
  33. module_param(usercopy_fallback, bool, 0400);
  34. MODULE_PARM_DESC(usercopy_fallback,
  35. "WARN instead of reject usercopy whitelist violations");
  36. #endif
  37. static LIST_HEAD(slab_caches_to_rcu_destroy);
  38. static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
  39. static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
  40. slab_caches_to_rcu_destroy_workfn);
  41. /*
  42. * Set of flags that will prevent slab merging
  43. */
  44. #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
  45. SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
  46. SLAB_FAILSLAB | SLAB_KASAN)
  47. #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
  48. SLAB_ACCOUNT)
  49. /*
  50. * Merge control. If this is set then no merging of slab caches will occur.
  51. */
  52. static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);
  53. static int __init setup_slab_nomerge(char *str)
  54. {
  55. slab_nomerge = true;
  56. return 1;
  57. }
  58. #ifdef CONFIG_SLUB
  59. __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
  60. #endif
  61. __setup("slab_nomerge", setup_slab_nomerge);
  62. /*
  63. * Determine the size of a slab object
  64. */
  65. unsigned int kmem_cache_size(struct kmem_cache *s)
  66. {
  67. return s->object_size;
  68. }
  69. EXPORT_SYMBOL(kmem_cache_size);
  70. #ifdef CONFIG_DEBUG_VM
  71. static int kmem_cache_sanity_check(const char *name, unsigned int size)
  72. {
  73. if (!name || in_interrupt() || size < sizeof(void *) ||
  74. size > KMALLOC_MAX_SIZE) {
  75. pr_err("kmem_cache_create(%s) integrity check failed\n", name);
  76. return -EINVAL;
  77. }
  78. WARN_ON(strchr(name, ' ')); /* It confuses parsers */
  79. return 0;
  80. }
  81. #else
  82. static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
  83. {
  84. return 0;
  85. }
  86. #endif
  87. void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
  88. {
  89. size_t i;
  90. for (i = 0; i < nr; i++) {
  91. if (s)
  92. kmem_cache_free(s, p[i]);
  93. else
  94. kfree(p[i]);
  95. }
  96. }
  97. int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
  98. void **p)
  99. {
  100. size_t i;
  101. for (i = 0; i < nr; i++) {
  102. void *x = p[i] = kmem_cache_alloc(s, flags);
  103. if (!x) {
  104. __kmem_cache_free_bulk(s, i, p);
  105. return 0;
  106. }
  107. }
  108. return i;
  109. }
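/*
 * Editor's illustration (not part of the original file): how the public
 * bulk API that sits on top of these fallbacks is typically used. The
 * function name example_bulk_usage and the batch size of 16 are
 * assumptions made for this sketch only.
 */
static __maybe_unused int example_bulk_usage(struct kmem_cache *cache)
{
        void *objs[16];
        int allocated;

        /* kmem_cache_alloc_bulk() allocates all requested objects or none. */
        allocated = kmem_cache_alloc_bulk(cache, GFP_KERNEL,
                                          ARRAY_SIZE(objs), objs);
        if (!allocated)
                return -ENOMEM;

        /* ... use the objects ... */

        kmem_cache_free_bulk(cache, ARRAY_SIZE(objs), objs);
        return 0;
}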
  110. #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
  111. LIST_HEAD(slab_root_caches);
  112. void slab_init_memcg_params(struct kmem_cache *s)
  113. {
  114. s->memcg_params.root_cache = NULL;
  115. RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
  116. INIT_LIST_HEAD(&s->memcg_params.children);
  117. s->memcg_params.dying = false;
  118. }
  119. static int init_memcg_params(struct kmem_cache *s,
  120. struct mem_cgroup *memcg, struct kmem_cache *root_cache)
  121. {
  122. struct memcg_cache_array *arr;
  123. if (root_cache) {
  124. s->memcg_params.root_cache = root_cache;
  125. s->memcg_params.memcg = memcg;
  126. INIT_LIST_HEAD(&s->memcg_params.children_node);
  127. INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
  128. return 0;
  129. }
  130. slab_init_memcg_params(s);
  131. if (!memcg_nr_cache_ids)
  132. return 0;
  133. arr = kvzalloc(sizeof(struct memcg_cache_array) +
  134. memcg_nr_cache_ids * sizeof(void *),
  135. GFP_KERNEL);
  136. if (!arr)
  137. return -ENOMEM;
  138. RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
  139. return 0;
  140. }
  141. static void destroy_memcg_params(struct kmem_cache *s)
  142. {
  143. if (is_root_cache(s))
  144. kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
  145. }
  146. static void free_memcg_params(struct rcu_head *rcu)
  147. {
  148. struct memcg_cache_array *old;
  149. old = container_of(rcu, struct memcg_cache_array, rcu);
  150. kvfree(old);
  151. }
  152. static int update_memcg_params(struct kmem_cache *s, int new_array_size)
  153. {
  154. struct memcg_cache_array *old, *new;
  155. new = kvzalloc(sizeof(struct memcg_cache_array) +
  156. new_array_size * sizeof(void *), GFP_KERNEL);
  157. if (!new)
  158. return -ENOMEM;
  159. old = rcu_dereference_protected(s->memcg_params.memcg_caches,
  160. lockdep_is_held(&slab_mutex));
  161. if (old)
  162. memcpy(new->entries, old->entries,
  163. memcg_nr_cache_ids * sizeof(void *));
  164. rcu_assign_pointer(s->memcg_params.memcg_caches, new);
  165. if (old)
  166. call_rcu(&old->rcu, free_memcg_params);
  167. return 0;
  168. }
  169. int memcg_update_all_caches(int num_memcgs)
  170. {
  171. struct kmem_cache *s;
  172. int ret = 0;
  173. mutex_lock(&slab_mutex);
  174. list_for_each_entry(s, &slab_root_caches, root_caches_node) {
  175. ret = update_memcg_params(s, num_memcgs);
  176. /*
* If updating one of the caches fails, do not roll back: the caches
* updated up to this point simply keep their enlarged arrays.
  179. */
  180. if (ret)
  181. break;
  182. }
  183. mutex_unlock(&slab_mutex);
  184. return ret;
  185. }
  186. void memcg_link_cache(struct kmem_cache *s)
  187. {
  188. if (is_root_cache(s)) {
  189. list_add(&s->root_caches_node, &slab_root_caches);
  190. } else {
  191. list_add(&s->memcg_params.children_node,
  192. &s->memcg_params.root_cache->memcg_params.children);
  193. list_add(&s->memcg_params.kmem_caches_node,
  194. &s->memcg_params.memcg->kmem_caches);
  195. }
  196. }
  197. static void memcg_unlink_cache(struct kmem_cache *s)
  198. {
  199. if (is_root_cache(s)) {
  200. list_del(&s->root_caches_node);
  201. } else {
  202. list_del(&s->memcg_params.children_node);
  203. list_del(&s->memcg_params.kmem_caches_node);
  204. }
  205. }
  206. #else
  207. static inline int init_memcg_params(struct kmem_cache *s,
  208. struct mem_cgroup *memcg, struct kmem_cache *root_cache)
  209. {
  210. return 0;
  211. }
  212. static inline void destroy_memcg_params(struct kmem_cache *s)
  213. {
  214. }
  215. static inline void memcg_unlink_cache(struct kmem_cache *s)
  216. {
  217. }
  218. #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
  219. /*
  220. * Figure out what the alignment of the objects will be given a set of
  221. * flags, a user specified alignment and the size of the objects.
  222. */
  223. static unsigned int calculate_alignment(slab_flags_t flags,
  224. unsigned int align, unsigned int size)
  225. {
  226. /*
  227. * If the user wants hardware cache aligned objects then follow that
  228. * suggestion if the object is sufficiently large.
  229. *
  230. * The hardware cache alignment cannot override the specified
* alignment though. If that is greater, then use it.
  232. */
  233. if (flags & SLAB_HWCACHE_ALIGN) {
  234. unsigned int ralign;
  235. ralign = cache_line_size();
  236. while (size <= ralign / 2)
  237. ralign /= 2;
  238. align = max(align, ralign);
  239. }
  240. if (align < ARCH_SLAB_MINALIGN)
  241. align = ARCH_SLAB_MINALIGN;
  242. return ALIGN(align, sizeof(void *));
  243. }
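/*
 * Worked example (editor's addition, assuming a 64-byte cache line and an
 * 8-byte ARCH_SLAB_MINALIGN): for a 20-byte object created with
 * SLAB_HWCACHE_ALIGN and a requested align of 8, ralign starts at 64 and is
 * halved while size <= ralign / 2, stopping at 32; align becomes
 * max(8, 32) = 32, which is already pointer-aligned, so 32 is returned.
 */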
  244. /*
  245. * Find a mergeable slab cache
  246. */
  247. int slab_unmergeable(struct kmem_cache *s)
  248. {
  249. if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
  250. return 1;
  251. if (!is_root_cache(s))
  252. return 1;
  253. if (s->ctor)
  254. return 1;
  255. if (s->usersize)
  256. return 1;
  257. /*
  258. * We may have set a slab to be unmergeable during bootstrap.
  259. */
  260. if (s->refcount < 0)
  261. return 1;
  262. return 0;
  263. }
  264. struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
  265. slab_flags_t flags, const char *name, void (*ctor)(void *))
  266. {
  267. struct kmem_cache *s;
  268. if (slab_nomerge)
  269. return NULL;
  270. if (ctor)
  271. return NULL;
  272. size = ALIGN(size, sizeof(void *));
  273. align = calculate_alignment(flags, align, size);
  274. size = ALIGN(size, align);
  275. flags = kmem_cache_flags(size, flags, name, NULL);
  276. if (flags & SLAB_NEVER_MERGE)
  277. return NULL;
  278. list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
  279. if (slab_unmergeable(s))
  280. continue;
  281. if (size > s->size)
  282. continue;
  283. if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
  284. continue;
  285. /*
  286. * Check if alignment is compatible.
  287. * Courtesy of Adrian Drzewiecki
  288. */
  289. if ((s->size & ~(align - 1)) != s->size)
  290. continue;
  291. if (s->size - size >= sizeof(void *))
  292. continue;
  293. if (IS_ENABLED(CONFIG_SLAB) && align &&
  294. (align > s->align || s->align % align))
  295. continue;
  296. return s;
  297. }
  298. return NULL;
  299. }
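/*
 * Editor's illustration (not from the original source): what merging looks
 * like from the caller's side. The cache names "merge_demo_a"/"merge_demo_b"
 * are hypothetical; whether the two requests actually alias depends on
 * CONFIG_SLAB_MERGE_DEFAULT, the "slab_nomerge"/"slub_nomerge" parameters
 * and any debug flags that set SLAB_NEVER_MERGE.
 */
static __maybe_unused void example_merge_demo(void)
{
        struct kmem_cache *a, *b;

        a = kmem_cache_create("merge_demo_a", 56, 0, 0, NULL);
        b = kmem_cache_create("merge_demo_b", 56, 0, 0, NULL);
        if (!a || !b)
                goto out;

        /* With merging enabled, both requests may resolve to one cache. */
        pr_info("merge_demo: caches %s merged\n", a == b ? "were" : "were not");

out:
        kmem_cache_destroy(b);
        kmem_cache_destroy(a);
}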
  300. static struct kmem_cache *create_cache(const char *name,
  301. unsigned int object_size, unsigned int align,
  302. slab_flags_t flags, unsigned int useroffset,
  303. unsigned int usersize, void (*ctor)(void *),
  304. struct mem_cgroup *memcg, struct kmem_cache *root_cache)
  305. {
  306. struct kmem_cache *s;
  307. int err;
  308. if (WARN_ON(useroffset + usersize > object_size))
  309. useroffset = usersize = 0;
  310. err = -ENOMEM;
  311. s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
  312. if (!s)
  313. goto out;
  314. s->name = name;
  315. s->size = s->object_size = object_size;
  316. s->align = align;
  317. s->ctor = ctor;
  318. s->useroffset = useroffset;
  319. s->usersize = usersize;
  320. err = init_memcg_params(s, memcg, root_cache);
  321. if (err)
  322. goto out_free_cache;
  323. err = __kmem_cache_create(s, flags);
  324. if (err)
  325. goto out_free_cache;
  326. s->refcount = 1;
  327. list_add(&s->list, &slab_caches);
  328. memcg_link_cache(s);
  329. out:
  330. if (err)
  331. return ERR_PTR(err);
  332. return s;
  333. out_free_cache:
  334. destroy_memcg_params(s);
  335. kmem_cache_free(kmem_cache, s);
  336. goto out;
  337. }
  338. /*
  339. * kmem_cache_create_usercopy - Create a cache.
  340. * @name: A string which is used in /proc/slabinfo to identify this cache.
  341. * @size: The size of objects to be created in this cache.
  342. * @align: The required alignment for the objects.
  343. * @flags: SLAB flags
  344. * @useroffset: Usercopy region offset
  345. * @usersize: Usercopy region size
  346. * @ctor: A constructor for the objects.
  347. *
  348. * Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within an interrupt, but can be interrupted.
  350. * The @ctor is run when new pages are allocated by the cache.
  351. *
  352. * The flags are
  353. *
  354. * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
  355. * to catch references to uninitialised memory.
  356. *
  357. * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
  358. * for buffer overruns.
  359. *
  360. * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
  361. * cacheline. This can be beneficial if you're counting cycles as closely
  362. * as davem.
  363. */
  364. struct kmem_cache *
  365. kmem_cache_create_usercopy(const char *name,
  366. unsigned int size, unsigned int align,
  367. slab_flags_t flags,
  368. unsigned int useroffset, unsigned int usersize,
  369. void (*ctor)(void *))
  370. {
  371. struct kmem_cache *s = NULL;
  372. const char *cache_name;
  373. int err;
  374. get_online_cpus();
  375. get_online_mems();
  376. memcg_get_cache_ids();
  377. mutex_lock(&slab_mutex);
  378. err = kmem_cache_sanity_check(name, size);
  379. if (err) {
  380. goto out_unlock;
  381. }
  382. /* Refuse requests with allocator specific flags */
  383. if (flags & ~SLAB_FLAGS_PERMITTED) {
  384. err = -EINVAL;
  385. goto out_unlock;
  386. }
  387. /*
* Some allocators will constrain the set of valid flags to a subset
  389. * of all flags. We expect them to define CACHE_CREATE_MASK in this
  390. * case, and we'll just provide them with a sanitized version of the
  391. * passed flags.
  392. */
  393. flags &= CACHE_CREATE_MASK;
/* Fail closed on bad usersize or useroffset values. */
  395. if (WARN_ON(!usersize && useroffset) ||
  396. WARN_ON(size < usersize || size - usersize < useroffset))
  397. usersize = useroffset = 0;
  398. if (!usersize)
  399. s = __kmem_cache_alias(name, size, align, flags, ctor);
  400. if (s)
  401. goto out_unlock;
  402. cache_name = kstrdup_const(name, GFP_KERNEL);
  403. if (!cache_name) {
  404. err = -ENOMEM;
  405. goto out_unlock;
  406. }
  407. s = create_cache(cache_name, size,
  408. calculate_alignment(flags, align, size),
  409. flags, useroffset, usersize, ctor, NULL, NULL);
  410. if (IS_ERR(s)) {
  411. err = PTR_ERR(s);
  412. kfree_const(cache_name);
  413. }
  414. out_unlock:
  415. mutex_unlock(&slab_mutex);
  416. memcg_put_cache_ids();
  417. put_online_mems();
  418. put_online_cpus();
  419. if (err) {
  420. if (flags & SLAB_PANIC)
  421. panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
  422. name, err);
  423. else {
  424. pr_warn("kmem_cache_create(%s) failed with error %d\n",
  425. name, err);
  426. dump_stack();
  427. }
  428. return NULL;
  429. }
  430. return s;
  431. }
  432. EXPORT_SYMBOL(kmem_cache_create_usercopy);
  433. struct kmem_cache *
  434. kmem_cache_create(const char *name, unsigned int size, unsigned int align,
  435. slab_flags_t flags, void (*ctor)(void *))
  436. {
  437. return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
  438. ctor);
  439. }
  440. EXPORT_SYMBOL(kmem_cache_create);
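/*
 * Editor's sketch (not part of the original file): creating a cache with a
 * whitelisted usercopy region via kmem_cache_create_usercopy(). The type
 * "struct example_req" and its fields are hypothetical names used only for
 * this example.
 */
struct example_req {
        struct list_head list;  /* kernel-internal, never copied to user space */
        char user_data[64];     /* the only region exposed to copy_to/from_user */
};

static __maybe_unused struct kmem_cache *example_req_cache_create(void)
{
        return kmem_cache_create_usercopy("example_req",
                        sizeof(struct example_req), 0, SLAB_HWCACHE_ALIGN,
                        offsetof(struct example_req, user_data),
                        sizeof(((struct example_req *)0)->user_data),
                        NULL);
}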
  441. static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
  442. {
  443. LIST_HEAD(to_destroy);
  444. struct kmem_cache *s, *s2;
  445. /*
  446. * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
  447. * @slab_caches_to_rcu_destroy list. The slab pages are freed
* through RCU and the associated kmem_cache is dereferenced
  449. * while freeing the pages, so the kmem_caches should be freed only
  450. * after the pending RCU operations are finished. As rcu_barrier()
  451. * is a pretty slow operation, we batch all pending destructions
  452. * asynchronously.
  453. */
  454. mutex_lock(&slab_mutex);
  455. list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
  456. mutex_unlock(&slab_mutex);
  457. if (list_empty(&to_destroy))
  458. return;
  459. rcu_barrier();
  460. list_for_each_entry_safe(s, s2, &to_destroy, list) {
  461. #ifdef SLAB_SUPPORTS_SYSFS
  462. sysfs_slab_release(s);
  463. #else
  464. slab_kmem_cache_release(s);
  465. #endif
  466. }
  467. }
  468. static int shutdown_cache(struct kmem_cache *s)
  469. {
  470. /* free asan quarantined objects */
  471. kasan_cache_shutdown(s);
  472. if (__kmem_cache_shutdown(s) != 0)
  473. return -EBUSY;
  474. memcg_unlink_cache(s);
  475. list_del(&s->list);
  476. if (s->flags & SLAB_TYPESAFE_BY_RCU) {
  477. list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
  478. schedule_work(&slab_caches_to_rcu_destroy_work);
  479. } else {
  480. #ifdef SLAB_SUPPORTS_SYSFS
  481. sysfs_slab_release(s);
  482. #else
  483. slab_kmem_cache_release(s);
  484. #endif
  485. }
  486. return 0;
  487. }
  488. #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
  489. /*
  490. * memcg_create_kmem_cache - Create a cache for a memory cgroup.
  491. * @memcg: The memory cgroup the new cache is for.
  492. * @root_cache: The parent of the new cache.
  493. *
  494. * This function attempts to create a kmem cache that will serve allocation
  495. * requests going from @memcg to @root_cache. The new cache inherits properties
  496. * from its parent.
  497. */
  498. void memcg_create_kmem_cache(struct mem_cgroup *memcg,
  499. struct kmem_cache *root_cache)
  500. {
  501. static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
  502. struct cgroup_subsys_state *css = &memcg->css;
  503. struct memcg_cache_array *arr;
  504. struct kmem_cache *s = NULL;
  505. char *cache_name;
  506. int idx;
  507. get_online_cpus();
  508. get_online_mems();
  509. mutex_lock(&slab_mutex);
  510. /*
  511. * The memory cgroup could have been offlined while the cache
  512. * creation work was pending.
  513. */
  514. if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
  515. goto out_unlock;
  516. idx = memcg_cache_id(memcg);
  517. arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
  518. lockdep_is_held(&slab_mutex));
  519. /*
  520. * Since per-memcg caches are created asynchronously on first
  521. * allocation (see memcg_kmem_get_cache()), several threads can try to
  522. * create the same cache, but only one of them may succeed.
  523. */
  524. if (arr->entries[idx])
  525. goto out_unlock;
  526. cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
  527. cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
  528. css->serial_nr, memcg_name_buf);
  529. if (!cache_name)
  530. goto out_unlock;
  531. s = create_cache(cache_name, root_cache->object_size,
  532. root_cache->align,
  533. root_cache->flags & CACHE_CREATE_MASK,
  534. root_cache->useroffset, root_cache->usersize,
  535. root_cache->ctor, memcg, root_cache);
  536. /*
  537. * If we could not create a memcg cache, do not complain, because
  538. * that's not critical at all as we can always proceed with the root
  539. * cache.
  540. */
  541. if (IS_ERR(s)) {
  542. kfree(cache_name);
  543. goto out_unlock;
  544. }
  545. /*
  546. * Since readers won't lock (see cache_from_memcg_idx()), we need a
  547. * barrier here to ensure nobody will see the kmem_cache partially
  548. * initialized.
  549. */
  550. smp_wmb();
  551. arr->entries[idx] = s;
  552. out_unlock:
  553. mutex_unlock(&slab_mutex);
  554. put_online_mems();
  555. put_online_cpus();
  556. }
  557. static void kmemcg_deactivate_workfn(struct work_struct *work)
  558. {
  559. struct kmem_cache *s = container_of(work, struct kmem_cache,
  560. memcg_params.deact_work);
  561. get_online_cpus();
  562. get_online_mems();
  563. mutex_lock(&slab_mutex);
  564. s->memcg_params.deact_fn(s);
  565. mutex_unlock(&slab_mutex);
  566. put_online_mems();
  567. put_online_cpus();
  568. /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
  569. css_put(&s->memcg_params.memcg->css);
  570. }
  571. static void kmemcg_deactivate_rcufn(struct rcu_head *head)
  572. {
  573. struct kmem_cache *s = container_of(head, struct kmem_cache,
  574. memcg_params.deact_rcu_head);
  575. /*
  576. * We need to grab blocking locks. Bounce to ->deact_work. The
  577. * work item shares the space with the RCU head and can't be
* initialized earlier.
  579. */
  580. INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
  581. queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
  582. }
  583. /**
  584. * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
  585. * sched RCU grace period
  586. * @s: target kmem_cache
  587. * @deact_fn: deactivation function to call
  588. *
  589. * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
  590. * held after a sched RCU grace period. The slab is guaranteed to stay
  591. * alive until @deact_fn is finished. This is to be used from
  592. * __kmemcg_cache_deactivate().
  593. */
  594. void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
  595. void (*deact_fn)(struct kmem_cache *))
  596. {
  597. if (WARN_ON_ONCE(is_root_cache(s)) ||
  598. WARN_ON_ONCE(s->memcg_params.deact_fn))
  599. return;
  600. if (s->memcg_params.root_cache->memcg_params.dying)
  601. return;
  602. /* pin memcg so that @s doesn't get destroyed in the middle */
  603. css_get(&s->memcg_params.memcg->css);
  604. s->memcg_params.deact_fn = deact_fn;
  605. call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
  606. }
  607. void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
  608. {
  609. int idx;
  610. struct memcg_cache_array *arr;
  611. struct kmem_cache *s, *c;
  612. idx = memcg_cache_id(memcg);
  613. get_online_cpus();
  614. get_online_mems();
  615. mutex_lock(&slab_mutex);
  616. list_for_each_entry(s, &slab_root_caches, root_caches_node) {
  617. arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
  618. lockdep_is_held(&slab_mutex));
  619. c = arr->entries[idx];
  620. if (!c)
  621. continue;
  622. __kmemcg_cache_deactivate(c);
  623. arr->entries[idx] = NULL;
  624. }
  625. mutex_unlock(&slab_mutex);
  626. put_online_mems();
  627. put_online_cpus();
  628. }
  629. void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
  630. {
  631. struct kmem_cache *s, *s2;
  632. get_online_cpus();
  633. get_online_mems();
  634. mutex_lock(&slab_mutex);
  635. list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
  636. memcg_params.kmem_caches_node) {
  637. /*
  638. * The cgroup is about to be freed and therefore has no charges
  639. * left. Hence, all its caches must be empty by now.
  640. */
  641. BUG_ON(shutdown_cache(s));
  642. }
  643. mutex_unlock(&slab_mutex);
  644. put_online_mems();
  645. put_online_cpus();
  646. }
  647. static int shutdown_memcg_caches(struct kmem_cache *s)
  648. {
  649. struct memcg_cache_array *arr;
  650. struct kmem_cache *c, *c2;
  651. LIST_HEAD(busy);
  652. int i;
  653. BUG_ON(!is_root_cache(s));
  654. /*
  655. * First, shutdown active caches, i.e. caches that belong to online
  656. * memory cgroups.
  657. */
  658. arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
  659. lockdep_is_held(&slab_mutex));
  660. for_each_memcg_cache_index(i) {
  661. c = arr->entries[i];
  662. if (!c)
  663. continue;
  664. if (shutdown_cache(c))
  665. /*
  666. * The cache still has objects. Move it to a temporary
  667. * list so as not to try to destroy it for a second
  668. * time while iterating over inactive caches below.
  669. */
  670. list_move(&c->memcg_params.children_node, &busy);
  671. else
  672. /*
  673. * The cache is empty and will be destroyed soon. Clear
  674. * the pointer to it in the memcg_caches array so that
  675. * it will never be accessed even if the root cache
  676. * stays alive.
  677. */
  678. arr->entries[i] = NULL;
  679. }
  680. /*
  681. * Second, shutdown all caches left from memory cgroups that are now
  682. * offline.
  683. */
  684. list_for_each_entry_safe(c, c2, &s->memcg_params.children,
  685. memcg_params.children_node)
  686. shutdown_cache(c);
  687. list_splice(&busy, &s->memcg_params.children);
  688. /*
  689. * A cache being destroyed must be empty. In particular, this means
  690. * that all per memcg caches attached to it must be empty too.
  691. */
  692. if (!list_empty(&s->memcg_params.children))
  693. return -EBUSY;
  694. return 0;
  695. }
  696. static void flush_memcg_workqueue(struct kmem_cache *s)
  697. {
  698. mutex_lock(&slab_mutex);
  699. s->memcg_params.dying = true;
  700. mutex_unlock(&slab_mutex);
  701. /*
  702. * SLUB deactivates the kmem_caches through call_rcu_sched. Make
  703. * sure all registered rcu callbacks have been invoked.
  704. */
  705. if (IS_ENABLED(CONFIG_SLUB))
  706. rcu_barrier_sched();
  707. /*
  708. * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
  709. * deactivates the memcg kmem_caches through workqueue. Make sure all
  710. * previous workitems on workqueue are processed.
  711. */
  712. flush_workqueue(memcg_kmem_cache_wq);
  713. }
  714. #else
  715. static inline int shutdown_memcg_caches(struct kmem_cache *s)
  716. {
  717. return 0;
  718. }
  719. static inline void flush_memcg_workqueue(struct kmem_cache *s)
  720. {
  721. }
  722. #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
  723. void slab_kmem_cache_release(struct kmem_cache *s)
  724. {
  725. __kmem_cache_release(s);
  726. destroy_memcg_params(s);
  727. kfree_const(s->name);
  728. kmem_cache_free(kmem_cache, s);
  729. }
  730. void kmem_cache_destroy(struct kmem_cache *s)
  731. {
  732. int err;
  733. if (unlikely(!s))
  734. return;
  735. flush_memcg_workqueue(s);
  736. get_online_cpus();
  737. get_online_mems();
  738. mutex_lock(&slab_mutex);
  739. s->refcount--;
  740. if (s->refcount)
  741. goto out_unlock;
  742. err = shutdown_memcg_caches(s);
  743. if (!err)
  744. err = shutdown_cache(s);
  745. if (err) {
  746. pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
  747. s->name);
  748. dump_stack();
  749. }
  750. out_unlock:
  751. mutex_unlock(&slab_mutex);
  752. put_online_mems();
  753. put_online_cpus();
  754. }
  755. EXPORT_SYMBOL(kmem_cache_destroy);
  756. /**
  757. * kmem_cache_shrink - Shrink a cache.
  758. * @cachep: The cache to shrink.
  759. *
  760. * Releases as many slabs as possible for a cache.
  761. * To help debugging, a zero exit status indicates all slabs were released.
  762. */
  763. int kmem_cache_shrink(struct kmem_cache *cachep)
  764. {
  765. int ret;
  766. get_online_cpus();
  767. get_online_mems();
  768. kasan_cache_shrink(cachep);
  769. ret = __kmem_cache_shrink(cachep);
  770. put_online_mems();
  771. put_online_cpus();
  772. return ret;
  773. }
  774. EXPORT_SYMBOL(kmem_cache_shrink);
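/*
 * Editor's illustration: shrinking a cache after a burst of frees and
 * checking whether every slab could be released. "cache" is a hypothetical
 * pointer supplied by the caller.
 */
static __maybe_unused void example_shrink(struct kmem_cache *cache)
{
        /* A non-zero return means some slabs still hold live objects. */
        if (kmem_cache_shrink(cache))
                pr_debug("example: cache not fully shrinkable\n");
}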
  775. bool slab_is_available(void)
  776. {
  777. return slab_state >= UP;
  778. }
  779. #ifndef CONFIG_SLOB
  780. /* Create a cache during boot when no slab services are available yet */
  781. void __init create_boot_cache(struct kmem_cache *s, const char *name,
  782. unsigned int size, slab_flags_t flags,
  783. unsigned int useroffset, unsigned int usersize)
  784. {
  785. int err;
  786. s->name = name;
  787. s->size = s->object_size = size;
  788. s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
  789. s->useroffset = useroffset;
  790. s->usersize = usersize;
  791. slab_init_memcg_params(s);
  792. err = __kmem_cache_create(s, flags);
  793. if (err)
  794. panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
  795. name, size, err);
  796. s->refcount = -1; /* Exempt from merging for now */
  797. }
  798. struct kmem_cache *__init create_kmalloc_cache(const char *name,
  799. unsigned int size, slab_flags_t flags,
  800. unsigned int useroffset, unsigned int usersize)
  801. {
  802. struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
  803. if (!s)
  804. panic("Out of memory when creating slab %s\n", name);
  805. create_boot_cache(s, name, size, flags, useroffset, usersize);
  806. list_add(&s->list, &slab_caches);
  807. memcg_link_cache(s);
  808. s->refcount = 1;
  809. return s;
  810. }
  811. struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
  812. EXPORT_SYMBOL(kmalloc_caches);
  813. #ifdef CONFIG_ZONE_DMA
  814. struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
  815. EXPORT_SYMBOL(kmalloc_dma_caches);
  816. #endif
  817. /*
* Conversion table for small slab sizes / 8 to the index in the
  819. * kmalloc array. This is necessary for slabs < 192 since we have non power
  820. * of two cache sizes there. The size of larger slabs can be determined using
  821. * fls.
  822. */
  823. static u8 size_index[24] __ro_after_init = {
  824. 3, /* 8 */
  825. 4, /* 16 */
  826. 5, /* 24 */
  827. 5, /* 32 */
  828. 6, /* 40 */
  829. 6, /* 48 */
  830. 6, /* 56 */
  831. 6, /* 64 */
  832. 1, /* 72 */
  833. 1, /* 80 */
  834. 1, /* 88 */
  835. 1, /* 96 */
  836. 7, /* 104 */
  837. 7, /* 112 */
  838. 7, /* 120 */
  839. 7, /* 128 */
  840. 2, /* 136 */
  841. 2, /* 144 */
  842. 2, /* 152 */
  843. 2, /* 160 */
  844. 2, /* 168 */
  845. 2, /* 176 */
  846. 2, /* 184 */
  847. 2 /* 192 */
  848. };
  849. static inline unsigned int size_index_elem(unsigned int bytes)
  850. {
  851. return (bytes - 1) / 8;
  852. }
  853. /*
  854. * Find the kmem_cache structure that serves a given size of
  855. * allocation
  856. */
  857. struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
  858. {
  859. unsigned int index;
  860. if (unlikely(size > KMALLOC_MAX_SIZE)) {
  861. WARN_ON_ONCE(!(flags & __GFP_NOWARN));
  862. return NULL;
  863. }
  864. if (size <= 192) {
  865. if (!size)
  866. return ZERO_SIZE_PTR;
  867. index = size_index[size_index_elem(size)];
  868. } else
  869. index = fls(size - 1);
  870. #ifdef CONFIG_ZONE_DMA
  871. if (unlikely((flags & GFP_DMA)))
  872. return kmalloc_dma_caches[index];
  873. #endif
  874. return kmalloc_caches[index];
  875. }
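/*
 * Worked example (editor's addition, assuming KMALLOC_MIN_SIZE == 8 as on
 * x86-64): kmalloc(72) takes the small-size path, where
 * size_index_elem(72) == (72 - 1) / 8 == 8 and size_index[8] == 1, i.e. the
 * kmalloc-96 cache; kmalloc(200) takes the large path, where
 * fls(200 - 1) == 8 selects index 8, i.e. the kmalloc-256 cache.
 */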
  876. /*
  877. * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
  878. * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
  879. * kmalloc-67108864.
  880. */
  881. const struct kmalloc_info_struct kmalloc_info[] __initconst = {
  882. {NULL, 0}, {"kmalloc-96", 96},
  883. {"kmalloc-192", 192}, {"kmalloc-8", 8},
  884. {"kmalloc-16", 16}, {"kmalloc-32", 32},
  885. {"kmalloc-64", 64}, {"kmalloc-128", 128},
  886. {"kmalloc-256", 256}, {"kmalloc-512", 512},
  887. {"kmalloc-1024", 1024}, {"kmalloc-2048", 2048},
  888. {"kmalloc-4096", 4096}, {"kmalloc-8192", 8192},
  889. {"kmalloc-16384", 16384}, {"kmalloc-32768", 32768},
  890. {"kmalloc-65536", 65536}, {"kmalloc-131072", 131072},
  891. {"kmalloc-262144", 262144}, {"kmalloc-524288", 524288},
  892. {"kmalloc-1048576", 1048576}, {"kmalloc-2097152", 2097152},
  893. {"kmalloc-4194304", 4194304}, {"kmalloc-8388608", 8388608},
  894. {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432},
  895. {"kmalloc-67108864", 67108864}
  896. };
  897. /*
  898. * Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This seems to be the case only
* for MIPS. The standard arches will not generate any code here.
  901. *
  902. * Largest permitted alignment is 256 bytes due to the way we
  903. * handle the index determination for the smaller caches.
  904. *
  905. * Make sure that nothing crazy happens if someone starts tinkering
* around with ARCH_KMALLOC_MINALIGN.
  907. */
  908. void __init setup_kmalloc_cache_index_table(void)
  909. {
  910. unsigned int i;
  911. BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
  912. (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
  913. for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
  914. unsigned int elem = size_index_elem(i);
  915. if (elem >= ARRAY_SIZE(size_index))
  916. break;
  917. size_index[elem] = KMALLOC_SHIFT_LOW;
  918. }
  919. if (KMALLOC_MIN_SIZE >= 64) {
  920. /*
* The 96 byte sized cache is not used if the alignment
* is 64 bytes.
  923. */
  924. for (i = 64 + 8; i <= 96; i += 8)
  925. size_index[size_index_elem(i)] = 7;
  926. }
  927. if (KMALLOC_MIN_SIZE >= 128) {
  928. /*
  929. * The 192 byte sized cache is not used if the alignment
* is 128 bytes. Redirect kmalloc to use the 256 byte cache
  931. * instead.
  932. */
  933. for (i = 128 + 8; i <= 192; i += 8)
  934. size_index[size_index_elem(i)] = 8;
  935. }
  936. }
  937. static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
  938. {
  939. kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
  940. kmalloc_info[idx].size, flags, 0,
  941. kmalloc_info[idx].size);
  942. }
  943. /*
  944. * Create the kmalloc array. Some of the regular kmalloc arrays
  945. * may already have been created because they were needed to
  946. * enable allocations for slab creation.
  947. */
  948. void __init create_kmalloc_caches(slab_flags_t flags)
  949. {
  950. int i;
  951. for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
  952. if (!kmalloc_caches[i])
  953. new_kmalloc_cache(i, flags);
  954. /*
* Caches that are not a power-of-two size (kmalloc-96 and
* kmalloc-192) have to be created immediately after the
* preceding power-of-two caches.
  958. */
  959. if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
  960. new_kmalloc_cache(1, flags);
  961. if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
  962. new_kmalloc_cache(2, flags);
  963. }
  964. /* Kmalloc array is now usable */
  965. slab_state = UP;
  966. #ifdef CONFIG_ZONE_DMA
  967. for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
  968. struct kmem_cache *s = kmalloc_caches[i];
  969. if (s) {
  970. unsigned int size = kmalloc_size(i);
  971. char *n = kasprintf(GFP_NOWAIT,
  972. "dma-kmalloc-%u", size);
  973. BUG_ON(!n);
  974. kmalloc_dma_caches[i] = create_kmalloc_cache(n,
  975. size, SLAB_CACHE_DMA | flags, 0, 0);
  976. }
  977. }
  978. #endif
  979. }
  980. #endif /* !CONFIG_SLOB */
  981. /*
  982. * To avoid unnecessary overhead, we pass through large allocation requests
  983. * directly to the page allocator. We use __GFP_COMP, because we will need to
  984. * know the allocation order to free the pages properly in kfree.
  985. */
  986. void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
  987. {
  988. void *ret;
  989. struct page *page;
  990. flags |= __GFP_COMP;
  991. page = alloc_pages(flags, order);
  992. ret = page ? page_address(page) : NULL;
  993. kmemleak_alloc(ret, size, 1, flags);
  994. kasan_kmalloc_large(ret, size, flags);
  995. return ret;
  996. }
  997. EXPORT_SYMBOL(kmalloc_order);
  998. #ifdef CONFIG_TRACING
  999. void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
  1000. {
  1001. void *ret = kmalloc_order(size, flags, order);
  1002. trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
  1003. return ret;
  1004. }
  1005. EXPORT_SYMBOL(kmalloc_order_trace);
  1006. #endif
  1007. #ifdef CONFIG_SLAB_FREELIST_RANDOM
  1008. /* Randomize a generic freelist */
  1009. static void freelist_randomize(struct rnd_state *state, unsigned int *list,
  1010. unsigned int count)
  1011. {
  1012. unsigned int rand;
  1013. unsigned int i;
  1014. for (i = 0; i < count; i++)
  1015. list[i] = i;
  1016. /* Fisher-Yates shuffle */
  1017. for (i = count - 1; i > 0; i--) {
  1018. rand = prandom_u32_state(state);
  1019. rand %= (i + 1);
  1020. swap(list[i], list[rand]);
  1021. }
  1022. }
  1023. /* Create a random sequence per cache */
  1024. int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
  1025. gfp_t gfp)
  1026. {
  1027. struct rnd_state state;
  1028. if (count < 2 || cachep->random_seq)
  1029. return 0;
  1030. cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
  1031. if (!cachep->random_seq)
  1032. return -ENOMEM;
  1033. /* Get best entropy at this stage of boot */
  1034. prandom_seed_state(&state, get_random_long());
  1035. freelist_randomize(&state, cachep->random_seq, count);
  1036. return 0;
  1037. }
  1038. /* Destroy the per-cache random freelist sequence */
  1039. void cache_random_seq_destroy(struct kmem_cache *cachep)
  1040. {
  1041. kfree(cachep->random_seq);
  1042. cachep->random_seq = NULL;
  1043. }
  1044. #endif /* CONFIG_SLAB_FREELIST_RANDOM */
  1045. #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
  1046. #ifdef CONFIG_SLAB
  1047. #define SLABINFO_RIGHTS (0600)
  1048. #else
  1049. #define SLABINFO_RIGHTS (0400)
  1050. #endif
  1051. static void print_slabinfo_header(struct seq_file *m)
  1052. {
  1053. /*
  1054. * Output format version, so at least we can change it
  1055. * without _too_ many complaints.
  1056. */
  1057. #ifdef CONFIG_DEBUG_SLAB
  1058. seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
  1059. #else
  1060. seq_puts(m, "slabinfo - version: 2.1\n");
  1061. #endif
  1062. seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
  1063. seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
  1064. seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
  1065. #ifdef CONFIG_DEBUG_SLAB
  1066. seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
  1067. seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
  1068. #endif
  1069. seq_putc(m, '\n');
  1070. }
  1071. void *slab_start(struct seq_file *m, loff_t *pos)
  1072. {
  1073. mutex_lock(&slab_mutex);
  1074. return seq_list_start(&slab_root_caches, *pos);
  1075. }
  1076. void *slab_next(struct seq_file *m, void *p, loff_t *pos)
  1077. {
  1078. return seq_list_next(p, &slab_root_caches, pos);
  1079. }
  1080. void slab_stop(struct seq_file *m, void *p)
  1081. {
  1082. mutex_unlock(&slab_mutex);
  1083. }
  1084. static void
  1085. memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
  1086. {
  1087. struct kmem_cache *c;
  1088. struct slabinfo sinfo;
  1089. if (!is_root_cache(s))
  1090. return;
  1091. for_each_memcg_cache(c, s) {
  1092. memset(&sinfo, 0, sizeof(sinfo));
  1093. get_slabinfo(c, &sinfo);
  1094. info->active_slabs += sinfo.active_slabs;
  1095. info->num_slabs += sinfo.num_slabs;
  1096. info->shared_avail += sinfo.shared_avail;
  1097. info->active_objs += sinfo.active_objs;
  1098. info->num_objs += sinfo.num_objs;
  1099. }
  1100. }
  1101. static void cache_show(struct kmem_cache *s, struct seq_file *m)
  1102. {
  1103. struct slabinfo sinfo;
  1104. memset(&sinfo, 0, sizeof(sinfo));
  1105. get_slabinfo(s, &sinfo);
  1106. memcg_accumulate_slabinfo(s, &sinfo);
  1107. seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
  1108. cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
  1109. sinfo.objects_per_slab, (1 << sinfo.cache_order));
  1110. seq_printf(m, " : tunables %4u %4u %4u",
  1111. sinfo.limit, sinfo.batchcount, sinfo.shared);
  1112. seq_printf(m, " : slabdata %6lu %6lu %6lu",
  1113. sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
  1114. slabinfo_show_stats(m, s);
  1115. seq_putc(m, '\n');
  1116. }
  1117. static int slab_show(struct seq_file *m, void *p)
  1118. {
  1119. struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
  1120. if (p == slab_root_caches.next)
  1121. print_slabinfo_header(m);
  1122. cache_show(s, m);
  1123. return 0;
  1124. }
  1125. void dump_unreclaimable_slab(void)
  1126. {
  1127. struct kmem_cache *s, *s2;
  1128. struct slabinfo sinfo;
  1129. /*
* Acquiring slab_mutex here is risky since we do not want to sleep
* in the OOM path. But without holding the mutex, the traversal
* could race with cache destruction and crash.
* Use mutex_trylock() to protect the list traversal and dump nothing
* if the mutex cannot be acquired.
  1135. */
  1136. if (!mutex_trylock(&slab_mutex)) {
  1137. pr_warn("excessive unreclaimable slab but cannot dump stats\n");
  1138. return;
  1139. }
  1140. pr_info("Unreclaimable slab info:\n");
  1141. pr_info("Name Used Total\n");
  1142. list_for_each_entry_safe(s, s2, &slab_caches, list) {
  1143. if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
  1144. continue;
  1145. get_slabinfo(s, &sinfo);
  1146. if (sinfo.num_objs > 0)
  1147. pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
  1148. (sinfo.active_objs * s->size) / 1024,
  1149. (sinfo.num_objs * s->size) / 1024);
  1150. }
  1151. mutex_unlock(&slab_mutex);
  1152. }
  1153. #if defined(CONFIG_MEMCG)
  1154. void *memcg_slab_start(struct seq_file *m, loff_t *pos)
  1155. {
  1156. struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
  1157. mutex_lock(&slab_mutex);
  1158. return seq_list_start(&memcg->kmem_caches, *pos);
  1159. }
  1160. void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
  1161. {
  1162. struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
  1163. return seq_list_next(p, &memcg->kmem_caches, pos);
  1164. }
  1165. void memcg_slab_stop(struct seq_file *m, void *p)
  1166. {
  1167. mutex_unlock(&slab_mutex);
  1168. }
  1169. int memcg_slab_show(struct seq_file *m, void *p)
  1170. {
  1171. struct kmem_cache *s = list_entry(p, struct kmem_cache,
  1172. memcg_params.kmem_caches_node);
  1173. struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
  1174. if (p == memcg->kmem_caches.next)
  1175. print_slabinfo_header(m);
  1176. cache_show(s, m);
  1177. return 0;
  1178. }
  1179. #endif
  1180. /*
  1181. * slabinfo_op - iterator that generates /proc/slabinfo
  1182. *
  1183. * Output layout:
  1184. * cache-name
  1185. * num-active-objs
  1186. * total-objs
  1187. * object size
  1188. * num-active-slabs
  1189. * total-slabs
  1190. * num-pages-per-slab
  1191. * + further values on SMP and with statistics enabled
  1192. */
  1193. static const struct seq_operations slabinfo_op = {
  1194. .start = slab_start,
  1195. .next = slab_next,
  1196. .stop = slab_stop,
  1197. .show = slab_show,
  1198. };
  1199. static int slabinfo_open(struct inode *inode, struct file *file)
  1200. {
  1201. return seq_open(file, &slabinfo_op);
  1202. }
  1203. static const struct file_operations proc_slabinfo_operations = {
  1204. .open = slabinfo_open,
  1205. .read = seq_read,
  1206. .write = slabinfo_write,
  1207. .llseek = seq_lseek,
  1208. .release = seq_release,
  1209. };
  1210. static int __init slab_proc_init(void)
  1211. {
  1212. proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
  1213. &proc_slabinfo_operations);
  1214. return 0;
  1215. }
  1216. module_init(slab_proc_init);
  1217. #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
  1218. static __always_inline void *__do_krealloc(const void *p, size_t new_size,
  1219. gfp_t flags)
  1220. {
  1221. void *ret;
  1222. size_t ks = 0;
  1223. if (p)
  1224. ks = ksize(p);
  1225. if (ks >= new_size) {
  1226. kasan_krealloc((void *)p, new_size, flags);
  1227. return (void *)p;
  1228. }
  1229. ret = kmalloc_track_caller(new_size, flags);
  1230. if (ret && p)
  1231. memcpy(ret, p, ks);
  1232. return ret;
  1233. }
  1234. /**
  1235. * __krealloc - like krealloc() but don't free @p.
  1236. * @p: object to reallocate memory for.
  1237. * @new_size: how many bytes of memory are required.
  1238. * @flags: the type of memory to allocate.
  1239. *
  1240. * This function is like krealloc() except it never frees the originally
* allocated buffer. Use this if you don't want to free the buffer
* immediately, for example when the old buffer may still be in use
* under RCU.
  1243. */
  1244. void *__krealloc(const void *p, size_t new_size, gfp_t flags)
  1245. {
  1246. if (unlikely(!new_size))
  1247. return ZERO_SIZE_PTR;
  1248. return __do_krealloc(p, new_size, flags);
  1249. }
  1250. EXPORT_SYMBOL(__krealloc);
  1251. /**
  1252. * krealloc - reallocate memory. The contents will remain unchanged.
  1253. * @p: object to reallocate memory for.
  1254. * @new_size: how many bytes of memory are required.
  1255. * @flags: the type of memory to allocate.
  1256. *
  1257. * The contents of the object pointed to are preserved up to the
  1258. * lesser of the new and old sizes. If @p is %NULL, krealloc()
  1259. * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
  1260. * %NULL pointer, the object pointed to is freed.
  1261. */
  1262. void *krealloc(const void *p, size_t new_size, gfp_t flags)
  1263. {
  1264. void *ret;
  1265. if (unlikely(!new_size)) {
  1266. kfree(p);
  1267. return ZERO_SIZE_PTR;
  1268. }
  1269. ret = __do_krealloc(p, new_size, flags);
  1270. if (ret && p != ret)
  1271. kfree(p);
  1272. return ret;
  1273. }
  1274. EXPORT_SYMBOL(krealloc);
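/*
 * Editor's sketch: growing a buffer with krealloc(). The helper name
 * "example_grow" is hypothetical. Note that on failure the original buffer
 * is left untouched, so the caller still owns (and must free) it.
 */
static __maybe_unused void *example_grow(void *buf, size_t new_size)
{
        void *tmp = krealloc(buf, new_size, GFP_KERNEL);

        if (!tmp) {
                kfree(buf);     /* krealloc() did not free it on failure */
                return NULL;
        }
        return tmp;
}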
  1275. /**
  1276. * kzfree - like kfree but zero memory
  1277. * @p: object to free memory of
  1278. *
* The memory of the object @p points to is zeroed before it is freed.
  1280. * If @p is %NULL, kzfree() does nothing.
  1281. *
  1282. * Note: this function zeroes the whole allocated buffer which can be a good
  1283. * deal bigger than the requested buffer size passed to kmalloc(). So be
  1284. * careful when using this function in performance sensitive code.
  1285. */
  1286. void kzfree(const void *p)
  1287. {
  1288. size_t ks;
  1289. void *mem = (void *)p;
  1290. if (unlikely(ZERO_OR_NULL_PTR(mem)))
  1291. return;
  1292. ks = ksize(mem);
  1293. memset(mem, 0, ks);
  1294. kfree(mem);
  1295. }
  1296. EXPORT_SYMBOL(kzfree);
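/*
 * Editor's sketch: using kzfree() for a buffer holding sensitive material.
 * The name "example_keybuf" and the 32-byte size are assumptions for this
 * illustration only.
 */
static __maybe_unused int example_keybuf(void)
{
        u8 *key = kmalloc(32, GFP_KERNEL);

        if (!key)
                return -ENOMEM;
        /* ... fill in and use the key material ... */
        kzfree(key);    /* zeroes the whole ksize(key) allocation, then frees it */
        return 0;
}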
  1297. /* Tracepoints definitions. */
  1298. EXPORT_TRACEPOINT_SYMBOL(kmalloc);
  1299. EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
  1300. EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
  1301. EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
  1302. EXPORT_TRACEPOINT_SYMBOL(kfree);
  1303. EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
  1304. int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
  1305. {
  1306. if (__should_failslab(s, gfpflags))
  1307. return -ENOMEM;
  1308. return 0;
  1309. }
  1310. ALLOW_ERROR_INJECTION(should_failslab, ERRNO);