slab_common.c

/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
			 SLAB_NOTRACK | SLAB_ACCOUNT)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * (Could be removed. This was introduced to pacify the merge skeptics.)
 */
static int slab_nomerge;

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = 1;
	return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);
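
/*
 * Illustrative usage (a sketch, not code in this file): cache merging can be
 * disabled from the kernel command line, e.g. by booting with
 *
 *	slab_nomerge
 *
 * or, when CONFIG_SLUB is enabled, with the older alias
 *
 *	slub_nomerge
 *
 * Both parameters are handled by setup_slab_nomerge() above.
 */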

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
		size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module. Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
	return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++)
		kmem_cache_free(s, p[i]);
}

int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
								void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);
		if (!x) {
			__kmem_cache_free_bulk(s, i, p);
			return 0;
		}
	}
	return i;
}
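
/*
 * Illustrative caller-side sketch (hypothetical cache and count): the
 * __kmem_cache_*_bulk() helpers above serve as generic fallbacks for the
 * public kmem_cache_alloc_bulk()/kmem_cache_free_bulk() API, which a caller
 * would use roughly like this:
 *
 *	void *objs[16];
 *
 *	if (kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, 16, objs)) {
 *		... use objs[0..15] ...
 *		kmem_cache_free_bulk(my_cache, 16, objs);
 *	}
 *
 * On success the number of allocated objects is returned, 0 on failure,
 * matching the convention of __kmem_cache_alloc_bulk().
 */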

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
void slab_init_memcg_params(struct kmem_cache *s)
{
	s->memcg_params.is_root_cache = true;
	INIT_LIST_HEAD(&s->memcg_params.list);
	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
}

static int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct memcg_cache_array *arr;

	if (memcg) {
		s->memcg_params.is_root_cache = false;
		s->memcg_params.memcg = memcg;
		s->memcg_params.root_cache = root_cache;
		return 0;
	}

	slab_init_memcg_params(s);

	if (!memcg_nr_cache_ids)
		return 0;

	arr = kzalloc(sizeof(struct memcg_cache_array) +
		      memcg_nr_cache_ids * sizeof(void *),
		      GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
	return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
	if (is_root_cache(s))
		kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
	struct memcg_cache_array *old, *new;

	if (!is_root_cache(s))
		return 0;

	new = kzalloc(sizeof(struct memcg_cache_array) +
		      new_array_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	if (old)
		memcpy(new->entries, old->entries,
		       memcg_nr_cache_ids * sizeof(void *));

	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
	if (old)
		kfree_rcu(old, rcu);
	return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_caches, list) {
		ret = update_memcg_params(s, num_memcgs);
		/*
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			break;
	}
	mutex_unlock(&slab_mutex);
	return ret;
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
		return 1;

	if (!is_root_cache(s))
		return 1;

	if (s->ctor)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

struct kmem_cache *find_mergeable(size_t size, size_t align,
		unsigned long flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	list_for_each_entry_reverse(s, &slab_caches, list) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater then use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();

		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
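
/*
 * Worked example (illustrative, assuming a 64-byte cache line): with
 * SLAB_HWCACHE_ALIGN, size = 24 and align = 0, ralign starts at 64 and is
 * halved while size <= ralign / 2, ending at 32, so objects are padded to a
 * 32-byte boundary rather than a full cache line. A caller-specified align
 * larger than that would still win via max(align, ralign).
 */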

static struct kmem_cache *create_cache(const char *name,
		size_t object_size, size_t size, size_t align,
		unsigned long flags, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->object_size = object_size;
	s->size = size;
	s->align = align;
	s->ctor = ctor;

	err = init_memcg_params(s, memcg, root_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err) {
		goto out_unlock;
	}

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size, size,
			 calculate_alignment(flags, align, size),
			 flags, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
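
/*
 * Illustrative usage sketch (hypothetical struct and cache names): a typical
 * caller creates a cache once, allocates and frees objects from it, and
 * destroys it when the last user goes away.
 *
 *	struct foo {
 *		int a;
 *		struct list_head list;
 *	};
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, NULL);
 *	if (!foo_cache)
 *		return -ENOMEM;
 *
 *	struct foo *f = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, f);
 *	kmem_cache_destroy(foo_cache);
 */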

static int shutdown_cache(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	if (s->flags & SLAB_DESTROY_BY_RCU)
		*need_rcu_barrier = true;

	list_move(&s->list, release);
	return 0;
}

static void release_caches(struct list_head *release, bool need_rcu_barrier)
{
	struct kmem_cache *s, *s2;

	if (need_rcu_barrier)
		rcu_barrier();

	list_for_each_entry_safe(s, s2, release, list) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_remove(s);
#else
		slab_kmem_cache_release(s);
#endif
	}
}

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
/*
 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
			     struct kmem_cache *root_cache)
{
	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
	struct cgroup_subsys_state *css = &memcg->css;
	struct memcg_cache_array *arr;
	struct kmem_cache *s = NULL;
	char *cache_name;
	int idx;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	/*
	 * The memory cgroup could have been offlined while the cache
	 * creation work was pending.
	 */
	if (!memcg_kmem_online(memcg))
		goto out_unlock;

	idx = memcg_cache_id(memcg);
	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));

	/*
	 * Since per-memcg caches are created asynchronously on first
	 * allocation (see memcg_kmem_get_cache()), several threads can try to
	 * create the same cache, but only one of them may succeed.
	 */
	if (arr->entries[idx])
		goto out_unlock;

	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
	cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
			       css->id, memcg_name_buf);
	if (!cache_name)
		goto out_unlock;

	s = create_cache(cache_name, root_cache->object_size,
			 root_cache->size, root_cache->align,
			 root_cache->flags, root_cache->ctor,
			 memcg, root_cache);
	/*
	 * If we could not create a memcg cache, do not complain, because
	 * that's not critical at all as we can always proceed with the root
	 * cache.
	 */
	if (IS_ERR(s)) {
		kfree(cache_name);
		goto out_unlock;
	}

	list_add(&s->memcg_params.list, &root_cache->memcg_params.list);

	/*
	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
	 * barrier here to ensure nobody will see the kmem_cache partially
	 * initialized.
	 */
	smp_wmb();
	arr->entries[idx] = s;

out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
	int idx;
	struct memcg_cache_array *arr;
	struct kmem_cache *s, *c;

	idx = memcg_cache_id(memcg);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;

		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
						lockdep_is_held(&slab_mutex));
		c = arr->entries[idx];
		if (!c)
			continue;

		__kmem_cache_shrink(c, true);
		arr->entries[idx] = NULL;
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static int __shutdown_memcg_cache(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	BUG_ON(is_root_cache(s));

	if (shutdown_cache(s, release, need_rcu_barrier))
		return -EBUSY;

	list_del(&s->memcg_params.list);
	return 0;
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
	LIST_HEAD(release);
	bool need_rcu_barrier = false;
	struct kmem_cache *s, *s2;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry_safe(s, s2, &slab_caches, list) {
		if (is_root_cache(s) || s->memcg_params.memcg != memcg)
			continue;
		/*
		 * The cgroup is about to be freed and therefore has no charges
		 * left. Hence, all its caches must be empty by now.
		 */
		BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier));
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	release_caches(&release, need_rcu_barrier);
}

static int shutdown_memcg_caches(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	struct memcg_cache_array *arr;
	struct kmem_cache *c, *c2;
	LIST_HEAD(busy);
	int i;

	BUG_ON(!is_root_cache(s));

	/*
	 * First, shutdown active caches, i.e. caches that belong to online
	 * memory cgroups.
	 */
	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = arr->entries[i];
		if (!c)
			continue;
		if (__shutdown_memcg_cache(c, release, need_rcu_barrier))
			/*
			 * The cache still has objects. Move it to a temporary
			 * list so as not to try to destroy it for a second
			 * time while iterating over inactive caches below.
			 */
			list_move(&c->memcg_params.list, &busy);
		else
			/*
			 * The cache is empty and will be destroyed soon. Clear
			 * the pointer to it in the memcg_caches array so that
			 * it will never be accessed even if the root cache
			 * stays alive.
			 */
			arr->entries[i] = NULL;
	}

	/*
	 * Second, shutdown all caches left from memory cgroups that are now
	 * offline.
	 */
	list_for_each_entry_safe(c, c2, &s->memcg_params.list,
				 memcg_params.list)
		__shutdown_memcg_cache(c, release, need_rcu_barrier);

	list_splice(&busy, &s->memcg_params.list);

	/*
	 * A cache being destroyed must be empty. In particular, this means
	 * that all per memcg caches attached to it must be empty too.
	 */
	if (!list_empty(&s->memcg_params.list))
		return -EBUSY;
	return 0;
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	return 0;
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

void slab_kmem_cache_release(struct kmem_cache *s)
{
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	LIST_HEAD(release);
	bool need_rcu_barrier = false;
	int err;

	if (unlikely(!s))
		return;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	err = shutdown_memcg_caches(s, &release, &need_rcu_barrier);
	if (!err)
		err = shutdown_cache(s, &release, &need_rcu_barrier);

	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	release_caches(&release, need_rcu_barrier);
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret;

	get_online_cpus();
	get_online_mems();
	ret = __kmem_cache_shrink(cachep, false);
	put_online_mems();
	put_online_cpus();

	return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);
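
/*
 * Illustrative usage sketch (hypothetical cache name): a subsystem that has
 * just released many objects can ask for empty slabs to be handed back to
 * the page allocator; a non-zero return means some slabs are still in use.
 *
 *	if (kmem_cache_shrink(my_cache))
 *		pr_debug("my_cache still holds objects after shrink\n");
 */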

bool slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);

	slab_init_memcg_params(s);

	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];
#endif

	return kmalloc_caches[index];
}
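
/*
 * Worked example (illustrative, assuming the default table above with
 * KMALLOC_MIN_SIZE == 8): kmalloc_slab(100, GFP_KERNEL) takes the small-size
 * path since 100 <= 192: size_index_elem(100) = (100 - 1) / 8 = 12 and
 * size_index[12] = 7, i.e. kmalloc-128. A 1000-byte request instead uses
 * fls(999) = 10, i.e. kmalloc-1024.
 */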

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
 * kmalloc-67108864.
 */
static struct {
	const char *name;
	unsigned long size;
} const kmalloc_info[] __initconst = {
	{NULL, 0}, {"kmalloc-96", 96},
	{"kmalloc-192", 192}, {"kmalloc-8", 8},
	{"kmalloc-16", 16}, {"kmalloc-32", 32},
	{"kmalloc-64", 64}, {"kmalloc-128", 128},
	{"kmalloc-256", 256}, {"kmalloc-512", 512},
	{"kmalloc-1024", 1024}, {"kmalloc-2048", 2048},
	{"kmalloc-4096", 4096}, {"kmalloc-8192", 8192},
	{"kmalloc-16384", 16384}, {"kmalloc-32768", 32768},
	{"kmalloc-65536", 65536}, {"kmalloc-131072", 131072},
	{"kmalloc-262144", 262144}, {"kmalloc-524288", 524288},
	{"kmalloc-1048576", 1048576}, {"kmalloc-2097152", 2097152},
	{"kmalloc-4194304", 4194304}, {"kmalloc-8388608", 8388608},
	{"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432},
	{"kmalloc-67108864", 67108864}
};

/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crazy happens if someone starts tinkering
 * around with ARCH_KMALLOC_MINALIGN
 */
void __init setup_kmalloc_cache_index_table(void)
{
	int i;

	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 byte.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;
	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 byte. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
}

static void __init new_kmalloc_cache(int idx, unsigned long flags)
{
	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
					kmalloc_info[idx].size, flags);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
	int i;

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i])
			new_kmalloc_cache(i, flags);

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			new_kmalloc_cache(1, flags);
		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			new_kmalloc_cache(2, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
					    "dma-kmalloc-%d", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret;
	struct page *page;

	flags |= __GFP_COMP;
	page = alloc_kmem_pages(flags, order);
	ret = page ? page_address(page) : NULL;
	kmemleak_alloc(ret, size, 1, flags);
	kasan_kmalloc_large(ret, size);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order);
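
/*
 * Illustrative note (a sketch of the caller side, not a path defined here):
 * requests above the largest kmalloc cache end up in kmalloc_order(), with
 * the order derived from the size, e.g. get_order(64 * 1024) == 4 with 4 KiB
 * pages, so a 64 KiB request becomes a 16-page compound allocation.
 */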

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif

static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

void *slab_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&slab_mutex);
	return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache(c, s) {
		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
}

static int slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

	if (p == slab_caches.next)
		print_slabinfo_header(m);
	if (is_root_cache(s))
		cache_show(s, m);
	return 0;
}

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
int memcg_slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	if (p == slab_caches.next)
		print_slabinfo_header(m);
	if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
		cache_show(s, m);
	return 0;
}
#endif

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = slab_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = slab_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
		    &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
					   gfp_t flags)
{
	void *ret;
	size_t ks = 0;

	if (p)
		ks = ksize(p);

	if (ks >= new_size) {
		kasan_krealloc((void *)p, new_size);
		return (void *)p;
	}

	ret = kmalloc_track_caller(new_size, flags);
	if (ret && p)
		memcpy(ret, p, ks);

	return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately
 * like, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
	if (unlikely(!new_size))
		return ZERO_SIZE_PTR;

	return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes. If @p is %NULL, krealloc()
 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
	void *ret;

	if (unlikely(!new_size)) {
		kfree(p);
		return ZERO_SIZE_PTR;
	}

	ret = __do_krealloc(p, new_size, flags);
	if (ret && p != ret)
		kfree(p);

	return ret;
}
EXPORT_SYMBOL(krealloc);
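
/*
 * Illustrative usage sketch (hypothetical buffer and sizes): growing a
 * kmalloc'ed buffer while preserving its contents. On failure krealloc()
 * returns NULL and leaves the original buffer intact, so the caller still
 * owns (and must eventually free) it.
 *
 *	char *buf = kmalloc(64, GFP_KERNEL);
 *	char *bigger;
 *
 *	bigger = krealloc(buf, 256, GFP_KERNEL);
 *	if (!bigger)
 *		kfree(buf);
 *	else
 *		buf = bigger;
 */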

/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before it is freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
	size_t ks;
	void *mem = (void *)p;

	if (unlikely(ZERO_OR_NULL_PTR(mem)))
		return;
	ks = ksize(mem);
	memset(mem, 0, ks);
	kfree(mem);
}
EXPORT_SYMBOL(kzfree);
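
/*
 * Illustrative usage sketch (hypothetical key buffer): kzfree() is typically
 * used for allocations that held sensitive data, e.g. key material.
 *
 *	u8 *key = kmalloc(key_len, GFP_KERNEL);
 *	...
 *	kzfree(key);
 */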

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);