slab_common.c

/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
        SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
        SLAB_FAILSLAB)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * (Could be removed. This was introduced to pacify the merge skeptics.)
 */
static int slab_nomerge;

static int __init setup_slab_nomerge(char *str)
{
        slab_nomerge = 1;
        return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);
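/*
 * Example (added for illustration, not part of the original source): merging
 * is disabled from the kernel command line, e.g. booting with "slab_nomerge"
 * (or "slub_nomerge" when CONFIG_SLUB is enabled) sets slab_nomerge above, so
 * every kmem_cache_create() call below builds its own cache instead of
 * aliasing an existing one.
 */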
/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
        return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
{
        struct kmem_cache *s = NULL;

        if (!name || in_interrupt() || size < sizeof(void *) ||
            size > KMALLOC_MAX_SIZE) {
                pr_err("kmem_cache_create(%s) integrity check failed\n", name);
                return -EINVAL;
        }

        list_for_each_entry(s, &slab_caches, list) {
                char tmp;
                int res;

                /*
                 * This happens when the module gets unloaded and doesn't
                 * destroy its slab cache and no-one else reuses the vmalloc
                 * area of the module. Print a warning.
                 */
                res = probe_kernel_address(s->name, tmp);
                if (res) {
                        pr_err("Slab cache with size %d has lost its name\n",
                               s->object_size);
                        continue;
                }
        }

        WARN_ON(strchr(name, ' '));     /* It confuses parsers */
        return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
        return 0;
}
#endif
#ifdef CONFIG_MEMCG_KMEM
void slab_init_memcg_params(struct kmem_cache *s)
{
        s->memcg_params.is_root_cache = true;
        INIT_LIST_HEAD(&s->memcg_params.list);
        RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
}

static int init_memcg_params(struct kmem_cache *s,
                struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
        struct memcg_cache_array *arr;

        if (memcg) {
                s->memcg_params.is_root_cache = false;
                s->memcg_params.memcg = memcg;
                s->memcg_params.root_cache = root_cache;
                return 0;
        }

        slab_init_memcg_params(s);

        if (!memcg_nr_cache_ids)
                return 0;

        arr = kzalloc(sizeof(struct memcg_cache_array) +
                      memcg_nr_cache_ids * sizeof(void *),
                      GFP_KERNEL);
        if (!arr)
                return -ENOMEM;

        RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
        return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
        if (is_root_cache(s))
                kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
        struct memcg_cache_array *old, *new;

        if (!is_root_cache(s))
                return 0;

        new = kzalloc(sizeof(struct memcg_cache_array) +
                      new_array_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        old = rcu_dereference_protected(s->memcg_params.memcg_caches,
                                        lockdep_is_held(&slab_mutex));
        if (old)
                memcpy(new->entries, old->entries,
                       memcg_nr_cache_ids * sizeof(void *));

        rcu_assign_pointer(s->memcg_params.memcg_caches, new);
        if (old)
                kfree_rcu(old, rcu);
        return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
        struct kmem_cache *s;
        int ret = 0;

        mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_caches, list) {
                ret = update_memcg_params(s, num_memcgs);
                /*
                 * Instead of freeing the memory, we'll just leave the caches
                 * up to this point in an updated state.
                 */
                if (ret)
                        break;
        }
        mutex_unlock(&slab_mutex);
        return ret;
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
                struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
        return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */
/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
        if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
                return 1;

        if (!is_root_cache(s))
                return 1;

        if (s->ctor)
                return 1;

        /*
         * We may have set a slab to be unmergeable during bootstrap.
         */
        if (s->refcount < 0)
                return 1;

        return 0;
}

struct kmem_cache *find_mergeable(size_t size, size_t align,
                unsigned long flags, const char *name, void (*ctor)(void *))
{
        struct kmem_cache *s;

        if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
                return NULL;

        if (ctor)
                return NULL;

        size = ALIGN(size, sizeof(void *));
        align = calculate_alignment(flags, align, size);
        size = ALIGN(size, align);
        flags = kmem_cache_flags(size, flags, name, NULL);

        list_for_each_entry_reverse(s, &slab_caches, list) {
                if (slab_unmergeable(s))
                        continue;

                if (size > s->size)
                        continue;

                if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
                        continue;
                /*
                 * Check if alignment is compatible.
                 * Courtesy of Adrian Drzewiecki
                 */
                if ((s->size & ~(align - 1)) != s->size)
                        continue;

                if (s->size - size >= sizeof(void *))
                        continue;

                if (IS_ENABLED(CONFIG_SLAB) && align &&
                        (align > s->align || s->align % align))
                        continue;

                return s;
        }
        return NULL;
}
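/*
 * Illustrative note (added, not from the original source): under SLUB with no
 * debug options, a request to create a cache for 60-byte objects with no
 * constructor rounds up to 64 bytes here, matches an existing 64-byte cache
 * on flags and alignment, and wastes less than sizeof(void *) per object, so
 * it is typically merged (e.g. aliased to kmalloc-64) instead of creating a
 * new cache.
 */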
/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
                unsigned long align, unsigned long size)
{
        /*
         * If the user wants hardware cache aligned objects then follow that
         * suggestion if the object is sufficiently large.
         *
         * The hardware cache alignment cannot override the specified
         * alignment though. If that is greater, then use it.
         */
        if (flags & SLAB_HWCACHE_ALIGN) {
                unsigned long ralign = cache_line_size();
                while (size <= ralign / 2)
                        ralign /= 2;
                align = max(align, ralign);
        }

        if (align < ARCH_SLAB_MINALIGN)
                align = ARCH_SLAB_MINALIGN;

        return ALIGN(align, sizeof(void *));
}
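/*
 * Worked example (added for illustration, assuming a 64-byte cache line and
 * 8-byte pointers): for a 20-byte object with SLAB_HWCACHE_ALIGN, ralign is
 * halved from 64 to 32 (since 20 <= 32) but not to 16 (since 20 > 16), so the
 * object gets 32-byte alignment rather than a full cache line.
 */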
static struct kmem_cache *
do_kmem_cache_create(const char *name, size_t object_size, size_t size,
                     size_t align, unsigned long flags, void (*ctor)(void *),
                     struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
        struct kmem_cache *s;
        int err;

        err = -ENOMEM;
        s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
        if (!s)
                goto out;

        s->name = name;
        s->object_size = object_size;
        s->size = size;
        s->align = align;
        s->ctor = ctor;

        err = init_memcg_params(s, memcg, root_cache);
        if (err)
                goto out_free_cache;

        err = __kmem_cache_create(s, flags);
        if (err)
                goto out_free_cache;

        s->refcount = 1;
        list_add(&s->list, &slab_caches);
out:
        if (err)
                return ERR_PTR(err);
        return s;

out_free_cache:
        destroy_memcg_params(s);
        kmem_cache_free(kmem_cache, s);
        goto out;
}
/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
                  unsigned long flags, void (*ctor)(void *))
{
        struct kmem_cache *s;
        const char *cache_name;
        int err;

        get_online_cpus();
        get_online_mems();
        memcg_get_cache_ids();

        mutex_lock(&slab_mutex);

        err = kmem_cache_sanity_check(name, size);
        if (err) {
                s = NULL;       /* suppress uninit var warning */
                goto out_unlock;
        }

        /*
         * Some allocators will constrain the set of valid flags to a subset
         * of all flags. We expect them to define CACHE_CREATE_MASK in this
         * case, and we'll just provide them with a sanitized version of the
         * passed flags.
         */
        flags &= CACHE_CREATE_MASK;

        s = __kmem_cache_alias(name, size, align, flags, ctor);
        if (s)
                goto out_unlock;

        cache_name = kstrdup_const(name, GFP_KERNEL);
        if (!cache_name) {
                err = -ENOMEM;
                goto out_unlock;
        }

        s = do_kmem_cache_create(cache_name, size, size,
                                 calculate_alignment(flags, align, size),
                                 flags, ctor, NULL, NULL);
        if (IS_ERR(s)) {
                err = PTR_ERR(s);
                kfree_const(cache_name);
        }

out_unlock:
        mutex_unlock(&slab_mutex);

        memcg_put_cache_ids();
        put_online_mems();
        put_online_cpus();

        if (err) {
                if (flags & SLAB_PANIC)
                        panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
                                name, err);
                else {
                        printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
                                name, err);
                        dump_stack();
                }
                return NULL;
        }
        return s;
}
EXPORT_SYMBOL(kmem_cache_create);
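/*
 * Usage sketch (added for illustration; "struct foo" and "foo_cachep" are
 * hypothetical names, not part of this file): a typical caller creates a
 * cache for a fixed-size object at init time and destroys it on exit.
 *
 *      static struct kmem_cache *foo_cachep;
 *
 *      foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *                                     SLAB_HWCACHE_ALIGN, NULL);
 *      if (!foo_cachep)
 *              return -ENOMEM;
 *
 *      obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *      ...
 *      kmem_cache_free(foo_cachep, obj);
 *      kmem_cache_destroy(foo_cachep);
 */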
static int do_kmem_cache_shutdown(struct kmem_cache *s,
                struct list_head *release, bool *need_rcu_barrier)
{
        if (__kmem_cache_shutdown(s) != 0) {
                printk(KERN_ERR "kmem_cache_destroy %s: "
                       "Slab cache still has objects\n", s->name);
                dump_stack();
                return -EBUSY;
        }

        if (s->flags & SLAB_DESTROY_BY_RCU)
                *need_rcu_barrier = true;

#ifdef CONFIG_MEMCG_KMEM
        if (!is_root_cache(s))
                list_del(&s->memcg_params.list);
#endif
        list_move(&s->list, release);
        return 0;
}

static void do_kmem_cache_release(struct list_head *release,
                                  bool need_rcu_barrier)
{
        struct kmem_cache *s, *s2;

        if (need_rcu_barrier)
                rcu_barrier();

        list_for_each_entry_safe(s, s2, release, list) {
#ifdef SLAB_SUPPORTS_SYSFS
                sysfs_slab_remove(s);
#else
                slab_kmem_cache_release(s);
#endif
        }
}
#ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
                             struct kmem_cache *root_cache)
{
        static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
        struct cgroup_subsys_state *css = mem_cgroup_css(memcg);
        struct memcg_cache_array *arr;
        struct kmem_cache *s = NULL;
        char *cache_name;
        int idx;

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);

        /*
         * The memory cgroup could have been deactivated while the cache
         * creation work was pending.
         */
        if (!memcg_kmem_is_active(memcg))
                goto out_unlock;

        idx = memcg_cache_id(memcg);
        arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
                                        lockdep_is_held(&slab_mutex));

        /*
         * Since per-memcg caches are created asynchronously on first
         * allocation (see memcg_kmem_get_cache()), several threads can try to
         * create the same cache, but only one of them may succeed.
         */
        if (arr->entries[idx])
                goto out_unlock;

        cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
        cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
                               css->id, memcg_name_buf);
        if (!cache_name)
                goto out_unlock;

        s = do_kmem_cache_create(cache_name, root_cache->object_size,
                                 root_cache->size, root_cache->align,
                                 root_cache->flags, root_cache->ctor,
                                 memcg, root_cache);
        /*
         * If we could not create a memcg cache, do not complain, because
         * that's not critical at all as we can always proceed with the root
         * cache.
         */
        if (IS_ERR(s)) {
                kfree(cache_name);
                goto out_unlock;
        }

        list_add(&s->memcg_params.list, &root_cache->memcg_params.list);

        /*
         * Since readers won't lock (see cache_from_memcg_idx()), we need a
         * barrier here to ensure nobody will see the kmem_cache partially
         * initialized.
         */
        smp_wmb();
        arr->entries[idx] = s;

out_unlock:
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
        int idx;
        struct memcg_cache_array *arr;
        struct kmem_cache *s, *c;

        idx = memcg_cache_id(memcg);

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_caches, list) {
                if (!is_root_cache(s))
                        continue;

                arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
                                                lockdep_is_held(&slab_mutex));
                c = arr->entries[idx];
                if (!c)
                        continue;

                __kmem_cache_shrink(c, true);
                arr->entries[idx] = NULL;
        }
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
        LIST_HEAD(release);
        bool need_rcu_barrier = false;
        struct kmem_cache *s, *s2;

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);
        list_for_each_entry_safe(s, s2, &slab_caches, list) {
                if (is_root_cache(s) || s->memcg_params.memcg != memcg)
                        continue;
                /*
                 * The cgroup is about to be freed and therefore has no charges
                 * left. Hence, all its caches must be empty by now.
                 */
                BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier));
        }
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();

        do_kmem_cache_release(&release, need_rcu_barrier);
}
#endif /* CONFIG_MEMCG_KMEM */
void slab_kmem_cache_release(struct kmem_cache *s)
{
        destroy_memcg_params(s);
        kfree_const(s->name);
        kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
        struct kmem_cache *c, *c2;
        LIST_HEAD(release);
        bool need_rcu_barrier = false;
        bool busy = false;

        BUG_ON(!is_root_cache(s));

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);

        s->refcount--;
        if (s->refcount)
                goto out_unlock;

        for_each_memcg_cache_safe(c, c2, s) {
                if (do_kmem_cache_shutdown(c, &release, &need_rcu_barrier))
                        busy = true;
        }

        if (!busy)
                do_kmem_cache_shutdown(s, &release, &need_rcu_barrier);

out_unlock:
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();

        do_kmem_cache_release(&release, need_rcu_barrier);
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
        int ret;

        get_online_cpus();
        get_online_mems();
        ret = __kmem_cache_shrink(cachep, false);
        put_online_mems();
        put_online_cpus();

        return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);

int slab_is_available(void)
{
        return slab_state >= UP;
}
#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
                unsigned long flags)
{
        int err;

        s->name = name;
        s->size = s->object_size = size;
        s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);

        slab_init_memcg_params(s);

        err = __kmem_cache_create(s, flags);

        if (err)
                panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
                      name, size, err);

        s->refcount = -1;       /* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
                unsigned long flags)
{
        struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

        if (!s)
                panic("Out of memory when creating slab %s\n", name);

        create_boot_cache(s, name, size, flags);
        list_add(&s->list, &slab_caches);
        s->refcount = 1;
        return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif
/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
        3,      /* 8 */
        4,      /* 16 */
        5,      /* 24 */
        5,      /* 32 */
        6,      /* 40 */
        6,      /* 48 */
        6,      /* 56 */
        6,      /* 64 */
        1,      /* 72 */
        1,      /* 80 */
        1,      /* 88 */
        1,      /* 96 */
        7,      /* 104 */
        7,      /* 112 */
        7,      /* 120 */
        7,      /* 128 */
        2,      /* 136 */
        2,      /* 144 */
        2,      /* 152 */
        2,      /* 160 */
        2,      /* 168 */
        2,      /* 176 */
        2,      /* 184 */
        2       /* 192 */
};

static inline int size_index_elem(size_t bytes)
{
        return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
        int index;

        if (unlikely(size > KMALLOC_MAX_SIZE)) {
                WARN_ON_ONCE(!(flags & __GFP_NOWARN));
                return NULL;
        }

        if (size <= 192) {
                if (!size)
                        return ZERO_SIZE_PTR;

                index = size_index[size_index_elem(size)];
        } else
                index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
        if (unlikely((flags & GFP_DMA)))
                return kmalloc_dma_caches[index];
#endif

        return kmalloc_caches[index];
}
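/*
 * Worked example (added for illustration): a 100-byte request falls in the
 * "<= 192" branch, size_index_elem(100) = (100 - 1) / 8 = 12, and
 * size_index[12] = 7, so it is served from kmalloc-128. A 300-byte request
 * takes the fls() branch: fls(299) = 9, i.e. kmalloc-512.
 */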
/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
 * kmalloc-67108864.
 */
static struct {
        const char *name;
        unsigned long size;
} const kmalloc_info[] __initconst = {
        {NULL, 0},                      {"kmalloc-96", 96},
        {"kmalloc-192", 192},           {"kmalloc-8", 8},
        {"kmalloc-16", 16},             {"kmalloc-32", 32},
        {"kmalloc-64", 64},             {"kmalloc-128", 128},
        {"kmalloc-256", 256},           {"kmalloc-512", 512},
        {"kmalloc-1024", 1024},         {"kmalloc-2048", 2048},
        {"kmalloc-4096", 4096},         {"kmalloc-8192", 8192},
        {"kmalloc-16384", 16384},       {"kmalloc-32768", 32768},
        {"kmalloc-65536", 65536},       {"kmalloc-131072", 131072},
        {"kmalloc-262144", 262144},     {"kmalloc-524288", 524288},
        {"kmalloc-1048576", 1048576},   {"kmalloc-2097152", 2097152},
        {"kmalloc-4194304", 4194304},   {"kmalloc-8388608", 8388608},
        {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432},
        {"kmalloc-67108864", 67108864}
};
/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crazy happens if someone starts tinkering
 * around with ARCH_KMALLOC_MINALIGN
 */
void __init setup_kmalloc_cache_index_table(void)
{
        int i;

        BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
                (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

        for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
                int elem = size_index_elem(i);

                if (elem >= ARRAY_SIZE(size_index))
                        break;
                size_index[elem] = KMALLOC_SHIFT_LOW;
        }

        if (KMALLOC_MIN_SIZE >= 64) {
                /*
                 * The 96 byte size cache is not used if the alignment
                 * is 64 bytes.
                 */
                for (i = 64 + 8; i <= 96; i += 8)
                        size_index[size_index_elem(i)] = 7;
        }

        if (KMALLOC_MIN_SIZE >= 128) {
                /*
                 * The 192 byte sized cache is not used if the alignment
                 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
                 * instead.
                 */
                for (i = 128 + 8; i <= 192; i += 8)
                        size_index[size_index_elem(i)] = 8;
        }
}
static void __init new_kmalloc_cache(int idx, unsigned long flags)
{
        kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
                                        kmalloc_info[idx].size, flags);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
        int i;

        for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
                if (!kmalloc_caches[i])
                        new_kmalloc_cache(i, flags);

                /*
                 * Caches that are not of a power-of-two size (kmalloc-96 at
                 * index 1 and kmalloc-192 at index 2) have to be created
                 * immediately after the earlier power-of-two caches.
                 */
                if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
                        new_kmalloc_cache(1, flags);
                if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
                        new_kmalloc_cache(2, flags);
        }

        /* Kmalloc array is now usable */
        slab_state = UP;

#ifdef CONFIG_ZONE_DMA
        for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
                struct kmem_cache *s = kmalloc_caches[i];

                if (s) {
                        int size = kmalloc_size(i);
                        char *n = kasprintf(GFP_NOWAIT,
                                            "dma-kmalloc-%d", size);

                        BUG_ON(!n);
                        kmalloc_dma_caches[i] = create_kmalloc_cache(n,
                                size, SLAB_CACHE_DMA | flags);
                }
        }
#endif
}
#endif /* !CONFIG_SLOB */
/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
        void *ret;
        struct page *page;

        flags |= __GFP_COMP;
        page = alloc_kmem_pages(flags, order);
        ret = page ? page_address(page) : NULL;
        kmemleak_alloc(ret, size, 1, flags);
        kasan_kmalloc_large(ret, size);
        return ret;
}
EXPORT_SYMBOL(kmalloc_order);

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
        void *ret = kmalloc_order(size, flags, order);
        trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
        return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif
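/*
 * Illustrative note (added, assuming SLUB and 4 KiB pages): a kmalloc()
 * request larger than KMALLOC_MAX_CACHE_SIZE, e.g. 64 KiB, bypasses the
 * kmalloc caches and ends up in kmalloc_order() with order = get_order(size),
 * here an order-4 compound page whose order is recovered from the page when
 * it is passed to kfree().
 */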
#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif

static void print_slabinfo_header(struct seq_file *m)
{
        /*
         * Output format version, so at least we can change it
         * without _too_ many complaints.
         */
#ifdef CONFIG_DEBUG_SLAB
        seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
        seq_puts(m, "slabinfo - version: 2.1\n");
#endif
        seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
                 "<objperslab> <pagesperslab>");
        seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
        seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
        seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
                 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
        seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
        seq_putc(m, '\n');
}

void *slab_start(struct seq_file *m, loff_t *pos)
{
        mutex_lock(&slab_mutex);
        return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
        return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
        mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
        struct kmem_cache *c;
        struct slabinfo sinfo;

        if (!is_root_cache(s))
                return;

        for_each_memcg_cache(c, s) {
                memset(&sinfo, 0, sizeof(sinfo));
                get_slabinfo(c, &sinfo);

                info->active_slabs += sinfo.active_slabs;
                info->num_slabs += sinfo.num_slabs;
                info->shared_avail += sinfo.shared_avail;
                info->active_objs += sinfo.active_objs;
                info->num_objs += sinfo.num_objs;
        }
}

static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
        struct slabinfo sinfo;

        memset(&sinfo, 0, sizeof(sinfo));
        get_slabinfo(s, &sinfo);

        memcg_accumulate_slabinfo(s, &sinfo);

        seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
                   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
                   sinfo.objects_per_slab, (1 << sinfo.cache_order));

        seq_printf(m, " : tunables %4u %4u %4u",
                   sinfo.limit, sinfo.batchcount, sinfo.shared);
        seq_printf(m, " : slabdata %6lu %6lu %6lu",
                   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
        slabinfo_show_stats(m, s);
        seq_putc(m, '\n');
}

static int slab_show(struct seq_file *m, void *p)
{
        struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

        if (p == slab_caches.next)
                print_slabinfo_header(m);
        if (is_root_cache(s))
                cache_show(s, m);
        return 0;
}

#ifdef CONFIG_MEMCG_KMEM
int memcg_slab_show(struct seq_file *m, void *p)
{
        struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
        struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

        if (p == slab_caches.next)
                print_slabinfo_header(m);
        if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
                cache_show(s, m);
        return 0;
}
#endif

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
        .start = slab_start,
        .next = slab_next,
        .stop = slab_stop,
        .show = slab_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
        .open = slabinfo_open,
        .read = seq_read,
        .write = slabinfo_write,
        .llseek = seq_lseek,
        .release = seq_release,
};

static int __init slab_proc_init(void)
{
        proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
                    &proc_slabinfo_operations);
        return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */
static __always_inline void *__do_krealloc(const void *p, size_t new_size,
                                           gfp_t flags)
{
        void *ret;
        size_t ks = 0;

        if (p)
                ks = ksize(p);

        if (ks >= new_size) {
                kasan_krealloc((void *)p, new_size);
                return (void *)p;
        }

        ret = kmalloc_track_caller(new_size, flags);
        if (ret && p)
                memcpy(ret, p, ks);

        return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately,
 * as is the case, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
        if (unlikely(!new_size))
                return ZERO_SIZE_PTR;

        return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes. If @p is %NULL, krealloc()
 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
        void *ret;

        if (unlikely(!new_size)) {
                kfree(p);
                return ZERO_SIZE_PTR;
        }

        ret = __do_krealloc(p, new_size, flags);
        if (ret && p != ret)
                kfree(p);

        return ret;
}
EXPORT_SYMBOL(krealloc);
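/*
 * Usage sketch (added for illustration; "buf" and "new_len" are hypothetical).
 * On failure krealloc() returns NULL and leaves the original buffer intact,
 * so callers must keep the old pointer until the call succeeds:
 *
 *      char *tmp = krealloc(buf, new_len, GFP_KERNEL);
 *      if (!tmp)
 *              return -ENOMEM;         (buf is still valid here)
 *      buf = tmp;
 */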
/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before it is freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
        size_t ks;
        void *mem = (void *)p;

        if (unlikely(ZERO_OR_NULL_PTR(mem)))
                return;
        ks = ksize(mem);
        memset(mem, 0, ks);
        kfree(mem);
}
EXPORT_SYMBOL(kzfree);

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);