|
@@ -75,6 +75,15 @@
|
|
|
*
|
|
|
* At present, each engine can be growing a cache. This should be blocked.
|
|
|
*
|
|
|
+ * 15 March 2005. NUMA slab allocator.
|
|
|
+ * Shai Fultheim <shai@scalex86.org>.
|
|
|
+ * Shobhit Dayal <shobhit@calsoftinc.com>
|
|
|
+ * Alok N Kataria <alokk@calsoftinc.com>
|
|
|
+ * Christoph Lameter <christoph@lameter.com>
|
|
|
+ *
|
|
|
+ * Modified the slab allocator to be node aware on NUMA systems.
|
|
|
+ * Each node has its own list of partial, free and full slabs.
|
|
|
+ * All object allocations for a node occur from node specific slab lists.
|
|
|
*/
|
|
|
|
|
|
#include <linux/config.h>
|
|
@@ -93,6 +102,7 @@
|
|
|
#include <linux/module.h>
|
|
|
#include <linux/rcupdate.h>
|
|
|
#include <linux/string.h>
|
|
|
+#include <linux/nodemask.h>
|
|
|
|
|
|
#include <asm/uaccess.h>
|
|
|
#include <asm/cacheflush.h>
|
|
@@ -212,6 +222,7 @@ struct slab {
|
|
|
void *s_mem; /* including colour offset */
|
|
|
unsigned int inuse; /* num of objs active in slab */
|
|
|
kmem_bufctl_t free;
|
|
|
+ unsigned short nodeid;
|
|
|
};
|
|
|
|
|
|
/*
|
|
@@ -239,7 +250,6 @@ struct slab_rcu {
|
|
|
/*
|
|
|
* struct array_cache
|
|
|
*
|
|
|
- * Per cpu structures
|
|
|
* Purpose:
|
|
|
* - LIFO ordering, to hand out cache-warm objects from _alloc
|
|
|
* - reduce the number of linked list operations
|
|
@@ -254,6 +264,13 @@ struct array_cache {
|
|
|
unsigned int limit;
|
|
|
unsigned int batchcount;
|
|
|
unsigned int touched;
|
|
|
+ spinlock_t lock;
|
|
|
+ void *entry[0]; /*
|
|
|
+ * Must have this definition in here for the proper
|
|
|
+ * alignment of array_cache. Also simplifies accessing
|
|
|
+ * the entries.
|
|
|
+ * [0] is for gcc 2.95. It should really be [].
|
|
|
+ */
|
|
|
};
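The zero-length entry[0] member added above replaces the old ac_entry() helper (removed further down), which computed (void **)(ac + 1): the object pointers now live in the same allocation, directly behind the header. A minimal userspace sketch of the same layout trick, with invented names rather than the kernel structures:

/* Illustrative only -- a simplified stand-in for array_cache. */
#include <stdlib.h>

struct ac_demo {
	unsigned int avail;
	unsigned int limit;
	void *entry[0];		/* slots stored inline, right after the header */
};

static struct ac_demo *ac_demo_alloc(unsigned int entries)
{
	/* same sizing idea as alloc_arraycache(): header plus 'entries' slots */
	struct ac_demo *ac = malloc(sizeof(struct ac_demo) +
				    sizeof(void *) * entries);
	if (ac) {
		ac->avail = 0;
		ac->limit = entries;
	}
	return ac;
}

Pushing an object is then just ac->entry[ac->avail++] = objp, which is exactly how the rest of the patch rewrites the old ac_entry(ac)[...] accesses.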
|
|
|
|
|
|
/* bootstrap: The caches do not work without cpuarrays anymore,
|
|
@@ -266,34 +283,83 @@ struct arraycache_init {
|
|
|
};
|
|
|
|
|
|
/*
|
|
|
- * The slab lists of all objects.
|
|
|
- * Hopefully reduce the internal fragmentation
|
|
|
- * NUMA: The spinlock could be moved from the kmem_cache_t
|
|
|
- * into this structure, too. Figure out what causes
|
|
|
- * fewer cross-node spinlock operations.
|
|
|
+ * The slab lists for all objects.
|
|
|
*/
|
|
|
struct kmem_list3 {
|
|
|
struct list_head slabs_partial; /* partial list first, better asm code */
|
|
|
struct list_head slabs_full;
|
|
|
struct list_head slabs_free;
|
|
|
unsigned long free_objects;
|
|
|
- int free_touched;
|
|
|
unsigned long next_reap;
|
|
|
- struct array_cache *shared;
|
|
|
+ int free_touched;
|
|
|
+ unsigned int free_limit;
|
|
|
+ spinlock_t list_lock;
|
|
|
+ struct array_cache *shared; /* shared per node */
|
|
|
+ struct array_cache **alien; /* on other nodes */
|
|
|
};
|
|
|
|
|
|
-#define LIST3_INIT(parent) \
|
|
|
- { \
|
|
|
- .slabs_full = LIST_HEAD_INIT(parent.slabs_full), \
|
|
|
- .slabs_partial = LIST_HEAD_INIT(parent.slabs_partial), \
|
|
|
- .slabs_free = LIST_HEAD_INIT(parent.slabs_free) \
|
|
|
+/*
|
|
|
+ * Need this for bootstrapping a per node allocator.
|
|
|
+ */
|
|
|
+#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
|
|
|
+struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
|
|
|
+#define CACHE_CACHE 0
|
|
|
+#define SIZE_AC 1
|
|
|
+#define SIZE_L3 (1 + MAX_NUMNODES)
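The static bootstrap array has to cover cache_cache plus, for every possible node, one kmem_list3 for the cache that backs struct arraycache_init and one for the cache that backs struct kmem_list3 itself, hence 2 * MAX_NUMNODES + 1 entries. As a purely illustrative example, if MAX_NUMNODES were 4 the layout would be: slot 0 for CACHE_CACHE, slots 1-4 (SIZE_AC + node) for the arraycache-sized kmalloc cache, and slots 5-8 (SIZE_L3 + node) for the list3-sized kmalloc cache.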
|
|
|
+
|
|
|
+/*
|
|
|
+ * This function may be completely optimized away if
|
|
|
+ * a constant is passed to it. Mostly the same as
|
|
|
+ * what is in linux/slab.h except it returns an
|
|
|
+ * index.
|
|
|
+ */
|
|
|
+static inline int index_of(const size_t size)
|
|
|
+{
|
|
|
+ if (__builtin_constant_p(size)) {
|
|
|
+ int i = 0;
|
|
|
+
|
|
|
+#define CACHE(x) \
|
|
|
+ if (size <= x) \
|
|
|
+ return i; \
|
|
|
+ else \
|
|
|
+ i++;
|
|
|
+#include "linux/kmalloc_sizes.h"
|
|
|
+#undef CACHE
|
|
|
+ {
|
|
|
+ extern void __bad_size(void);
|
|
|
+ __bad_size();
|
|
|
+ }
|
|
|
}
|
|
|
-#define list3_data(cachep) \
|
|
|
- (&(cachep)->lists)
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+#define INDEX_AC index_of(sizeof(struct arraycache_init))
|
|
|
+#define INDEX_L3 index_of(sizeof(struct kmem_list3))
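Because index_of() is a chain of "if (size <= x)" tests generated by re-including the kmalloc size table, a constant argument lets the compiler fold the whole chain down to a constant, so INDEX_AC and INDEX_L3 become compile-time indexes into malloc_sizes[]. A rough standalone sketch of the same X-macro pattern, using an invented size list in place of linux/kmalloc_sizes.h:

/* Illustrative only: a made-up size list standing in for linux/kmalloc_sizes.h. */
#include <stddef.h>

#define DEMO_SIZES \
	CACHE(32) \
	CACHE(64) \
	CACHE(128) \
	CACHE(256)

static inline int demo_index_of(const size_t size)
{
	int i = 0;

#define CACHE(x)		\
	if (size <= x)		\
		return i;	\
	else			\
		i++;
	DEMO_SIZES
#undef CACHE
	return -1;		/* nothing big enough */
}

/* With a constant argument the chain folds away, e.g. demo_index_of(100)
 * becomes the constant 2 (the 128-byte bucket). */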
|
|
|
+
|
|
|
+static inline void kmem_list3_init(struct kmem_list3 *parent)
|
|
|
+{
|
|
|
+ INIT_LIST_HEAD(&parent->slabs_full);
|
|
|
+ INIT_LIST_HEAD(&parent->slabs_partial);
|
|
|
+ INIT_LIST_HEAD(&parent->slabs_free);
|
|
|
+ parent->shared = NULL;
|
|
|
+ parent->alien = NULL;
|
|
|
+ spin_lock_init(&parent->list_lock);
|
|
|
+ parent->free_objects = 0;
|
|
|
+ parent->free_touched = 0;
|
|
|
+}
|
|
|
|
|
|
-/* NUMA: per-node */
|
|
|
-#define list3_data_ptr(cachep, ptr) \
|
|
|
- list3_data(cachep)
|
|
|
+#define MAKE_LIST(cachep, listp, slab, nodeid) \
|
|
|
+ do { \
|
|
|
+ INIT_LIST_HEAD(listp); \
|
|
|
+ list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
|
|
|
+ } while (0)
|
|
|
+
|
|
|
+#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
|
|
|
+ do { \
|
|
|
+ MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
|
|
|
+ MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
|
|
|
+ MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
|
|
|
+ } while (0)
|
|
|
|
|
|
/*
|
|
|
* kmem_cache_t
|
|
@@ -306,13 +372,12 @@ struct kmem_cache_s {
|
|
|
struct array_cache *array[NR_CPUS];
|
|
|
unsigned int batchcount;
|
|
|
unsigned int limit;
|
|
|
-/* 2) touched by every alloc & free from the backend */
|
|
|
- struct kmem_list3 lists;
|
|
|
- /* NUMA: kmem_3list_t *nodelists[MAX_NUMNODES] */
|
|
|
+ unsigned int shared;
|
|
|
unsigned int objsize;
|
|
|
+/* 2) touched by every alloc & free from the backend */
|
|
|
+ struct kmem_list3 *nodelists[MAX_NUMNODES];
|
|
|
unsigned int flags; /* constant flags */
|
|
|
unsigned int num; /* # of objs per slab */
|
|
|
- unsigned int free_limit; /* upper limit of objects in the lists */
|
|
|
spinlock_t spinlock;
|
|
|
|
|
|
/* 3) cache_grow/shrink */
|
|
@@ -349,6 +414,7 @@ struct kmem_cache_s {
|
|
|
unsigned long errors;
|
|
|
unsigned long max_freeable;
|
|
|
unsigned long node_allocs;
|
|
|
+ unsigned long node_frees;
|
|
|
atomic_t allochit;
|
|
|
atomic_t allocmiss;
|
|
|
atomic_t freehit;
|
|
@@ -384,6 +450,7 @@ struct kmem_cache_s {
|
|
|
} while (0)
|
|
|
#define STATS_INC_ERR(x) ((x)->errors++)
|
|
|
#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
|
|
|
+#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
|
|
|
#define STATS_SET_FREEABLE(x, i) \
|
|
|
do { if ((x)->max_freeable < i) \
|
|
|
(x)->max_freeable = i; \
|
|
@@ -402,6 +469,7 @@ struct kmem_cache_s {
|
|
|
#define STATS_SET_HIGH(x) do { } while (0)
|
|
|
#define STATS_INC_ERR(x) do { } while (0)
|
|
|
#define STATS_INC_NODEALLOCS(x) do { } while (0)
|
|
|
+#define STATS_INC_NODEFREES(x) do { } while (0)
|
|
|
#define STATS_SET_FREEABLE(x, i) \
|
|
|
do { } while (0)
|
|
|
|
|
@@ -534,9 +602,9 @@ static struct arraycache_init initarray_generic =
|
|
|
|
|
|
/* internal cache of cache description objs */
|
|
|
static kmem_cache_t cache_cache = {
|
|
|
- .lists = LIST3_INIT(cache_cache.lists),
|
|
|
.batchcount = 1,
|
|
|
.limit = BOOT_CPUCACHE_ENTRIES,
|
|
|
+ .shared = 1,
|
|
|
.objsize = sizeof(kmem_cache_t),
|
|
|
.flags = SLAB_NO_REAP,
|
|
|
.spinlock = SPIN_LOCK_UNLOCKED,
|
|
@@ -557,7 +625,6 @@ static struct list_head cache_chain;
|
|
|
* SLAB_RECLAIM_ACCOUNT turns this on per-slab
|
|
|
*/
|
|
|
atomic_t slab_reclaim_pages;
|
|
|
-EXPORT_SYMBOL(slab_reclaim_pages);
|
|
|
|
|
|
/*
|
|
|
* chicken and egg problem: delay the per-cpu array allocation
|
|
@@ -565,7 +632,8 @@ EXPORT_SYMBOL(slab_reclaim_pages);
|
|
|
*/
|
|
|
static enum {
|
|
|
NONE,
|
|
|
- PARTIAL,
|
|
|
+ PARTIAL_AC,
|
|
|
+ PARTIAL_L3,
|
|
|
FULL
|
|
|
} g_cpucache_up;
|
|
|
|
|
@@ -574,11 +642,7 @@ static DEFINE_PER_CPU(struct work_struct, reap_work);
|
|
|
static void free_block(kmem_cache_t* cachep, void** objpp, int len);
|
|
|
static void enable_cpucache (kmem_cache_t *cachep);
|
|
|
static void cache_reap (void *unused);
|
|
|
-
|
|
|
-static inline void **ac_entry(struct array_cache *ac)
|
|
|
-{
|
|
|
- return (void**)(ac+1);
|
|
|
-}
|
|
|
+static int __node_shrink(kmem_cache_t *cachep, int node);
|
|
|
|
|
|
static inline struct array_cache *ac_data(kmem_cache_t *cachep)
|
|
|
{
|
|
@@ -676,48 +740,160 @@ static void __devinit start_cpu_timer(int cpu)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static struct array_cache *alloc_arraycache(int cpu, int entries,
|
|
|
+static struct array_cache *alloc_arraycache(int node, int entries,
|
|
|
int batchcount)
|
|
|
{
|
|
|
int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
|
|
|
struct array_cache *nc = NULL;
|
|
|
|
|
|
- if (cpu == -1)
|
|
|
- nc = kmalloc(memsize, GFP_KERNEL);
|
|
|
- else
|
|
|
- nc = kmalloc_node(memsize, GFP_KERNEL, cpu_to_node(cpu));
|
|
|
-
|
|
|
+ nc = kmalloc_node(memsize, GFP_KERNEL, node);
|
|
|
if (nc) {
|
|
|
nc->avail = 0;
|
|
|
nc->limit = entries;
|
|
|
nc->batchcount = batchcount;
|
|
|
nc->touched = 0;
|
|
|
+ spin_lock_init(&nc->lock);
|
|
|
}
|
|
|
return nc;
|
|
|
}
|
|
|
|
|
|
+#ifdef CONFIG_NUMA
|
|
|
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
|
|
|
+{
|
|
|
+ struct array_cache **ac_ptr;
|
|
|
+ int memsize = sizeof(void*)*MAX_NUMNODES;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ if (limit > 1)
|
|
|
+ limit = 12;
|
|
|
+ ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
|
|
|
+ if (ac_ptr) {
|
|
|
+ for_each_node(i) {
|
|
|
+ if (i == node || !node_online(i)) {
|
|
|
+ ac_ptr[i] = NULL;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
|
|
|
+ if (!ac_ptr[i]) {
|
|
|
+ for (i--; i >= 0; i--)
|
|
|
+ kfree(ac_ptr[i]);
|
|
|
+ kfree(ac_ptr);
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return ac_ptr;
|
|
|
+}
|
|
|
+
|
|
|
+static inline void free_alien_cache(struct array_cache **ac_ptr)
|
|
|
+{
|
|
|
+ int i;
|
|
|
+
|
|
|
+ if (!ac_ptr)
|
|
|
+ return;
|
|
|
+
|
|
|
+ for_each_node(i)
|
|
|
+ kfree(ac_ptr[i]);
|
|
|
+
|
|
|
+ kfree(ac_ptr);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache *ac, int node)
|
|
|
+{
|
|
|
+ struct kmem_list3 *rl3 = cachep->nodelists[node];
|
|
|
+
|
|
|
+ if (ac->avail) {
|
|
|
+ spin_lock(&rl3->list_lock);
|
|
|
+ free_block(cachep, ac->entry, ac->avail);
|
|
|
+ ac->avail = 0;
|
|
|
+ spin_unlock(&rl3->list_lock);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3)
|
|
|
+{
|
|
|
+ int i=0;
|
|
|
+ struct array_cache *ac;
|
|
|
+ unsigned long flags;
|
|
|
+
|
|
|
+ for_each_online_node(i) {
|
|
|
+ ac = l3->alien[i];
|
|
|
+ if (ac) {
|
|
|
+ spin_lock_irqsave(&ac->lock, flags);
|
|
|
+ __drain_alien_cache(cachep, ac, i);
|
|
|
+ spin_unlock_irqrestore(&ac->lock, flags);
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+#else
|
|
|
+#define alloc_alien_cache(node, limit) do { } while (0)
|
|
|
+#define free_alien_cache(ac_ptr) do { } while (0)
|
|
|
+#define drain_alien_cache(cachep, l3) do { } while (0)
|
|
|
+#endif
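Each node's kmem_list3 carries an alien array with one array_cache per other node: objects freed on the wrong node are parked there and handed back in batches by __drain_alien_cache(), which takes the owning node's list_lock once per batch instead of once per object (the queueing side appears later in __cache_free()). A hedged sketch of that batching idea, with hypothetical names and a fixed batch size rather than the kernel code:

/* Illustrative only: park remote frees and flush them in one locked pass. */
#define DEMO_BATCH 16

struct remote_queue {
	int avail;
	void *entry[DEMO_BATCH];
};

/* 'flush' stands in for __drain_alien_cache(): it is expected to take the
 * remote node's list lock once, give back every queued object and reset
 * q->avail to 0. */
static void queue_remote_free(struct remote_queue *q, void *obj,
			      void (*flush)(struct remote_queue *))
{
	if (q->avail == DEMO_BATCH)
		flush(q);
	q->entry[q->avail++] = obj;
}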
|
|
|
+
|
|
|
static int __devinit cpuup_callback(struct notifier_block *nfb,
|
|
|
unsigned long action, void *hcpu)
|
|
|
{
|
|
|
long cpu = (long)hcpu;
|
|
|
kmem_cache_t* cachep;
|
|
|
+ struct kmem_list3 *l3 = NULL;
|
|
|
+ int node = cpu_to_node(cpu);
|
|
|
+ int memsize = sizeof(struct kmem_list3);
|
|
|
+ struct array_cache *nc = NULL;
|
|
|
|
|
|
switch (action) {
|
|
|
case CPU_UP_PREPARE:
|
|
|
down(&cache_chain_sem);
|
|
|
+ /* We need to do this right at the beginning since
|
|
|
+ * the alloc_arraycache() calls are going to use this list.
|
|
|
+ * kmalloc_node allows us to add the slab to the right
|
|
|
+ * kmem_list3 and not this cpu's kmem_list3
|
|
|
+ */
|
|
|
+
|
|
|
list_for_each_entry(cachep, &cache_chain, next) {
|
|
|
- struct array_cache *nc;
|
|
|
+ /* Set up the kmem_list3 for this cpu's node before we can
|
|
|
+ * begin anything. Make sure some other cpu on this
|
|
|
+ * node has not already allocated it.
|
|
|
+ */
|
|
|
+ if (!cachep->nodelists[node]) {
|
|
|
+ if (!(l3 = kmalloc_node(memsize,
|
|
|
+ GFP_KERNEL, node)))
|
|
|
+ goto bad;
|
|
|
+ kmem_list3_init(l3);
|
|
|
+ l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
|
|
|
+ ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
|
|
|
+
|
|
|
+ cachep->nodelists[node] = l3;
|
|
|
+ }
|
|
|
+
|
|
|
+ spin_lock_irq(&cachep->nodelists[node]->list_lock);
|
|
|
+ cachep->nodelists[node]->free_limit =
|
|
|
+ (1 + nr_cpus_node(node)) *
|
|
|
+ cachep->batchcount + cachep->num;
|
|
|
+ spin_unlock_irq(&cachep->nodelists[node]->list_lock);
|
|
|
+ }
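free_limit is now tracked per node rather than per cache: each node may keep roughly one batch of objects per local cpu, one extra batch, and one slab's worth on top before free_block() starts destroying completely free slabs. With purely illustrative numbers - 4 cpus on the node, a batchcount of 60 and 30 objects per slab - that works out to (1 + 4) * 60 + 30 = 330 cached free objects per node.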
|
|
|
|
|
|
- nc = alloc_arraycache(cpu, cachep->limit, cachep->batchcount);
|
|
|
+ /* Now we can go ahead with allocating the shared arrays
|
|
|
+ & array caches */
|
|
|
+ list_for_each_entry(cachep, &cache_chain, next) {
|
|
|
+ nc = alloc_arraycache(node, cachep->limit,
|
|
|
+ cachep->batchcount);
|
|
|
if (!nc)
|
|
|
goto bad;
|
|
|
-
|
|
|
- spin_lock_irq(&cachep->spinlock);
|
|
|
cachep->array[cpu] = nc;
|
|
|
- cachep->free_limit = (1+num_online_cpus())*cachep->batchcount
|
|
|
- + cachep->num;
|
|
|
- spin_unlock_irq(&cachep->spinlock);
|
|
|
|
|
|
+ l3 = cachep->nodelists[node];
|
|
|
+ BUG_ON(!l3);
|
|
|
+ if (!l3->shared) {
|
|
|
+ if (!(nc = alloc_arraycache(node,
|
|
|
+ cachep->shared*cachep->batchcount,
|
|
|
+ 0xbaadf00d)))
|
|
|
+ goto bad;
|
|
|
+
|
|
|
+ /* we are serialised from CPU_DEAD or
|
|
|
+ CPU_UP_CANCELLED by the cpucontrol lock */
|
|
|
+ l3->shared = nc;
|
|
|
+ }
|
|
|
}
|
|
|
up(&cache_chain_sem);
|
|
|
break;
|
|
@@ -732,13 +908,51 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
|
|
|
|
|
|
list_for_each_entry(cachep, &cache_chain, next) {
|
|
|
struct array_cache *nc;
|
|
|
+ cpumask_t mask;
|
|
|
|
|
|
+ mask = node_to_cpumask(node);
|
|
|
spin_lock_irq(&cachep->spinlock);
|
|
|
/* cpu is dead; no one can alloc from it. */
|
|
|
nc = cachep->array[cpu];
|
|
|
cachep->array[cpu] = NULL;
|
|
|
- cachep->free_limit -= cachep->batchcount;
|
|
|
- free_block(cachep, ac_entry(nc), nc->avail);
|
|
|
+ l3 = cachep->nodelists[node];
|
|
|
+
|
|
|
+ if (!l3)
|
|
|
+ goto unlock_cache;
|
|
|
+
|
|
|
+ spin_lock(&l3->list_lock);
|
|
|
+
|
|
|
+ /* Free limit for this kmem_list3 */
|
|
|
+ l3->free_limit -= cachep->batchcount;
|
|
|
+ if (nc)
|
|
|
+ free_block(cachep, nc->entry, nc->avail);
|
|
|
+
|
|
|
+ if (!cpus_empty(mask)) {
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
+ goto unlock_cache;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (l3->shared) {
|
|
|
+ free_block(cachep, l3->shared->entry,
|
|
|
+ l3->shared->avail);
|
|
|
+ kfree(l3->shared);
|
|
|
+ l3->shared = NULL;
|
|
|
+ }
|
|
|
+ if (l3->alien) {
|
|
|
+ drain_alien_cache(cachep, l3);
|
|
|
+ free_alien_cache(l3->alien);
|
|
|
+ l3->alien = NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* free slabs belonging to this node */
|
|
|
+ if (__node_shrink(cachep, node)) {
|
|
|
+ cachep->nodelists[node] = NULL;
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
+ kfree(l3);
|
|
|
+ } else {
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
+ }
|
|
|
+unlock_cache:
|
|
|
spin_unlock_irq(&cachep->spinlock);
|
|
|
kfree(nc);
|
|
|
}
|
|
@@ -754,6 +968,25 @@ bad:
|
|
|
|
|
|
static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
|
|
|
|
|
|
+/*
|
|
|
+ * Swap the static kmem_list3 with kmalloced memory.
|
|
|
+ */
|
|
|
+static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list,
|
|
|
+ int nodeid)
|
|
|
+{
|
|
|
+ struct kmem_list3 *ptr;
|
|
|
+
|
|
|
+ BUG_ON(cachep->nodelists[nodeid] != list);
|
|
|
+ ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
|
|
|
+ BUG_ON(!ptr);
|
|
|
+
|
|
|
+ local_irq_disable();
|
|
|
+ memcpy(ptr, list, sizeof(struct kmem_list3));
|
|
|
+ MAKE_ALL_LISTS(cachep, ptr, nodeid);
|
|
|
+ cachep->nodelists[nodeid] = ptr;
|
|
|
+ local_irq_enable();
|
|
|
+}
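A plain memcpy() is not enough here: the list_head fields in the copied kmem_list3 would still point back into the static initkmem_list3 entry, so MAKE_ALL_LISTS() re-splices the three slab lists onto the heads in the new, kmalloc'd copy before nodelists[nodeid] is switched over to it.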
|
|
|
+
|
|
|
/* Initialisation.
|
|
|
* Called after the gfp() functions have been enabled, and before smp_init().
|
|
|
*/
|
|
@@ -762,6 +995,13 @@ void __init kmem_cache_init(void)
|
|
|
size_t left_over;
|
|
|
struct cache_sizes *sizes;
|
|
|
struct cache_names *names;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ for (i = 0; i < NUM_INIT_LISTS; i++) {
|
|
|
+ kmem_list3_init(&initkmem_list3[i]);
|
|
|
+ if (i < MAX_NUMNODES)
|
|
|
+ cache_cache.nodelists[i] = NULL;
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Fragmentation resistance on low memory - only use bigger
|
|
@@ -770,21 +1010,24 @@ void __init kmem_cache_init(void)
|
|
|
if (num_physpages > (32 << 20) >> PAGE_SHIFT)
|
|
|
slab_break_gfp_order = BREAK_GFP_ORDER_HI;
|
|
|
|
|
|
-
|
|
|
/* Bootstrap is tricky, because several objects are allocated
|
|
|
* from caches that do not exist yet:
|
|
|
* 1) initialize the cache_cache cache: it contains the kmem_cache_t
|
|
|
* structures of all caches, except cache_cache itself: cache_cache
|
|
|
* is statically allocated.
|
|
|
- * Initially an __init data area is used for the head array, it's
|
|
|
- * replaced with a kmalloc allocated array at the end of the bootstrap.
|
|
|
+ * Initially an __init data area is used for the head array and the
|
|
|
+ * kmem_list3 structures, it's replaced with a kmalloc allocated
|
|
|
+ * array at the end of the bootstrap.
|
|
|
* 2) Create the first kmalloc cache.
|
|
|
- * The kmem_cache_t for the new cache is allocated normally. An __init
|
|
|
- * data area is used for the head array.
|
|
|
- * 3) Create the remaining kmalloc caches, with minimally sized head arrays.
|
|
|
+ * The kmem_cache_t for the new cache is allocated normally.
|
|
|
+ * An __init data area is used for the head array.
|
|
|
+ * 3) Create the remaining kmalloc caches, with minimally sized
|
|
|
+ * head arrays.
|
|
|
* 4) Replace the __init data head arrays for cache_cache and the first
|
|
|
* kmalloc cache with kmalloc allocated arrays.
|
|
|
- * 5) Resize the head arrays of the kmalloc caches to their final sizes.
|
|
|
+ * 5) Replace the __init data for kmem_list3 for cache_cache and
|
|
|
+ * the other cache's with kmalloc allocated memory.
|
|
|
+ * 6) Resize the head arrays of the kmalloc caches to their final sizes.
|
|
|
*/
|
|
|
|
|
|
/* 1) create the cache_cache */
|
|
@@ -793,6 +1036,7 @@ void __init kmem_cache_init(void)
|
|
|
list_add(&cache_cache.next, &cache_chain);
|
|
|
cache_cache.colour_off = cache_line_size();
|
|
|
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
|
|
|
+ cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE];
|
|
|
|
|
|
cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());
|
|
|
|
|
@@ -810,15 +1054,33 @@ void __init kmem_cache_init(void)
|
|
|
sizes = malloc_sizes;
|
|
|
names = cache_names;
|
|
|
|
|
|
+ /* Initialize the caches that provide memory for the array cache
|
|
|
+ * and the kmem_list3 structures first.
|
|
|
+ * Without this, further allocations will BUG().
|
|
|
+ */
|
|
|
+
|
|
|
+ sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
|
|
|
+ sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN,
|
|
|
+ (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
|
|
|
+
|
|
|
+ if (INDEX_AC != INDEX_L3)
|
|
|
+ sizes[INDEX_L3].cs_cachep =
|
|
|
+ kmem_cache_create(names[INDEX_L3].name,
|
|
|
+ sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN,
|
|
|
+ (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
|
|
|
+
|
|
|
while (sizes->cs_size != ULONG_MAX) {
|
|
|
- /* For performance, all the general caches are L1 aligned.
|
|
|
+ /*
|
|
|
+ * For performance, all the general caches are L1 aligned.
|
|
|
* This should be particularly beneficial on SMP boxes, as it
|
|
|
* eliminates "false sharing".
|
|
|
* Note for systems short on memory removing the alignment will
|
|
|
- * allow tighter packing of the smaller caches. */
|
|
|
- sizes->cs_cachep = kmem_cache_create(names->name,
|
|
|
- sizes->cs_size, ARCH_KMALLOC_MINALIGN,
|
|
|
- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
|
|
|
+ * allow tighter packing of the smaller caches.
|
|
|
+ */
|
|
|
+ if (!sizes->cs_cachep)
|
|
|
+ sizes->cs_cachep = kmem_cache_create(names->name,
|
|
|
+ sizes->cs_size, ARCH_KMALLOC_MINALIGN,
|
|
|
+ (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
|
|
|
|
|
|
/* Inc off-slab bufctl limit until the ceiling is hit. */
|
|
|
if (!(OFF_SLAB(sizes->cs_cachep))) {
|
|
@@ -837,24 +1099,47 @@ void __init kmem_cache_init(void)
|
|
|
/* 4) Replace the bootstrap head arrays */
|
|
|
{
|
|
|
void * ptr;
|
|
|
-
|
|
|
+
|
|
|
ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
|
|
|
+
|
|
|
local_irq_disable();
|
|
|
BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache);
|
|
|
- memcpy(ptr, ac_data(&cache_cache), sizeof(struct arraycache_init));
|
|
|
+ memcpy(ptr, ac_data(&cache_cache),
|
|
|
+ sizeof(struct arraycache_init));
|
|
|
cache_cache.array[smp_processor_id()] = ptr;
|
|
|
local_irq_enable();
|
|
|
-
|
|
|
+
|
|
|
ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
|
|
|
+
|
|
|
local_irq_disable();
|
|
|
- BUG_ON(ac_data(malloc_sizes[0].cs_cachep) != &initarray_generic.cache);
|
|
|
- memcpy(ptr, ac_data(malloc_sizes[0].cs_cachep),
|
|
|
+ BUG_ON(ac_data(malloc_sizes[INDEX_AC].cs_cachep)
|
|
|
+ != &initarray_generic.cache);
|
|
|
+ memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep),
|
|
|
sizeof(struct arraycache_init));
|
|
|
- malloc_sizes[0].cs_cachep->array[smp_processor_id()] = ptr;
|
|
|
+ malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
|
|
|
+ ptr;
|
|
|
local_irq_enable();
|
|
|
}
|
|
|
+ /* 5) Replace the bootstrap kmem_list3's */
|
|
|
+ {
|
|
|
+ int node;
|
|
|
+ /* Replace the static kmem_list3 structures for the boot cpu */
|
|
|
+ init_list(&cache_cache, &initkmem_list3[CACHE_CACHE],
|
|
|
+ numa_node_id());
|
|
|
+
|
|
|
+ for_each_online_node(node) {
|
|
|
+ init_list(malloc_sizes[INDEX_AC].cs_cachep,
|
|
|
+ &initkmem_list3[SIZE_AC+node], node);
|
|
|
+
|
|
|
+ if (INDEX_AC != INDEX_L3) {
|
|
|
+ init_list(malloc_sizes[INDEX_L3].cs_cachep,
|
|
|
+ &initkmem_list3[SIZE_L3+node],
|
|
|
+ node);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- /* 5) resize the head arrays to their final sizes */
|
|
|
+ /* 6) resize the head arrays to their final sizes */
|
|
|
{
|
|
|
kmem_cache_t *cachep;
|
|
|
down(&cache_chain_sem);
|
|
@@ -870,7 +1155,6 @@ void __init kmem_cache_init(void)
|
|
|
* that initializes ac_data for all new cpus
|
|
|
*/
|
|
|
register_cpu_notifier(&cpucache_notifier);
|
|
|
-
|
|
|
|
|
|
/* The reap timers are started later, with a module init call:
|
|
|
* That part of the kernel is not yet operational.
|
|
@@ -885,10 +1169,8 @@ static int __init cpucache_init(void)
|
|
|
* Register the timers that return unneeded
|
|
|
* pages to gfp.
|
|
|
*/
|
|
|
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
|
|
|
- if (cpu_online(cpu))
|
|
|
- start_cpu_timer(cpu);
|
|
|
- }
|
|
|
+ for_each_online_cpu(cpu)
|
|
|
+ start_cpu_timer(cpu);
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -1167,6 +1449,20 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/* Set up all the kmem_list3s for a cache whose objsize is the same
|
|
|
+ as the size of kmem_list3. */
|
|
|
+static inline void set_up_list3s(kmem_cache_t *cachep, int index)
|
|
|
+{
|
|
|
+ int node;
|
|
|
+
|
|
|
+ for_each_online_node(node) {
|
|
|
+ cachep->nodelists[node] = &initkmem_list3[index+node];
|
|
|
+ cachep->nodelists[node]->next_reap = jiffies +
|
|
|
+ REAPTIMEOUT_LIST3 +
|
|
|
+ ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* kmem_cache_create - Create a cache.
|
|
|
* @name: A string which is used in /proc/slabinfo to identify this cache.
|
|
@@ -1320,7 +1616,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
|
|
|
size += BYTES_PER_WORD;
|
|
|
}
|
|
|
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
|
|
|
- if (size > 128 && cachep->reallen > cache_line_size() && size < PAGE_SIZE) {
|
|
|
+ if (size >= malloc_sizes[INDEX_L3+1].cs_size && cachep->reallen > cache_line_size() && size < PAGE_SIZE) {
|
|
|
cachep->dbghead += PAGE_SIZE - size;
|
|
|
size = PAGE_SIZE;
|
|
|
}
|
|
@@ -1422,10 +1718,6 @@ next:
|
|
|
cachep->gfpflags |= GFP_DMA;
|
|
|
spin_lock_init(&cachep->spinlock);
|
|
|
cachep->objsize = size;
|
|
|
- /* NUMA */
|
|
|
- INIT_LIST_HEAD(&cachep->lists.slabs_full);
|
|
|
- INIT_LIST_HEAD(&cachep->lists.slabs_partial);
|
|
|
- INIT_LIST_HEAD(&cachep->lists.slabs_free);
|
|
|
|
|
|
if (flags & CFLGS_OFF_SLAB)
|
|
|
cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
|
|
@@ -1444,11 +1736,43 @@ next:
|
|
|
* the cache that's used by kmalloc(24), otherwise
|
|
|
* the creation of further caches will BUG().
|
|
|
*/
|
|
|
- cachep->array[smp_processor_id()] = &initarray_generic.cache;
|
|
|
- g_cpucache_up = PARTIAL;
|
|
|
+ cachep->array[smp_processor_id()] =
|
|
|
+ &initarray_generic.cache;
|
|
|
+
|
|
|
+ /* If the cache that's used by
|
|
|
+ * kmalloc(sizeof(kmem_list3)) is the first cache,
|
|
|
+ * then we need to set up all its list3s, otherwise
|
|
|
+ * the creation of further caches will BUG().
|
|
|
+ */
|
|
|
+ set_up_list3s(cachep, SIZE_AC);
|
|
|
+ if (INDEX_AC == INDEX_L3)
|
|
|
+ g_cpucache_up = PARTIAL_L3;
|
|
|
+ else
|
|
|
+ g_cpucache_up = PARTIAL_AC;
|
|
|
} else {
|
|
|
- cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init),GFP_KERNEL);
|
|
|
+ cachep->array[smp_processor_id()] =
|
|
|
+ kmalloc(sizeof(struct arraycache_init),
|
|
|
+ GFP_KERNEL);
|
|
|
+
|
|
|
+ if (g_cpucache_up == PARTIAL_AC) {
|
|
|
+ set_up_list3s(cachep, SIZE_L3);
|
|
|
+ g_cpucache_up = PARTIAL_L3;
|
|
|
+ } else {
|
|
|
+ int node;
|
|
|
+ for_each_online_node(node) {
|
|
|
+
|
|
|
+ cachep->nodelists[node] =
|
|
|
+ kmalloc_node(sizeof(struct kmem_list3),
|
|
|
+ GFP_KERNEL, node);
|
|
|
+ BUG_ON(!cachep->nodelists[node]);
|
|
|
+ kmem_list3_init(cachep->nodelists[node]);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
+ cachep->nodelists[numa_node_id()]->next_reap =
|
|
|
+ jiffies + REAPTIMEOUT_LIST3 +
|
|
|
+ ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
|
|
|
+
|
|
|
BUG_ON(!ac_data(cachep));
|
|
|
ac_data(cachep)->avail = 0;
|
|
|
ac_data(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
|
|
@@ -1456,13 +1780,8 @@ next:
|
|
|
ac_data(cachep)->touched = 0;
|
|
|
cachep->batchcount = 1;
|
|
|
cachep->limit = BOOT_CPUCACHE_ENTRIES;
|
|
|
- cachep->free_limit = (1+num_online_cpus())*cachep->batchcount
|
|
|
- + cachep->num;
|
|
|
}
|
|
|
|
|
|
- cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
|
|
|
- ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
|
|
|
-
|
|
|
/* Need the semaphore to access the chain. */
|
|
|
down(&cache_chain_sem);
|
|
|
{
|
|
@@ -1519,13 +1838,23 @@ static void check_spinlock_acquired(kmem_cache_t *cachep)
|
|
|
{
|
|
|
#ifdef CONFIG_SMP
|
|
|
check_irq_off();
|
|
|
- BUG_ON(spin_trylock(&cachep->spinlock));
|
|
|
+ assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
|
|
|
#endif
|
|
|
}
|
|
|
+
|
|
|
+static inline void check_spinlock_acquired_node(kmem_cache_t *cachep, int node)
|
|
|
+{
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ check_irq_off();
|
|
|
+ assert_spin_locked(&cachep->nodelists[node]->list_lock);
|
|
|
+#endif
|
|
|
+}
|
|
|
+
|
|
|
#else
|
|
|
#define check_irq_off() do { } while(0)
|
|
|
#define check_irq_on() do { } while(0)
|
|
|
#define check_spinlock_acquired(x) do { } while(0)
|
|
|
+#define check_spinlock_acquired_node(x, y) do { } while(0)
|
|
|
#endif
|
|
|
|
|
|
/*
|
|
@@ -1547,7 +1876,7 @@ static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
|
|
|
}
|
|
|
|
|
|
static void drain_array_locked(kmem_cache_t* cachep,
|
|
|
- struct array_cache *ac, int force);
|
|
|
+ struct array_cache *ac, int force, int node);
|
|
|
|
|
|
static void do_drain(void *arg)
|
|
|
{
|
|
@@ -1556,59 +1885,82 @@ static void do_drain(void *arg)
|
|
|
|
|
|
check_irq_off();
|
|
|
ac = ac_data(cachep);
|
|
|
- spin_lock(&cachep->spinlock);
|
|
|
- free_block(cachep, &ac_entry(ac)[0], ac->avail);
|
|
|
- spin_unlock(&cachep->spinlock);
|
|
|
+ spin_lock(&cachep->nodelists[numa_node_id()]->list_lock);
|
|
|
+ free_block(cachep, ac->entry, ac->avail);
|
|
|
+ spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock);
|
|
|
ac->avail = 0;
|
|
|
}
|
|
|
|
|
|
static void drain_cpu_caches(kmem_cache_t *cachep)
|
|
|
{
|
|
|
+ struct kmem_list3 *l3;
|
|
|
+ int node;
|
|
|
+
|
|
|
smp_call_function_all_cpus(do_drain, cachep);
|
|
|
check_irq_on();
|
|
|
spin_lock_irq(&cachep->spinlock);
|
|
|
- if (cachep->lists.shared)
|
|
|
- drain_array_locked(cachep, cachep->lists.shared, 1);
|
|
|
+ for_each_online_node(node) {
|
|
|
+ l3 = cachep->nodelists[node];
|
|
|
+ if (l3) {
|
|
|
+ spin_lock(&l3->list_lock);
|
|
|
+ drain_array_locked(cachep, l3->shared, 1, node);
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
+ if (l3->alien)
|
|
|
+ drain_alien_cache(cachep, l3);
|
|
|
+ }
|
|
|
+ }
|
|
|
spin_unlock_irq(&cachep->spinlock);
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-/* NUMA shrink all list3s */
|
|
|
-static int __cache_shrink(kmem_cache_t *cachep)
|
|
|
+static int __node_shrink(kmem_cache_t *cachep, int node)
|
|
|
{
|
|
|
struct slab *slabp;
|
|
|
+ struct kmem_list3 *l3 = cachep->nodelists[node];
|
|
|
int ret;
|
|
|
|
|
|
- drain_cpu_caches(cachep);
|
|
|
-
|
|
|
- check_irq_on();
|
|
|
- spin_lock_irq(&cachep->spinlock);
|
|
|
-
|
|
|
- for(;;) {
|
|
|
+ for (;;) {
|
|
|
struct list_head *p;
|
|
|
|
|
|
- p = cachep->lists.slabs_free.prev;
|
|
|
- if (p == &cachep->lists.slabs_free)
|
|
|
+ p = l3->slabs_free.prev;
|
|
|
+ if (p == &l3->slabs_free)
|
|
|
break;
|
|
|
|
|
|
- slabp = list_entry(cachep->lists.slabs_free.prev, struct slab, list);
|
|
|
+ slabp = list_entry(l3->slabs_free.prev, struct slab, list);
|
|
|
#if DEBUG
|
|
|
if (slabp->inuse)
|
|
|
BUG();
|
|
|
#endif
|
|
|
list_del(&slabp->list);
|
|
|
|
|
|
- cachep->lists.free_objects -= cachep->num;
|
|
|
- spin_unlock_irq(&cachep->spinlock);
|
|
|
+ l3->free_objects -= cachep->num;
|
|
|
+ spin_unlock_irq(&l3->list_lock);
|
|
|
slab_destroy(cachep, slabp);
|
|
|
- spin_lock_irq(&cachep->spinlock);
|
|
|
+ spin_lock_irq(&l3->list_lock);
|
|
|
}
|
|
|
- ret = !list_empty(&cachep->lists.slabs_full) ||
|
|
|
- !list_empty(&cachep->lists.slabs_partial);
|
|
|
- spin_unlock_irq(&cachep->spinlock);
|
|
|
+ ret = !list_empty(&l3->slabs_full) ||
|
|
|
+ !list_empty(&l3->slabs_partial);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+static int __cache_shrink(kmem_cache_t *cachep)
|
|
|
+{
|
|
|
+ int ret = 0, i = 0;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
+
|
|
|
+ drain_cpu_caches(cachep);
|
|
|
+
|
|
|
+ check_irq_on();
|
|
|
+ for_each_online_node(i) {
|
|
|
+ l3 = cachep->nodelists[i];
|
|
|
+ if (l3) {
|
|
|
+ spin_lock_irq(&l3->list_lock);
|
|
|
+ ret += __node_shrink(cachep, i);
|
|
|
+ spin_unlock_irq(&l3->list_lock);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return (ret ? 1 : 0);
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* kmem_cache_shrink - Shrink a cache.
|
|
|
* @cachep: The cache to shrink.
|
|
@@ -1645,6 +1997,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
|
|
|
int kmem_cache_destroy(kmem_cache_t * cachep)
|
|
|
{
|
|
|
int i;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
|
|
|
if (!cachep || in_interrupt())
|
|
|
BUG();
|
|
@@ -1672,15 +2025,17 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
|
|
|
if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
|
|
|
synchronize_rcu();
|
|
|
|
|
|
- /* no cpu_online check required here since we clear the percpu
|
|
|
- * array on cpu offline and set this to NULL.
|
|
|
- */
|
|
|
- for (i = 0; i < NR_CPUS; i++)
|
|
|
+ for_each_online_cpu(i)
|
|
|
kfree(cachep->array[i]);
|
|
|
|
|
|
/* NUMA: free the list3 structures */
|
|
|
- kfree(cachep->lists.shared);
|
|
|
- cachep->lists.shared = NULL;
|
|
|
+ for_each_online_node(i) {
|
|
|
+ if ((l3 = cachep->nodelists[i])) {
|
|
|
+ kfree(l3->shared);
|
|
|
+ free_alien_cache(l3->alien);
|
|
|
+ kfree(l3);
|
|
|
+ }
|
|
|
+ }
|
|
|
kmem_cache_free(&cache_cache, cachep);
|
|
|
|
|
|
unlock_cpu_hotplug();
|
|
@@ -1690,8 +2045,8 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
|
|
|
EXPORT_SYMBOL(kmem_cache_destroy);
|
|
|
|
|
|
/* Get the memory for a slab management obj. */
|
|
|
-static struct slab* alloc_slabmgmt(kmem_cache_t *cachep,
|
|
|
- void *objp, int colour_off, unsigned int __nocast local_flags)
|
|
|
+static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
|
|
|
+ int colour_off, unsigned int __nocast local_flags)
|
|
|
{
|
|
|
struct slab *slabp;
|
|
|
|
|
@@ -1722,7 +2077,7 @@ static void cache_init_objs(kmem_cache_t *cachep,
|
|
|
int i;
|
|
|
|
|
|
for (i = 0; i < cachep->num; i++) {
|
|
|
- void* objp = slabp->s_mem+cachep->objsize*i;
|
|
|
+ void *objp = slabp->s_mem+cachep->objsize*i;
|
|
|
#if DEBUG
|
|
|
/* need to poison the objs? */
|
|
|
if (cachep->flags & SLAB_POISON)
|
|
@@ -1799,6 +2154,7 @@ static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nod
|
|
|
size_t offset;
|
|
|
unsigned int local_flags;
|
|
|
unsigned long ctor_flags;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
|
|
|
/* Be lazy and only check for valid flags here,
|
|
|
* keeping it out of the critical path in kmem_cache_alloc().
|
|
@@ -1830,6 +2186,7 @@ static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nod
|
|
|
|
|
|
spin_unlock(&cachep->spinlock);
|
|
|
|
|
|
+ check_irq_off();
|
|
|
if (local_flags & __GFP_WAIT)
|
|
|
local_irq_enable();
|
|
|
|
|
@@ -1841,8 +2198,9 @@ static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nod
|
|
|
*/
|
|
|
kmem_flagcheck(cachep, flags);
|
|
|
|
|
|
-
|
|
|
- /* Get mem for the objs. */
|
|
|
+ /* Get mem for the objs.
|
|
|
+ * Attempt to allocate a physical page from 'nodeid'.
|
|
|
+ */
|
|
|
if (!(objp = kmem_getpages(cachep, flags, nodeid)))
|
|
|
goto failed;
|
|
|
|
|
@@ -1850,6 +2208,7 @@ static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nod
|
|
|
if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags)))
|
|
|
goto opps1;
|
|
|
|
|
|
+ slabp->nodeid = nodeid;
|
|
|
set_slab_attr(cachep, slabp, objp);
|
|
|
|
|
|
cache_init_objs(cachep, slabp, ctor_flags);
|
|
@@ -1857,13 +2216,14 @@ static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nod
|
|
|
if (local_flags & __GFP_WAIT)
|
|
|
local_irq_disable();
|
|
|
check_irq_off();
|
|
|
- spin_lock(&cachep->spinlock);
|
|
|
+ l3 = cachep->nodelists[nodeid];
|
|
|
+ spin_lock(&l3->list_lock);
|
|
|
|
|
|
/* Make slab active. */
|
|
|
- list_add_tail(&slabp->list, &(list3_data(cachep)->slabs_free));
|
|
|
+ list_add_tail(&slabp->list, &(l3->slabs_free));
|
|
|
STATS_INC_GROWN(cachep);
|
|
|
- list3_data(cachep)->free_objects += cachep->num;
|
|
|
- spin_unlock(&cachep->spinlock);
|
|
|
+ l3->free_objects += cachep->num;
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
return 1;
|
|
|
opps1:
|
|
|
kmem_freepages(cachep, objp);
|
|
@@ -1969,7 +2329,6 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
|
|
|
kmem_bufctl_t i;
|
|
|
int entries = 0;
|
|
|
|
|
|
- check_spinlock_acquired(cachep);
|
|
|
/* Check slab's freelist to see if this obj is there. */
|
|
|
for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
|
|
|
entries++;
|
|
@@ -2012,10 +2371,11 @@ retry:
|
|
|
*/
|
|
|
batchcount = BATCHREFILL_LIMIT;
|
|
|
}
|
|
|
- l3 = list3_data(cachep);
|
|
|
+ l3 = cachep->nodelists[numa_node_id()];
|
|
|
+
|
|
|
+ BUG_ON(ac->avail > 0 || !l3);
|
|
|
+ spin_lock(&l3->list_lock);
|
|
|
|
|
|
- BUG_ON(ac->avail > 0);
|
|
|
- spin_lock(&cachep->spinlock);
|
|
|
if (l3->shared) {
|
|
|
struct array_cache *shared_array = l3->shared;
|
|
|
if (shared_array->avail) {
|
|
@@ -2023,8 +2383,9 @@ retry:
|
|
|
batchcount = shared_array->avail;
|
|
|
shared_array->avail -= batchcount;
|
|
|
ac->avail = batchcount;
|
|
|
- memcpy(ac_entry(ac), &ac_entry(shared_array)[shared_array->avail],
|
|
|
- sizeof(void*)*batchcount);
|
|
|
+ memcpy(ac->entry,
|
|
|
+ &(shared_array->entry[shared_array->avail]),
|
|
|
+ sizeof(void*)*batchcount);
|
|
|
shared_array->touched = 1;
|
|
|
goto alloc_done;
|
|
|
}
|
|
@@ -2051,7 +2412,8 @@ retry:
|
|
|
STATS_SET_HIGH(cachep);
|
|
|
|
|
|
/* get obj pointer */
|
|
|
- ac_entry(ac)[ac->avail++] = slabp->s_mem + slabp->free*cachep->objsize;
|
|
|
+ ac->entry[ac->avail++] = slabp->s_mem +
|
|
|
+ slabp->free*cachep->objsize;
|
|
|
|
|
|
slabp->inuse++;
|
|
|
next = slab_bufctl(slabp)[slabp->free];
|
|
@@ -2073,12 +2435,12 @@ retry:
|
|
|
must_grow:
|
|
|
l3->free_objects -= ac->avail;
|
|
|
alloc_done:
|
|
|
- spin_unlock(&cachep->spinlock);
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
|
|
|
if (unlikely(!ac->avail)) {
|
|
|
int x;
|
|
|
- x = cache_grow(cachep, flags, -1);
|
|
|
-
|
|
|
+ x = cache_grow(cachep, flags, numa_node_id());
|
|
|
+
|
|
|
// cache_grow can reenable interrupts, then ac could change.
|
|
|
ac = ac_data(cachep);
|
|
|
if (!x && ac->avail == 0) // no objects in sight? abort
|
|
@@ -2088,7 +2450,7 @@ alloc_done:
|
|
|
goto retry;
|
|
|
}
|
|
|
ac->touched = 1;
|
|
|
- return ac_entry(ac)[--ac->avail];
|
|
|
+ return ac->entry[--ac->avail];
|
|
|
}
|
|
|
|
|
|
static inline void
|
|
@@ -2160,7 +2522,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast fl
|
|
|
if (likely(ac->avail)) {
|
|
|
STATS_INC_ALLOCHIT(cachep);
|
|
|
ac->touched = 1;
|
|
|
- objp = ac_entry(ac)[--ac->avail];
|
|
|
+ objp = ac->entry[--ac->avail];
|
|
|
} else {
|
|
|
STATS_INC_ALLOCMISS(cachep);
|
|
|
objp = cache_alloc_refill(cachep, flags);
|
|
@@ -2172,33 +2534,104 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast fl
|
|
|
return objp;
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * NUMA: different approach needed if the spinlock is moved into
|
|
|
- * the l3 structure
|
|
|
+#ifdef CONFIG_NUMA
|
|
|
+/*
|
|
|
+ * An interface to enable slab creation on a given nodeid.
|
|
|
*/
|
|
|
+static void *__cache_alloc_node(kmem_cache_t *cachep, int flags, int nodeid)
|
|
|
+{
|
|
|
+ struct list_head *entry;
|
|
|
+ struct slab *slabp;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
+ void *obj;
|
|
|
+ kmem_bufctl_t next;
|
|
|
+ int x;
|
|
|
+
|
|
|
+ l3 = cachep->nodelists[nodeid];
|
|
|
+ BUG_ON(!l3);
|
|
|
+
|
|
|
+retry:
|
|
|
+ spin_lock(&l3->list_lock);
|
|
|
+ entry = l3->slabs_partial.next;
|
|
|
+ if (entry == &l3->slabs_partial) {
|
|
|
+ l3->free_touched = 1;
|
|
|
+ entry = l3->slabs_free.next;
|
|
|
+ if (entry == &l3->slabs_free)
|
|
|
+ goto must_grow;
|
|
|
+ }
|
|
|
+
|
|
|
+ slabp = list_entry(entry, struct slab, list);
|
|
|
+ check_spinlock_acquired_node(cachep, nodeid);
|
|
|
+ check_slabp(cachep, slabp);
|
|
|
+
|
|
|
+ STATS_INC_NODEALLOCS(cachep);
|
|
|
+ STATS_INC_ACTIVE(cachep);
|
|
|
+ STATS_SET_HIGH(cachep);
|
|
|
+
|
|
|
+ BUG_ON(slabp->inuse == cachep->num);
|
|
|
+
|
|
|
+ /* get obj pointer */
|
|
|
+ obj = slabp->s_mem + slabp->free*cachep->objsize;
|
|
|
+ slabp->inuse++;
|
|
|
+ next = slab_bufctl(slabp)[slabp->free];
|
|
|
+#if DEBUG
|
|
|
+ slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
|
|
|
+#endif
|
|
|
+ slabp->free = next;
|
|
|
+ check_slabp(cachep, slabp);
|
|
|
+ l3->free_objects--;
|
|
|
+ /* move slabp to correct slabp list: */
|
|
|
+ list_del(&slabp->list);
|
|
|
+
|
|
|
+ if (slabp->free == BUFCTL_END) {
|
|
|
+ list_add(&slabp->list, &l3->slabs_full);
|
|
|
+ } else {
|
|
|
+ list_add(&slabp->list, &l3->slabs_partial);
|
|
|
+ }
|
|
|
+
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
+ goto done;
|
|
|
+
|
|
|
+must_grow:
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
+ x = cache_grow(cachep, flags, nodeid);
|
|
|
|
|
|
+ if (!x)
|
|
|
+ return NULL;
|
|
|
+
|
|
|
+ goto retry;
|
|
|
+done:
|
|
|
+ return obj;
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
+/*
|
|
|
+ * The caller must hold the correct kmem_list3's list_lock.
|
|
|
+ */
|
|
|
static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
|
|
|
{
|
|
|
int i;
|
|
|
-
|
|
|
- check_spinlock_acquired(cachep);
|
|
|
-
|
|
|
- /* NUMA: move add into loop */
|
|
|
- cachep->lists.free_objects += nr_objects;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
|
|
|
for (i = 0; i < nr_objects; i++) {
|
|
|
void *objp = objpp[i];
|
|
|
struct slab *slabp;
|
|
|
unsigned int objnr;
|
|
|
+ int nodeid = 0;
|
|
|
|
|
|
slabp = GET_PAGE_SLAB(virt_to_page(objp));
|
|
|
+ nodeid = slabp->nodeid;
|
|
|
+ l3 = cachep->nodelists[nodeid];
|
|
|
list_del(&slabp->list);
|
|
|
objnr = (objp - slabp->s_mem) / cachep->objsize;
|
|
|
+ check_spinlock_acquired_node(cachep, nodeid);
|
|
|
check_slabp(cachep, slabp);
|
|
|
+
|
|
|
+
|
|
|
#if DEBUG
|
|
|
if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
|
|
|
- printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n",
|
|
|
- cachep->name, objp);
|
|
|
+ printk(KERN_ERR "slab: double free detected in cache "
|
|
|
+ "'%s', objp %p\n", cachep->name, objp);
|
|
|
BUG();
|
|
|
}
|
|
|
#endif
|
|
@@ -2206,24 +2639,23 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
|
|
|
slabp->free = objnr;
|
|
|
STATS_DEC_ACTIVE(cachep);
|
|
|
slabp->inuse--;
|
|
|
+ l3->free_objects++;
|
|
|
check_slabp(cachep, slabp);
|
|
|
|
|
|
/* fixup slab chains */
|
|
|
if (slabp->inuse == 0) {
|
|
|
- if (cachep->lists.free_objects > cachep->free_limit) {
|
|
|
- cachep->lists.free_objects -= cachep->num;
|
|
|
+ if (l3->free_objects > l3->free_limit) {
|
|
|
+ l3->free_objects -= cachep->num;
|
|
|
slab_destroy(cachep, slabp);
|
|
|
} else {
|
|
|
- list_add(&slabp->list,
|
|
|
- &list3_data_ptr(cachep, objp)->slabs_free);
|
|
|
+ list_add(&slabp->list, &l3->slabs_free);
|
|
|
}
|
|
|
} else {
|
|
|
/* Unconditionally move a slab to the end of the
|
|
|
* partial list on free - maximum time for the
|
|
|
* other objects to be freed, too.
|
|
|
*/
|
|
|
- list_add_tail(&slabp->list,
|
|
|
- &list3_data_ptr(cachep, objp)->slabs_partial);
|
|
|
+ list_add_tail(&slabp->list, &l3->slabs_partial);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -2231,36 +2663,38 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
|
|
|
static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
|
|
|
{
|
|
|
int batchcount;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
|
|
|
batchcount = ac->batchcount;
|
|
|
#if DEBUG
|
|
|
BUG_ON(!batchcount || batchcount > ac->avail);
|
|
|
#endif
|
|
|
check_irq_off();
|
|
|
- spin_lock(&cachep->spinlock);
|
|
|
- if (cachep->lists.shared) {
|
|
|
- struct array_cache *shared_array = cachep->lists.shared;
|
|
|
+ l3 = cachep->nodelists[numa_node_id()];
|
|
|
+ spin_lock(&l3->list_lock);
|
|
|
+ if (l3->shared) {
|
|
|
+ struct array_cache *shared_array = l3->shared;
|
|
|
int max = shared_array->limit-shared_array->avail;
|
|
|
if (max) {
|
|
|
if (batchcount > max)
|
|
|
batchcount = max;
|
|
|
- memcpy(&ac_entry(shared_array)[shared_array->avail],
|
|
|
- &ac_entry(ac)[0],
|
|
|
+ memcpy(&(shared_array->entry[shared_array->avail]),
|
|
|
+ ac->entry,
|
|
|
sizeof(void*)*batchcount);
|
|
|
shared_array->avail += batchcount;
|
|
|
goto free_done;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- free_block(cachep, &ac_entry(ac)[0], batchcount);
|
|
|
+ free_block(cachep, ac->entry, batchcount);
|
|
|
free_done:
|
|
|
#if STATS
|
|
|
{
|
|
|
int i = 0;
|
|
|
struct list_head *p;
|
|
|
|
|
|
- p = list3_data(cachep)->slabs_free.next;
|
|
|
- while (p != &(list3_data(cachep)->slabs_free)) {
|
|
|
+ p = l3->slabs_free.next;
|
|
|
+ while (p != &(l3->slabs_free)) {
|
|
|
struct slab *slabp;
|
|
|
|
|
|
slabp = list_entry(p, struct slab, list);
|
|
@@ -2272,12 +2706,13 @@ free_done:
|
|
|
STATS_SET_FREEABLE(cachep, i);
|
|
|
}
|
|
|
#endif
|
|
|
- spin_unlock(&cachep->spinlock);
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
ac->avail -= batchcount;
|
|
|
- memmove(&ac_entry(ac)[0], &ac_entry(ac)[batchcount],
|
|
|
+ memmove(ac->entry, &(ac->entry[batchcount]),
|
|
|
sizeof(void*)*ac->avail);
|
|
|
}
|
|
|
|
|
|
+
|
|
|
/*
|
|
|
* __cache_free
|
|
|
* Release an obj back to its cache. If the obj has a constructed
|
|
@@ -2292,14 +2727,46 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
|
|
|
check_irq_off();
|
|
|
objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
|
|
|
|
|
|
+ /* Make sure we are not freeing an object from another
|
|
|
+ * node to the array cache on this cpu.
|
|
|
+ */
|
|
|
+#ifdef CONFIG_NUMA
|
|
|
+ {
|
|
|
+ struct slab *slabp;
|
|
|
+ slabp = GET_PAGE_SLAB(virt_to_page(objp));
|
|
|
+ if (unlikely(slabp->nodeid != numa_node_id())) {
|
|
|
+ struct array_cache *alien = NULL;
|
|
|
+ int nodeid = slabp->nodeid;
|
|
|
+ struct kmem_list3 *l3 = cachep->nodelists[numa_node_id()];
|
|
|
+
|
|
|
+ STATS_INC_NODEFREES(cachep);
|
|
|
+ if (l3->alien && l3->alien[nodeid]) {
|
|
|
+ alien = l3->alien[nodeid];
|
|
|
+ spin_lock(&alien->lock);
|
|
|
+ if (unlikely(alien->avail == alien->limit))
|
|
|
+ __drain_alien_cache(cachep,
|
|
|
+ alien, nodeid);
|
|
|
+ alien->entry[alien->avail++] = objp;
|
|
|
+ spin_unlock(&alien->lock);
|
|
|
+ } else {
|
|
|
+ spin_lock(&(cachep->nodelists[nodeid])->
|
|
|
+ list_lock);
|
|
|
+ free_block(cachep, &objp, 1);
|
|
|
+ spin_unlock(&(cachep->nodelists[nodeid])->
|
|
|
+ list_lock);
|
|
|
+ }
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+#endif
|
|
|
if (likely(ac->avail < ac->limit)) {
|
|
|
STATS_INC_FREEHIT(cachep);
|
|
|
- ac_entry(ac)[ac->avail++] = objp;
|
|
|
+ ac->entry[ac->avail++] = objp;
|
|
|
return;
|
|
|
} else {
|
|
|
STATS_INC_FREEMISS(cachep);
|
|
|
cache_flusharray(cachep, ac);
|
|
|
- ac_entry(ac)[ac->avail++] = objp;
|
|
|
+ ac->entry[ac->avail++] = objp;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -2369,81 +2836,30 @@ out:
|
|
|
* Identical to kmem_cache_alloc, except that this function is slow
|
|
|
* and can sleep. And it will allocate memory on the given node, which
|
|
|
* can improve the performance for cpu bound structures.
|
|
|
+ * New and improved: it will now make sure that the object gets
|
|
|
+ * put on the correct node list so that there is no false sharing.
|
|
|
*/
|
|
|
void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int nodeid)
|
|
|
{
|
|
|
- int loop;
|
|
|
- void *objp;
|
|
|
- struct slab *slabp;
|
|
|
- kmem_bufctl_t next;
|
|
|
-
|
|
|
- if (nodeid == -1)
|
|
|
- return kmem_cache_alloc(cachep, flags);
|
|
|
-
|
|
|
- for (loop = 0;;loop++) {
|
|
|
- struct list_head *q;
|
|
|
-
|
|
|
- objp = NULL;
|
|
|
- check_irq_on();
|
|
|
- spin_lock_irq(&cachep->spinlock);
|
|
|
- /* walk through all partial and empty slab and find one
|
|
|
- * from the right node */
|
|
|
- list_for_each(q,&cachep->lists.slabs_partial) {
|
|
|
- slabp = list_entry(q, struct slab, list);
|
|
|
-
|
|
|
- if (page_to_nid(virt_to_page(slabp->s_mem)) == nodeid ||
|
|
|
- loop > 2)
|
|
|
- goto got_slabp;
|
|
|
- }
|
|
|
- list_for_each(q, &cachep->lists.slabs_free) {
|
|
|
- slabp = list_entry(q, struct slab, list);
|
|
|
+ unsigned long save_flags;
|
|
|
+ void *ptr;
|
|
|
|
|
|
- if (page_to_nid(virt_to_page(slabp->s_mem)) == nodeid ||
|
|
|
- loop > 2)
|
|
|
- goto got_slabp;
|
|
|
- }
|
|
|
- spin_unlock_irq(&cachep->spinlock);
|
|
|
+ if (nodeid == numa_node_id() || nodeid == -1)
|
|
|
+ return __cache_alloc(cachep, flags);
|
|
|
|
|
|
- local_irq_disable();
|
|
|
- if (!cache_grow(cachep, flags, nodeid)) {
|
|
|
- local_irq_enable();
|
|
|
- return NULL;
|
|
|
- }
|
|
|
- local_irq_enable();
|
|
|
+ if (unlikely(!cachep->nodelists[nodeid])) {
|
|
|
+ /* Fall back to __cache_alloc if we run into trouble */
|
|
|
+ printk(KERN_WARNING "slab: not allocating in inactive node %d for cache %s\n", nodeid, cachep->name);
|
|
|
+ return __cache_alloc(cachep, flags);
|
|
|
}
|
|
|
-got_slabp:
|
|
|
- /* found one: allocate object */
|
|
|
- check_slabp(cachep, slabp);
|
|
|
- check_spinlock_acquired(cachep);
|
|
|
|
|
|
- STATS_INC_ALLOCED(cachep);
|
|
|
- STATS_INC_ACTIVE(cachep);
|
|
|
- STATS_SET_HIGH(cachep);
|
|
|
- STATS_INC_NODEALLOCS(cachep);
|
|
|
-
|
|
|
- objp = slabp->s_mem + slabp->free*cachep->objsize;
|
|
|
-
|
|
|
- slabp->inuse++;
|
|
|
- next = slab_bufctl(slabp)[slabp->free];
|
|
|
-#if DEBUG
|
|
|
- slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
|
|
|
-#endif
|
|
|
- slabp->free = next;
|
|
|
- check_slabp(cachep, slabp);
|
|
|
-
|
|
|
- /* move slabp to correct slabp list: */
|
|
|
- list_del(&slabp->list);
|
|
|
- if (slabp->free == BUFCTL_END)
|
|
|
- list_add(&slabp->list, &cachep->lists.slabs_full);
|
|
|
- else
|
|
|
- list_add(&slabp->list, &cachep->lists.slabs_partial);
|
|
|
-
|
|
|
- list3_data(cachep)->free_objects--;
|
|
|
- spin_unlock_irq(&cachep->spinlock);
|
|
|
+ cache_alloc_debugcheck_before(cachep, flags);
|
|
|
+ local_irq_save(save_flags);
|
|
|
+ ptr = __cache_alloc_node(cachep, flags, nodeid);
|
|
|
+ local_irq_restore(save_flags);
|
|
|
+ ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0));
|
|
|
|
|
|
- objp = cache_alloc_debugcheck_after(cachep, GFP_KERNEL, objp,
|
|
|
- __builtin_return_address(0));
|
|
|
- return objp;
|
|
|
+ return ptr;
|
|
|
}
|
|
|
EXPORT_SYMBOL(kmem_cache_alloc_node);
|
|
|
|
|
@@ -2513,11 +2929,18 @@ void *__alloc_percpu(size_t size, size_t align)
|
|
|
if (!pdata)
|
|
|
return NULL;
|
|
|
|
|
|
- for (i = 0; i < NR_CPUS; i++) {
|
|
|
- if (!cpu_possible(i))
|
|
|
- continue;
|
|
|
- pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL,
|
|
|
- cpu_to_node(i));
|
|
|
+ /*
|
|
|
+ * Cannot use for_each_online_cpu since a cpu may come online
|
|
|
+ * and we have no way of figuring out how to fix the array
|
|
|
+ * that we have already allocated by then.
|
|
|
+ */
|
|
|
+ for_each_cpu(i) {
|
|
|
+ int node = cpu_to_node(i);
|
|
|
+
|
|
|
+ if (node_online(node))
|
|
|
+ pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
|
|
|
+ else
|
|
|
+ pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
|
|
|
|
|
|
if (!pdata->ptrs[i])
|
|
|
goto unwind_oom;
|
|
@@ -2607,11 +3030,11 @@ free_percpu(const void *objp)
|
|
|
int i;
|
|
|
struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
|
|
|
|
|
|
- for (i = 0; i < NR_CPUS; i++) {
|
|
|
- if (!cpu_possible(i))
|
|
|
- continue;
|
|
|
+ /*
|
|
|
+ * We allocate for all possible cpus, so we cannot use for_each_online_cpu here.
|
|
|
+ */
|
|
|
+ for_each_cpu(i)
|
|
|
kfree(p->ptrs[i]);
|
|
|
- }
|
|
|
kfree(p);
|
|
|
}
|
|
|
EXPORT_SYMBOL(free_percpu);
|
|
@@ -2629,6 +3052,64 @@ const char *kmem_cache_name(kmem_cache_t *cachep)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kmem_cache_name);
|
|
|
|
|
|
+/*
|
|
|
+ * This initializes the kmem_list3 structures for all online nodes.
|
|
|
+ */
|
|
|
+static int alloc_kmemlist(kmem_cache_t *cachep)
|
|
|
+{
|
|
|
+ int node;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
+ int err = 0;
|
|
|
+
|
|
|
+ for_each_online_node(node) {
|
|
|
+ struct array_cache *nc = NULL, *new;
|
|
|
+ struct array_cache **new_alien = NULL;
|
|
|
+#ifdef CONFIG_NUMA
|
|
|
+ if (!(new_alien = alloc_alien_cache(node, cachep->limit)))
|
|
|
+ goto fail;
|
|
|
+#endif
|
|
|
+ if (!(new = alloc_arraycache(node, (cachep->shared*
|
|
|
+ cachep->batchcount), 0xbaadf00d)))
|
|
|
+ goto fail;
|
|
|
+ if ((l3 = cachep->nodelists[node])) {
|
|
|
+
|
|
|
+ spin_lock_irq(&l3->list_lock);
|
|
|
+
|
|
|
+ if ((nc = cachep->nodelists[node]->shared))
|
|
|
+ free_block(cachep, nc->entry,
|
|
|
+ nc->avail);
|
|
|
+
|
|
|
+ l3->shared = new;
|
|
|
+ if (!cachep->nodelists[node]->alien) {
|
|
|
+ l3->alien = new_alien;
|
|
|
+ new_alien = NULL;
|
|
|
+ }
|
|
|
+ l3->free_limit = (1 + nr_cpus_node(node))*
|
|
|
+ cachep->batchcount + cachep->num;
|
|
|
+ spin_unlock_irq(&l3->list_lock);
|
|
|
+ kfree(nc);
|
|
|
+ free_alien_cache(new_alien);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (!(l3 = kmalloc_node(sizeof(struct kmem_list3),
|
|
|
+ GFP_KERNEL, node)))
|
|
|
+ goto fail;
|
|
|
+
|
|
|
+ kmem_list3_init(l3);
|
|
|
+ l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
|
|
|
+ ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
|
|
|
+ l3->shared = new;
|
|
|
+ l3->alien = new_alien;
|
|
|
+ l3->free_limit = (1 + nr_cpus_node(node))*
|
|
|
+ cachep->batchcount + cachep->num;
|
|
|
+ cachep->nodelists[node] = l3;
|
|
|
+ }
|
|
|
+ return err;
|
|
|
+fail:
|
|
|
+ err = -ENOMEM;
|
|
|
+ return err;
|
|
|
+}
|
|
|
+
|
|
|
struct ccupdate_struct {
|
|
|
kmem_cache_t *cachep;
|
|
|
struct array_cache *new[NR_CPUS];
|
|
@@ -2641,7 +3122,7 @@ static void do_ccupdate_local(void *info)
|
|
|
|
|
|
check_irq_off();
|
|
|
old = ac_data(new->cachep);
|
|
|
-
|
|
|
+
|
|
|
new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
|
|
|
new->new[smp_processor_id()] = old;
|
|
|
}
|
|
@@ -2651,54 +3132,43 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
|
|
|
int shared)
|
|
|
{
|
|
|
struct ccupdate_struct new;
|
|
|
- struct array_cache *new_shared;
|
|
|
- int i;
|
|
|
+ int i, err;
|
|
|
|
|
|
memset(&new.new,0,sizeof(new.new));
|
|
|
- for (i = 0; i < NR_CPUS; i++) {
|
|
|
- if (cpu_online(i)) {
|
|
|
- new.new[i] = alloc_arraycache(i, limit, batchcount);
|
|
|
- if (!new.new[i]) {
|
|
|
- for (i--; i >= 0; i--) kfree(new.new[i]);
|
|
|
- return -ENOMEM;
|
|
|
- }
|
|
|
- } else {
|
|
|
- new.new[i] = NULL;
|
|
|
+ for_each_online_cpu(i) {
|
|
|
+ new.new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount);
|
|
|
+ if (!new.new[i]) {
|
|
|
+ for (i--; i >= 0; i--) kfree(new.new[i]);
|
|
|
+ return -ENOMEM;
|
|
|
}
|
|
|
}
|
|
|
new.cachep = cachep;
|
|
|
|
|
|
smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
|
|
|
-
|
|
|
+
|
|
|
check_irq_on();
|
|
|
spin_lock_irq(&cachep->spinlock);
|
|
|
cachep->batchcount = batchcount;
|
|
|
cachep->limit = limit;
|
|
|
- cachep->free_limit = (1+num_online_cpus())*cachep->batchcount + cachep->num;
|
|
|
+ cachep->shared = shared;
|
|
|
spin_unlock_irq(&cachep->spinlock);
|
|
|
|
|
|
- for (i = 0; i < NR_CPUS; i++) {
|
|
|
+ for_each_online_cpu(i) {
|
|
|
struct array_cache *ccold = new.new[i];
|
|
|
if (!ccold)
|
|
|
continue;
|
|
|
- spin_lock_irq(&cachep->spinlock);
|
|
|
- free_block(cachep, ac_entry(ccold), ccold->avail);
|
|
|
- spin_unlock_irq(&cachep->spinlock);
|
|
|
+ spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
|
|
|
+ free_block(cachep, ccold->entry, ccold->avail);
|
|
|
+ spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
|
|
|
kfree(ccold);
|
|
|
}
|
|
|
- new_shared = alloc_arraycache(-1, batchcount*shared, 0xbaadf00d);
|
|
|
- if (new_shared) {
|
|
|
- struct array_cache *old;
|
|
|
|
|
|
- spin_lock_irq(&cachep->spinlock);
|
|
|
- old = cachep->lists.shared;
|
|
|
- cachep->lists.shared = new_shared;
|
|
|
- if (old)
|
|
|
- free_block(cachep, ac_entry(old), old->avail);
|
|
|
- spin_unlock_irq(&cachep->spinlock);
|
|
|
- kfree(old);
|
|
|
+ err = alloc_kmemlist(cachep);
|
|
|
+ if (err) {
|
|
|
+ printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
|
|
|
+ cachep->name, -err);
|
|
|
+ BUG();
|
|
|
}
|
|
|
-
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -2756,11 +3226,11 @@ static void enable_cpucache(kmem_cache_t *cachep)
|
|
|
}
|
|
|
|
|
|
static void drain_array_locked(kmem_cache_t *cachep,
|
|
|
- struct array_cache *ac, int force)
|
|
|
+ struct array_cache *ac, int force, int node)
|
|
|
{
|
|
|
int tofree;
|
|
|
|
|
|
- check_spinlock_acquired(cachep);
|
|
|
+ check_spinlock_acquired_node(cachep, node);
|
|
|
if (ac->touched && !force) {
|
|
|
ac->touched = 0;
|
|
|
} else if (ac->avail) {
|
|
@@ -2768,9 +3238,9 @@ static void drain_array_locked(kmem_cache_t *cachep,
|
|
|
if (tofree > ac->avail) {
|
|
|
tofree = (ac->avail+1)/2;
|
|
|
}
|
|
|
- free_block(cachep, ac_entry(ac), tofree);
|
|
|
+ free_block(cachep, ac->entry, tofree);
|
|
|
ac->avail -= tofree;
|
|
|
- memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
|
|
|
+ memmove(ac->entry, &(ac->entry[tofree]),
|
|
|
sizeof(void*)*ac->avail);
|
|
|
}
|
|
|
}
|
|
@@ -2789,6 +3259,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
|
|
|
static void cache_reap(void *unused)
|
|
|
{
|
|
|
struct list_head *walk;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
|
|
|
if (down_trylock(&cache_chain_sem)) {
|
|
|
/* Give up. Setup the next iteration. */
|
|
@@ -2809,27 +3280,32 @@ static void cache_reap(void *unused)
|
|
|
|
|
|
check_irq_on();
|
|
|
|
|
|
- spin_lock_irq(&searchp->spinlock);
|
|
|
+ l3 = searchp->nodelists[numa_node_id()];
|
|
|
+ if (l3->alien)
|
|
|
+ drain_alien_cache(searchp, l3);
|
|
|
+ spin_lock_irq(&l3->list_lock);
|
|
|
|
|
|
- drain_array_locked(searchp, ac_data(searchp), 0);
|
|
|
+ drain_array_locked(searchp, ac_data(searchp), 0,
|
|
|
+ numa_node_id());
|
|
|
|
|
|
- if(time_after(searchp->lists.next_reap, jiffies))
|
|
|
+ if (time_after(l3->next_reap, jiffies))
|
|
|
goto next_unlock;
|
|
|
|
|
|
- searchp->lists.next_reap = jiffies + REAPTIMEOUT_LIST3;
|
|
|
+ l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
|
|
|
|
|
|
- if (searchp->lists.shared)
|
|
|
- drain_array_locked(searchp, searchp->lists.shared, 0);
|
|
|
+ if (l3->shared)
|
|
|
+ drain_array_locked(searchp, l3->shared, 0,
|
|
|
+ numa_node_id());
|
|
|
|
|
|
- if (searchp->lists.free_touched) {
|
|
|
- searchp->lists.free_touched = 0;
|
|
|
+ if (l3->free_touched) {
|
|
|
+ l3->free_touched = 0;
|
|
|
goto next_unlock;
|
|
|
}
|
|
|
|
|
|
- tofree = (searchp->free_limit+5*searchp->num-1)/(5*searchp->num);
|
|
|
+ tofree = (l3->free_limit+5*searchp->num-1)/(5*searchp->num);
|
|
|
do {
|
|
|
- p = list3_data(searchp)->slabs_free.next;
|
|
|
- if (p == &(list3_data(searchp)->slabs_free))
|
|
|
+ p = l3->slabs_free.next;
|
|
|
+ if (p == &(l3->slabs_free))
|
|
|
break;
|
|
|
|
|
|
slabp = list_entry(p, struct slab, list);
|
|
@@ -2842,13 +3318,13 @@ static void cache_reap(void *unused)
|
|
|
* searchp cannot disappear, we hold
|
|
|
* cache_chain_lock
|
|
|
*/
|
|
|
- searchp->lists.free_objects -= searchp->num;
|
|
|
- spin_unlock_irq(&searchp->spinlock);
|
|
|
+ l3->free_objects -= searchp->num;
|
|
|
+ spin_unlock_irq(&l3->list_lock);
|
|
|
slab_destroy(searchp, slabp);
|
|
|
- spin_lock_irq(&searchp->spinlock);
|
|
|
+ spin_lock_irq(&l3->list_lock);
|
|
|
} while(--tofree > 0);
|
|
|
next_unlock:
|
|
|
- spin_unlock_irq(&searchp->spinlock);
|
|
|
+ spin_unlock_irq(&l3->list_lock);
|
|
|
next:
|
|
|
cond_resched();
|
|
|
}
|
|
@@ -2882,7 +3358,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
|
|
|
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
|
|
|
#if STATS
|
|
|
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped>"
|
|
|
- " <error> <maxfreeable> <freelimit> <nodeallocs>");
|
|
|
+ " <error> <maxfreeable> <nodeallocs> <remotefrees>");
|
|
|
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
|
|
|
#endif
|
|
|
seq_putc(m, '\n');
|
|
@@ -2917,39 +3393,53 @@ static int s_show(struct seq_file *m, void *p)
|
|
|
unsigned long active_objs;
|
|
|
unsigned long num_objs;
|
|
|
unsigned long active_slabs = 0;
|
|
|
- unsigned long num_slabs;
|
|
|
- const char *name;
|
|
|
+ unsigned long num_slabs, free_objects = 0, shared_avail = 0;
|
|
|
+ const char *name;
|
|
|
char *error = NULL;
|
|
|
+ int node;
|
|
|
+ struct kmem_list3 *l3;
|
|
|
|
|
|
check_irq_on();
|
|
|
spin_lock_irq(&cachep->spinlock);
|
|
|
active_objs = 0;
|
|
|
num_slabs = 0;
|
|
|
- list_for_each(q,&cachep->lists.slabs_full) {
|
|
|
- slabp = list_entry(q, struct slab, list);
|
|
|
- if (slabp->inuse != cachep->num && !error)
|
|
|
- error = "slabs_full accounting error";
|
|
|
- active_objs += cachep->num;
|
|
|
- active_slabs++;
|
|
|
- }
|
|
|
- list_for_each(q,&cachep->lists.slabs_partial) {
|
|
|
- slabp = list_entry(q, struct slab, list);
|
|
|
- if (slabp->inuse == cachep->num && !error)
|
|
|
- error = "slabs_partial inuse accounting error";
|
|
|
- if (!slabp->inuse && !error)
|
|
|
- error = "slabs_partial/inuse accounting error";
|
|
|
- active_objs += slabp->inuse;
|
|
|
- active_slabs++;
|
|
|
- }
|
|
|
- list_for_each(q,&cachep->lists.slabs_free) {
|
|
|
- slabp = list_entry(q, struct slab, list);
|
|
|
- if (slabp->inuse && !error)
|
|
|
- error = "slabs_free/inuse accounting error";
|
|
|
- num_slabs++;
|
|
|
+ for_each_online_node(node) {
|
|
|
+ l3 = cachep->nodelists[node];
|
|
|
+ if (!l3)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ spin_lock(&l3->list_lock);
|
|
|
+
|
|
|
+ list_for_each(q,&l3->slabs_full) {
|
|
|
+ slabp = list_entry(q, struct slab, list);
|
|
|
+ if (slabp->inuse != cachep->num && !error)
|
|
|
+ error = "slabs_full accounting error";
|
|
|
+ active_objs += cachep->num;
|
|
|
+ active_slabs++;
|
|
|
+ }
|
|
|
+ list_for_each(q,&l3->slabs_partial) {
|
|
|
+ slabp = list_entry(q, struct slab, list);
|
|
|
+ if (slabp->inuse == cachep->num && !error)
|
|
|
+ error = "slabs_partial inuse accounting error";
|
|
|
+ if (!slabp->inuse && !error)
|
|
|
+ error = "slabs_partial/inuse accounting error";
|
|
|
+ active_objs += slabp->inuse;
|
|
|
+ active_slabs++;
|
|
|
+ }
|
|
|
+ list_for_each(q,&l3->slabs_free) {
|
|
|
+ slabp = list_entry(q, struct slab, list);
|
|
|
+ if (slabp->inuse && !error)
|
|
|
+ error = "slabs_free/inuse accounting error";
|
|
|
+ num_slabs++;
|
|
|
+ }
|
|
|
+ free_objects += l3->free_objects;
|
|
|
+ shared_avail += l3->shared->avail;
|
|
|
+
|
|
|
+ spin_unlock(&l3->list_lock);
|
|
|
}
|
|
|
num_slabs+=active_slabs;
|
|
|
num_objs = num_slabs*cachep->num;
|
|
|
- if (num_objs - active_objs != cachep->lists.free_objects && !error)
|
|
|
+ if (num_objs - active_objs != free_objects && !error)
|
|
|
error = "free_objects accounting error";
|
|
|
|
|
|
name = cachep->name;
|
|
@@ -2961,9 +3451,9 @@ static int s_show(struct seq_file *m, void *p)
|
|
|
cachep->num, (1<<cachep->gfporder));
|
|
|
seq_printf(m, " : tunables %4u %4u %4u",
|
|
|
cachep->limit, cachep->batchcount,
|
|
|
- cachep->lists.shared->limit/cachep->batchcount);
|
|
|
- seq_printf(m, " : slabdata %6lu %6lu %6u",
|
|
|
- active_slabs, num_slabs, cachep->lists.shared->avail);
|
|
|
+ cachep->shared);
|
|
|
+ seq_printf(m, " : slabdata %6lu %6lu %6lu",
|
|
|
+ active_slabs, num_slabs, shared_avail);
|
|
|
#if STATS
|
|
|
{ /* list3 stats */
|
|
|
unsigned long high = cachep->high_mark;
|
|
@@ -2972,12 +3462,13 @@ static int s_show(struct seq_file *m, void *p)
|
|
|
unsigned long reaped = cachep->reaped;
|
|
|
unsigned long errors = cachep->errors;
|
|
|
unsigned long max_freeable = cachep->max_freeable;
|
|
|
- unsigned long free_limit = cachep->free_limit;
|
|
|
unsigned long node_allocs = cachep->node_allocs;
|
|
|
+ unsigned long node_frees = cachep->node_frees;
|
|
|
|
|
|
- seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu",
|
|
|
- allocs, high, grown, reaped, errors,
|
|
|
- max_freeable, free_limit, node_allocs);
|
|
|
+ seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
|
|
|
+ "%4lu %4lu %4lu %4lu",
|
|
|
+ allocs, high, grown, reaped, errors,
|
|
|
+ max_freeable, node_allocs, node_frees);
|
|
|
}
|
|
|
/* cpu stats */
|
|
|
{
|
|
@@ -3056,9 +3547,10 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
|
|
|
batchcount < 1 ||
|
|
|
batchcount > limit ||
|
|
|
shared < 0) {
|
|
|
- res = -EINVAL;
|
|
|
+ res = 0;
|
|
|
} else {
|
|
|
- res = do_tune_cpucache(cachep, limit, batchcount, shared);
|
|
|
+ res = do_tune_cpucache(cachep, limit,
|
|
|
+ batchcount, shared);
|
|
|
}
|
|
|
break;
|
|
|
}
|