@@ -542,39 +542,10 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 	return mz;
 }
 
-/*
- * Return page count for single (non recursive) @memcg.
- *
- * Implementation Note: reading percpu statistics for memcg.
- *
- * Both of vmstat[] and percpu_counter has threshold and do periodic
- * synchronization to implement "quick" read. There are trade-off between
- * reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronization of counter in memcg's counter.
- *
- * But this _read() function is used for user interface now. The user accounts
- * memory usage by memory cgroup and he _always_ requires exact value because
- * he accounts memory. Even if we provide quick-and-fuzzy read, we always
- * have to visit all online cpus and make sum. So, for now, unnecessary
- * synchronization is not implemented. (just implemented for cpu hotplug)
- *
- * If there are kernel internal actions which can make use of some not-exact
- * value, and reading all cpu value can be performance bottleneck in some
- * common workload, threshold and synchronization as vmstat[] should be
- * implemented.
- *
- * The parameter idx can be of type enum memcg_event_item or vm_event_item.
- */
-
 static unsigned long memcg_sum_events(struct mem_cgroup *memcg,
 				      int event)
 {
-	unsigned long val = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		val += per_cpu(memcg->stat->events[event], cpu);
-	return val;
+	return atomic_long_read(&memcg->events[event]);
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -606,7 +577,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 		nr_pages = -nr_pages; /* for event */
 	}
 
-	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
+	__this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
 }
 
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
@@ -642,8 +613,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 {
 	unsigned long val, next;
 
-	val = __this_cpu_read(memcg->stat->nr_page_events);
-	next = __this_cpu_read(memcg->stat->targets[target]);
+	val = __this_cpu_read(memcg->stat_cpu->nr_page_events);
+	next = __this_cpu_read(memcg->stat_cpu->targets[target]);
 	/* from time_after() in jiffies.h */
 	if ((long)(next - val) < 0) {
 		switch (target) {
@@ -659,7 +630,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 		default:
 			break;
 		}
-		__this_cpu_write(memcg->stat->targets[target], next);
+		__this_cpu_write(memcg->stat_cpu->targets[target], next);
 		return true;
 	}
 	return false;
@@ -1707,11 +1678,6 @@ void unlock_page_memcg(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page_memcg);
 
-/*
- * size of first charge trial. "32" comes from vmscan.c's magic value.
- * TODO: maybe necessary to use big numbers in big irons.
- */
-#define CHARGE_BATCH	32U
 struct memcg_stock_pcp {
 	struct mem_cgroup *cached; /* this never be root cgroup */
 	unsigned int nr_pages;
@@ -1739,7 +1705,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	unsigned long flags;
 	bool ret = false;
 
-	if (nr_pages > CHARGE_BATCH)
+	if (nr_pages > MEMCG_CHARGE_BATCH)
 		return ret;
 
 	local_irq_save(flags);
@@ -1808,7 +1774,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	}
 	stock->nr_pages += nr_pages;
 
-	if (stock->nr_pages > CHARGE_BATCH)
+	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
 		drain_stock(stock);
 
 	local_irq_restore(flags);
@@ -1858,9 +1824,44 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
 	struct memcg_stock_pcp *stock;
+	struct mem_cgroup *memcg;
 
 	stock = &per_cpu(memcg_stock, cpu);
 	drain_stock(stock);
+
+	for_each_mem_cgroup(memcg) {
+		int i;
+
+		for (i = 0; i < MEMCG_NR_STAT; i++) {
+			int nid;
+			long x;
+
+			x = this_cpu_xchg(memcg->stat_cpu->count[i], 0);
+			if (x)
+				atomic_long_add(x, &memcg->stat[i]);
+
+			if (i >= NR_VM_NODE_STAT_ITEMS)
+				continue;
+
+			for_each_node(nid) {
+				struct mem_cgroup_per_node *pn;
+
+				pn = mem_cgroup_nodeinfo(memcg, nid);
+				x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
+				if (x)
+					atomic_long_add(x, &pn->lruvec_stat[i]);
+			}
+		}
+
+		for (i = 0; i < MEMCG_NR_EVENTS; i++) {
+			long x;
+
+			x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
+			if (x)
+				atomic_long_add(x, &memcg->events[i]);
+		}
+	}
+
 	return 0;
 }
 
@@ -1881,7 +1882,7 @@ static void high_work_func(struct work_struct *work)
 	struct mem_cgroup *memcg;
 
 	memcg = container_of(work, struct mem_cgroup, high_work);
-	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+	reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
 }
 
 /*
@@ -1905,7 +1906,7 @@ void mem_cgroup_handle_over_high(void)
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		      unsigned int nr_pages)
 {
-	unsigned int batch = max(CHARGE_BATCH, nr_pages);
+	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup *mem_over_limit;
 	struct page_counter *counter;
@@ -4161,8 +4162,8 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
-	pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
-	if (!pn->lruvec_stat) {
+	pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat);
+	if (!pn->lruvec_stat_cpu) {
 		kfree(pn);
 		return 1;
 	}
@@ -4180,7 +4181,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
-	free_percpu(pn->lruvec_stat);
+	free_percpu(pn->lruvec_stat_cpu);
 	kfree(pn);
 }
 
@@ -4190,7 +4191,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
-	free_percpu(memcg->stat);
+	free_percpu(memcg->stat_cpu);
 	kfree(memcg);
 }
 
@@ -4219,8 +4220,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (memcg->id.id < 0)
 		goto fail;
 
-	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
-	if (!memcg->stat)
+	memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu);
+	if (!memcg->stat_cpu)
 		goto fail;
 
 	for_each_node(node)
@@ -5638,7 +5639,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 	__mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
 	__mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
 	__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
-	__this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages);
+	__this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
 	memcg_check_events(ug->memcg, ug->dummy_page);
 	local_irq_restore(flags);
 