@@ -72,22 +72,13 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
#define MEM_CGROUP_RECLAIM_RETRIES 5
static struct mem_cgroup *root_mem_cgroup __read_mostly;

+/* Whether the swap controller is active */
#ifdef CONFIG_MEMCG_SWAP
-/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
int do_swap_account __read_mostly;
-
-/* for remember boot option*/
-#ifdef CONFIG_MEMCG_SWAP_ENABLED
-static int really_do_swap_account __initdata = 1;
-#else
-static int really_do_swap_account __initdata;
-#endif
-
#else
#define do_swap_account 0
#endif

-
static const char * const mem_cgroup_stat_names[] = {
"cache",
"rss",
@@ -97,14 +88,6 @@ static const char * const mem_cgroup_stat_names[] = {
"swap",
};

-enum mem_cgroup_events_index {
- MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */
- MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */
- MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */
- MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */
- MEM_CGROUP_EVENTS_NSTATS,
-};
-
static const char * const mem_cgroup_events_names[] = {
"pgpgin",
"pgpgout",
@@ -138,7 +121,7 @@ enum mem_cgroup_events_target {

struct mem_cgroup_stat_cpu {
long count[MEM_CGROUP_STAT_NSTATS];
- unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
+ unsigned long events[MEMCG_NR_EVENTS];
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
};
@@ -284,6 +267,10 @@ struct mem_cgroup {
struct page_counter memsw;
struct page_counter kmem;

+ /* Normal memory consumption range */
+ unsigned long low;
+ unsigned long high;
+
unsigned long soft_limit;

/* vmpressure notifications */
@@ -325,9 +312,11 @@ struct mem_cgroup {
/*
* set > 0 if pages under this cgroup are moving to other cgroup.
*/
- atomic_t moving_account;
+ atomic_t moving_account;
/* taken only while moving_account > 0 */
- spinlock_t move_lock;
+ spinlock_t move_lock;
+ struct task_struct *move_lock_task;
+ unsigned long move_lock_flags;
/*
* percpu counter.
*/
@@ -371,21 +360,18 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg)

/* Stuffs for move charges at task migration. */
/*
- * Types of charges to be moved. "move_charge_at_immitgrate" and
- * "immigrate_flags" are treated as a left-shifted bitmap of these types.
+ * Types of charges to be moved.
*/
-enum move_type {
- MOVE_CHARGE_TYPE_ANON, /* private anonymous page and swap of it */
- MOVE_CHARGE_TYPE_FILE, /* file page(including tmpfs) and swap of it */
- NR_MOVE_TYPE,
-};
+#define MOVE_ANON 0x1U
+#define MOVE_FILE 0x2U
+#define MOVE_MASK (MOVE_ANON | MOVE_FILE)

/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
spinlock_t lock; /* for from, to */
struct mem_cgroup *from;
struct mem_cgroup *to;
- unsigned long immigrate_flags;
+ unsigned long flags;
unsigned long precharge;
unsigned long moved_charge;
unsigned long moved_swap;
@@ -396,16 +382,6 @@ static struct move_charge_struct {
.waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
};

-static bool move_anon(void)
-{
- return test_bit(MOVE_CHARGE_TYPE_ANON, &mc.immigrate_flags);
-}
-
-static bool move_file(void)
-{
- return test_bit(MOVE_CHARGE_TYPE_FILE, &mc.immigrate_flags);
-}
-
/*
* Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
* limit reclaim to prevent infinite loops, if they ever occur.
@@ -1365,6 +1341,20 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
return inactive * inactive_ratio < active;
}

+bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
+{
+ struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return true;
+
+ mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+ memcg = mz->memcg;
+
+ return !!(memcg->css.flags & CSS_ONLINE);
+}
+
#define mem_cgroup_from_counter(counter, member) \
container_of(counter, struct mem_cgroup, member)

@@ -1557,7 +1547,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
* quickly exit and free its memory.
*/
if (fatal_signal_pending(current) || task_will_free_mem(current)) {
- set_thread_flag(TIF_MEMDIE);
+ mark_tsk_oom_victim(current);
return;
}

@@ -1931,7 +1921,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
if (!memcg)
return false;

- if (!handle)
+ if (!handle || oom_killer_disabled)
goto cleanup;

owait.memcg = memcg;
@@ -1977,34 +1967,33 @@ cleanup:
/**
* mem_cgroup_begin_page_stat - begin a page state statistics transaction
* @page: page that is going to change accounted state
- * @locked: &memcg->move_lock slowpath was taken
- * @flags: IRQ-state flags for &memcg->move_lock
*
* This function must mark the beginning of an accounted page state
* change to prevent double accounting when the page is concurrently
* being moved to another memcg:
*
- * memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ * memcg = mem_cgroup_begin_page_stat(page);
* if (TestClearPageState(page))
* mem_cgroup_update_page_stat(memcg, state, -1);
- * mem_cgroup_end_page_stat(memcg, locked, flags);
- *
- * The RCU lock is held throughout the transaction. The fast path can
- * get away without acquiring the memcg->move_lock (@locked is false)
- * because page moving starts with an RCU grace period.
- *
- * The RCU lock also protects the memcg from being freed when the page
- * state that is going to change is the only thing preventing the page
- * from being uncharged. E.g. end-writeback clearing PageWriteback(),
- * which allows migration to go ahead and uncharge the page before the
- * account transaction might be complete.
+ * mem_cgroup_end_page_stat(memcg);
*/
-struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
- bool *locked,
- unsigned long *flags)
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page)
{
struct mem_cgroup *memcg;
+ unsigned long flags;

+ /*
+ * The RCU lock is held throughout the transaction. The fast
+ * path can get away without acquiring the memcg->move_lock
+ * because page moving starts with an RCU grace period.
+ *
+ * The RCU lock also protects the memcg from being freed when
+ * the page state that is going to change is the only thing
+ * preventing the page from being uncharged.
+ * E.g. end-writeback clearing PageWriteback(), which allows
+ * migration to go ahead and uncharge the page before the
+ * account transaction might be complete.
+ */
rcu_read_lock();

if (mem_cgroup_disabled())
@@ -2014,16 +2003,22 @@ again:
if (unlikely(!memcg))
return NULL;

- *locked = false;
if (atomic_read(&memcg->moving_account) <= 0)
return memcg;

- spin_lock_irqsave(&memcg->move_lock, *flags);
+ spin_lock_irqsave(&memcg->move_lock, flags);
if (memcg != page->mem_cgroup) {
- spin_unlock_irqrestore(&memcg->move_lock, *flags);
+ spin_unlock_irqrestore(&memcg->move_lock, flags);
goto again;
}
- *locked = true;
+
+ /*
+ * When charge migration first begins, we can have locked and
+ * unlocked page stat updates happening concurrently. Track
+ * the task who has the lock for mem_cgroup_end_page_stat().
+ */
+ memcg->move_lock_task = current;
+ memcg->move_lock_flags = flags;

return memcg;
}
@@ -2031,14 +2026,17 @@ again:
/**
* mem_cgroup_end_page_stat - finish a page state statistics transaction
* @memcg: the memcg that was accounted against
- * @locked: value received from mem_cgroup_begin_page_stat()
- * @flags: value received from mem_cgroup_begin_page_stat()
*/
-void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked,
- unsigned long *flags)
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
{
- if (memcg && *locked)
- spin_unlock_irqrestore(&memcg->move_lock, *flags);
+ if (memcg && memcg->move_lock_task == current) {
+ unsigned long flags = memcg->move_lock_flags;
+
+ memcg->move_lock_task = NULL;
+ memcg->move_lock_flags = 0;
+
+ spin_unlock_irqrestore(&memcg->move_lock, flags);
+ }

rcu_read_unlock();
}
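
Illustration (not part of the patch): with the rework above, a page-stat caller no longer threads lock state through its own locals; only the returned memcg is kept, as in the kernel-doc at the top of the hunk. A minimal sketch of the new calling convention, where the page-state bit and stat index are hypothetical placeholders:

        struct mem_cgroup *memcg;

        memcg = mem_cgroup_begin_page_stat(page);       /* takes rcu_read_lock() */
        if (TestClearPageState(page))                   /* hypothetical state bit */
                mem_cgroup_update_page_stat(memcg, state, -1);
        mem_cgroup_end_page_stat(memcg);                /* drops move_lock if this task took it */
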
@@ -2131,17 +2129,6 @@ static void drain_local_stock(struct work_struct *dummy)
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
}

-static void __init memcg_stock_init(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct memcg_stock_pcp *stock =
- &per_cpu(memcg_stock, cpu);
- INIT_WORK(&stock->work, drain_local_stock);
- }
-}
-
/*
* Cache charges(val) to local per_cpu area.
* This will be consumed by consume_stock() function, later.
@@ -2291,6 +2278,8 @@ retry:
if (!(gfp_mask & __GFP_WAIT))
goto nomem;

+ mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1);
+
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, may_swap);

@@ -2332,6 +2321,8 @@ retry:
if (fatal_signal_pending(current))
goto bypass;

+ mem_cgroup_events(mem_over_limit, MEMCG_OOM, 1);
+
mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages));
nomem:
if (!(gfp_mask & __GFP_NOFAIL))
@@ -2343,6 +2334,16 @@ done_restock:
css_get_many(&memcg->css, batch);
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
+ /*
+ * If the hierarchy is above the normal consumption range,
+ * make the charging task trim their excess contribution.
+ */
+ do {
+ if (page_counter_read(&memcg->memory) <= memcg->high)
+ continue;
+ mem_cgroup_events(memcg, MEMCG_HIGH, 1);
+ try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+ } while ((memcg = parent_mem_cgroup(memcg)));
done:
return ret;
}
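
A worked example of the new high boundary (illustrative numbers, not from the patch): with 4K pages and memory.high set to 128M (32768 pages), a successful charge that pushes the counter to 32800 pages is not failed. Instead, before returning from done_restock, the charging task records a MEMCG_HIGH event and calls try_to_free_mem_cgroup_pages() for nr_pages, and the same check is repeated for each ancestor via parent_mem_cgroup(). This is what distinguishes high from the hard limit seen in the retry path above: breaching the limit leads to reclaim and eventually OOM (MEMCG_MAX, MEMCG_OOM events), while breaching high only throttles the charger with direct reclaim.
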
@@ -3390,7 +3391,7 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
int ret;

buf = strstrip(buf);
- ret = page_counter_memparse(buf, &nr_pages);
+ ret = page_counter_memparse(buf, "-1", &nr_pages);
if (ret)
return ret;

@@ -3466,7 +3467,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);

- if (val >= (1 << NR_MOVE_TYPE))
+ if (val & ~MOVE_MASK)
return -EINVAL;

/*
@@ -3544,6 +3545,10 @@ static int memcg_stat_show(struct seq_file *m, void *v)
struct mem_cgroup *mi;
unsigned int i;

+ BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_stat_names) !=
+ MEM_CGROUP_STAT_NSTATS);
+ BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_events_names) !=
+ MEM_CGROUP_EVENTS_NSTATS);
BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);

for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
@@ -3758,7 +3763,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
unsigned long usage;
int i, size, ret;

- ret = page_counter_memparse(args, &threshold);
+ ret = page_counter_memparse(args, "-1", &threshold);
if (ret)
return ret;

@@ -4248,7 +4253,7 @@ out_kfree:
return ret;
}

-static struct cftype mem_cgroup_files[] = {
+static struct cftype mem_cgroup_legacy_files[] = {
{
.name = "usage_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
@@ -4359,34 +4364,6 @@ static struct cftype mem_cgroup_files[] = {
{ }, /* terminate */
};

-#ifdef CONFIG_MEMCG_SWAP
-static struct cftype memsw_cgroup_files[] = {
- {
- .name = "memsw.usage_in_bytes",
- .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
- .read_u64 = mem_cgroup_read_u64,
- },
- {
- .name = "memsw.max_usage_in_bytes",
- .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
- .write = mem_cgroup_reset,
- .read_u64 = mem_cgroup_read_u64,
- },
- {
- .name = "memsw.limit_in_bytes",
- .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
- .write = mem_cgroup_write,
- .read_u64 = mem_cgroup_read_u64,
- },
- {
- .name = "memsw.failcnt",
- .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
- .write = mem_cgroup_reset,
- .read_u64 = mem_cgroup_read_u64,
- },
- { }, /* terminate */
-};
-#endif
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
{
struct mem_cgroup_per_node *pn;
@@ -4482,29 +4459,6 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
}
EXPORT_SYMBOL(parent_mem_cgroup);

-static void __init mem_cgroup_soft_limit_tree_init(void)
-{
- struct mem_cgroup_tree_per_node *rtpn;
- struct mem_cgroup_tree_per_zone *rtpz;
- int tmp, node, zone;
-
- for_each_node(node) {
- tmp = node;
- if (!node_state(node, N_NORMAL_MEMORY))
- tmp = -1;
- rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
- BUG_ON(!rtpn);
-
- soft_limit_tree.rb_tree_per_node[node] = rtpn;
-
- for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- rtpz = &rtpn->rb_tree_per_zone[zone];
- rtpz->rb_root = RB_ROOT;
- spin_lock_init(&rtpz->lock);
- }
- }
-}
-
static struct cgroup_subsys_state * __ref
mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
@@ -4524,6 +4478,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (parent_css == NULL) {
root_mem_cgroup = memcg;
page_counter_init(&memcg->memory, NULL);
+ memcg->high = PAGE_COUNTER_MAX;
memcg->soft_limit = PAGE_COUNTER_MAX;
page_counter_init(&memcg->memsw, NULL);
page_counter_init(&memcg->kmem, NULL);
@@ -4569,6 +4524,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)

if (parent->use_hierarchy) {
page_counter_init(&memcg->memory, &parent->memory);
+ memcg->high = PAGE_COUNTER_MAX;
memcg->soft_limit = PAGE_COUNTER_MAX;
page_counter_init(&memcg->memsw, &parent->memsw);
page_counter_init(&memcg->kmem, &parent->kmem);
@@ -4579,6 +4535,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
*/
} else {
page_counter_init(&memcg->memory, NULL);
+ memcg->high = PAGE_COUNTER_MAX;
memcg->soft_limit = PAGE_COUNTER_MAX;
page_counter_init(&memcg->memsw, NULL);
page_counter_init(&memcg->kmem, NULL);
@@ -4654,6 +4611,8 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX);
mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX);
memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX);
+ memcg->low = 0;
+ memcg->high = PAGE_COUNTER_MAX;
memcg->soft_limit = PAGE_COUNTER_MAX;
}

@@ -4730,12 +4689,12 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
if (!page || !page_mapped(page))
return NULL;
if (PageAnon(page)) {
- /* we don't move shared anon */
- if (!move_anon())
+ if (!(mc.flags & MOVE_ANON))
return NULL;
- } else if (!move_file())
- /* we ignore mapcount for file pages */
- return NULL;
+ } else {
+ if (!(mc.flags & MOVE_FILE))
+ return NULL;
+ }
if (!get_page_unless_zero(page))
return NULL;

@@ -4749,7 +4708,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
struct page *page = NULL;
swp_entry_t ent = pte_to_swp_entry(ptent);

- if (!move_anon() || non_swap_entry(ent))
+ if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent))
return NULL;
/*
* Because lookup_swap_cache() updates some statistics counter,
@@ -4778,7 +4737,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,

if (!vma->vm_file) /* anonymous vma */
return NULL;
- if (!move_file())
+ if (!(mc.flags & MOVE_FILE))
return NULL;

mapping = vma->vm_file->f_mapping;
@@ -4857,7 +4816,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,

page = pmd_page(pmd);
VM_BUG_ON_PAGE(!page || !PageHead(page), page);
- if (!move_anon())
+ if (!(mc.flags & MOVE_ANON))
return ret;
if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
@@ -4880,7 +4839,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct vm_area_struct *vma = walk->private;
+ struct vm_area_struct *vma = walk->vma;
pte_t *pte;
spinlock_t *ptl;

@@ -4906,20 +4865,13 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
{
unsigned long precharge;
- struct vm_area_struct *vma;

+ struct mm_walk mem_cgroup_count_precharge_walk = {
+ .pmd_entry = mem_cgroup_count_precharge_pte_range,
+ .mm = mm,
+ };
down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- struct mm_walk mem_cgroup_count_precharge_walk = {
- .pmd_entry = mem_cgroup_count_precharge_pte_range,
- .mm = mm,
- .private = vma,
- };
- if (is_vm_hugetlb_page(vma))
- continue;
- walk_page_range(vma->vm_start, vma->vm_end,
- &mem_cgroup_count_precharge_walk);
- }
+ walk_page_range(0, ~0UL, &mem_cgroup_count_precharge_walk);
up_read(&mm->mmap_sem);

precharge = mc.precharge;
@@ -4999,15 +4951,15 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct task_struct *p = cgroup_taskset_first(tset);
int ret = 0;
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- unsigned long move_charge_at_immigrate;
+ unsigned long move_flags;

/*
* We are now commited to this value whatever it is. Changes in this
* tunable will only affect upcoming migrations, not the current one.
* So we need to save it, and keep it going.
*/
- move_charge_at_immigrate = memcg->move_charge_at_immigrate;
- if (move_charge_at_immigrate) {
+ move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate);
+ if (move_flags) {
struct mm_struct *mm;
struct mem_cgroup *from = mem_cgroup_from_task(p);

@@ -5027,7 +4979,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
spin_lock(&mc.lock);
mc.from = from;
mc.to = memcg;
- mc.immigrate_flags = move_charge_at_immigrate;
+ mc.flags = move_flags;
spin_unlock(&mc.lock);
/* We set mc.moving_task later */

@@ -5052,7 +5004,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
struct mm_walk *walk)
{
int ret = 0;
- struct vm_area_struct *vma = walk->private;
+ struct vm_area_struct *vma = walk->vma;
pte_t *pte;
spinlock_t *ptl;
enum mc_target_type target_type;
@@ -5148,7 +5100,10 @@ put: /* get_mctgt_type() gets the page */

static void mem_cgroup_move_charge(struct mm_struct *mm)
{
- struct vm_area_struct *vma;
+ struct mm_walk mem_cgroup_move_charge_walk = {
+ .pmd_entry = mem_cgroup_move_charge_pte_range,
+ .mm = mm,
+ };

lru_add_drain_all();
/*
@@ -5171,24 +5126,11 @@ retry:
cond_resched();
goto retry;
}
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- int ret;
- struct mm_walk mem_cgroup_move_charge_walk = {
- .pmd_entry = mem_cgroup_move_charge_pte_range,
- .mm = mm,
- .private = vma,
- };
- if (is_vm_hugetlb_page(vma))
- continue;
- ret = walk_page_range(vma->vm_start, vma->vm_end,
- &mem_cgroup_move_charge_walk);
- if (ret)
- /*
- * means we have consumed all precharges and failed in
- * doing additional charge. Just abandon here.
- */
- break;
- }
+ /*
+ * When we have consumed all precharges and failed in doing
+ * additional charge, the page walk just aborts.
+ */
+ walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
up_read(&mm->mmap_sem);
atomic_dec(&mc.from->moving_account);
}
@@ -5239,118 +5181,211 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
mem_cgroup_from_css(root_css)->use_hierarchy = true;
}

-struct cgroup_subsys memory_cgrp_subsys = {
- .css_alloc = mem_cgroup_css_alloc,
- .css_online = mem_cgroup_css_online,
- .css_offline = mem_cgroup_css_offline,
- .css_free = mem_cgroup_css_free,
- .css_reset = mem_cgroup_css_reset,
- .can_attach = mem_cgroup_can_attach,
- .cancel_attach = mem_cgroup_cancel_attach,
- .attach = mem_cgroup_move_task,
- .bind = mem_cgroup_bind,
- .legacy_cftypes = mem_cgroup_files,
- .early_init = 0,
-};
+static u64 memory_current_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ return mem_cgroup_usage(mem_cgroup_from_css(css), false);
+}

-#ifdef CONFIG_MEMCG_SWAP
-static int __init enable_swap_account(char *s)
+static int memory_low_show(struct seq_file *m, void *v)
{
- if (!strcmp(s, "1"))
- really_do_swap_account = 1;
- else if (!strcmp(s, "0"))
- really_do_swap_account = 0;
- return 1;
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+ unsigned long low = ACCESS_ONCE(memcg->low);
+
+ if (low == PAGE_COUNTER_MAX)
+ seq_puts(m, "infinity\n");
+ else
+ seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE);
+
+ return 0;
}
-__setup("swapaccount=", enable_swap_account);

-static void __init memsw_file_init(void)
+static ssize_t memory_low_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
- WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
- memsw_cgroup_files));
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long low;
+ int err;
+
+ buf = strstrip(buf);
+ err = page_counter_memparse(buf, "infinity", &low);
+ if (err)
+ return err;
+
+ memcg->low = low;
+
+ return nbytes;
}

-static void __init enable_swap_cgroup(void)
+static int memory_high_show(struct seq_file *m, void *v)
{
- if (!mem_cgroup_disabled() && really_do_swap_account) {
- do_swap_account = 1;
- memsw_file_init();
- }
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+ unsigned long high = ACCESS_ONCE(memcg->high);
+
+ if (high == PAGE_COUNTER_MAX)
+ seq_puts(m, "infinity\n");
+ else
+ seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE);
+
+ return 0;
}

-#else
-static void __init enable_swap_cgroup(void)
+static ssize_t memory_high_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long high;
+ int err;
+
+ buf = strstrip(buf);
+ err = page_counter_memparse(buf, "infinity", &high);
+ if (err)
+ return err;
+
+ memcg->high = high;
+
+ return nbytes;
}
-#endif

-#ifdef CONFIG_MEMCG_SWAP
-/**
- * mem_cgroup_swapout - transfer a memsw charge to swap
- * @page: page whose memsw charge to transfer
- * @entry: swap entry to move the charge to
- *
- * Transfer the memsw charge of @page to @entry.
- */
-void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+static int memory_max_show(struct seq_file *m, void *v)
{
- struct mem_cgroup *memcg;
- unsigned short oldid;
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+ unsigned long max = ACCESS_ONCE(memcg->memory.limit);

- VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
+ if (max == PAGE_COUNTER_MAX)
+ seq_puts(m, "infinity\n");
+ else
+ seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE);

- if (!do_swap_account)
- return;
+ return 0;
+}

- memcg = page->mem_cgroup;
+static ssize_t memory_max_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long max;
+ int err;

- /* Readahead page, never charged */
- if (!memcg)
- return;
+ buf = strstrip(buf);
+ err = page_counter_memparse(buf, "infinity", &max);
+ if (err)
+ return err;

- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
- VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(memcg, true);
+ err = mem_cgroup_resize_limit(memcg, max);
+ if (err)
+ return err;

- page->mem_cgroup = NULL;
+ return nbytes;
+}

- if (!mem_cgroup_is_root(memcg))
- page_counter_uncharge(&memcg->memory, 1);
+static int memory_events_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

- /* XXX: caller holds IRQ-safe mapping->tree_lock */
- VM_BUG_ON(!irqs_disabled());
+ seq_printf(m, "low %lu\n", mem_cgroup_read_events(memcg, MEMCG_LOW));
+ seq_printf(m, "high %lu\n", mem_cgroup_read_events(memcg, MEMCG_HIGH));
+ seq_printf(m, "max %lu\n", mem_cgroup_read_events(memcg, MEMCG_MAX));
+ seq_printf(m, "oom %lu\n", mem_cgroup_read_events(memcg, MEMCG_OOM));

- mem_cgroup_charge_statistics(memcg, page, -1);
- memcg_check_events(memcg, page);
+ return 0;
+}
+
+static struct cftype memory_files[] = {
+ {
+ .name = "current",
+ .read_u64 = memory_current_read,
+ },
+ {
+ .name = "low",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_low_show,
+ .write = memory_low_write,
+ },
+ {
+ .name = "high",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_high_show,
+ .write = memory_high_write,
+ },
+ {
+ .name = "max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_max_show,
+ .write = memory_max_write,
+ },
+ {
+ .name = "events",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_events_show,
+ },
+ { } /* terminate */
+};
+
+struct cgroup_subsys memory_cgrp_subsys = {
+ .css_alloc = mem_cgroup_css_alloc,
+ .css_online = mem_cgroup_css_online,
+ .css_offline = mem_cgroup_css_offline,
+ .css_free = mem_cgroup_css_free,
+ .css_reset = mem_cgroup_css_reset,
+ .can_attach = mem_cgroup_can_attach,
+ .cancel_attach = mem_cgroup_cancel_attach,
+ .attach = mem_cgroup_move_task,
+ .bind = mem_cgroup_bind,
+ .dfl_cftypes = memory_files,
+ .legacy_cftypes = mem_cgroup_legacy_files,
+ .early_init = 0,
+};
+
+/**
+ * mem_cgroup_events - count memory events against a cgroup
+ * @memcg: the memory cgroup
+ * @idx: the event index
+ * @nr: the number of events to account for
+ */
+void mem_cgroup_events(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_index idx,
+ unsigned int nr)
+{
+ this_cpu_add(memcg->stat->events[idx], nr);
}

/**
- * mem_cgroup_uncharge_swap - uncharge a swap entry
- * @entry: swap entry to uncharge
+ * mem_cgroup_low - check if memory consumption is below the normal range
+ * @root: the highest ancestor to consider
+ * @memcg: the memory cgroup to check
*
- * Drop the memsw charge associated with @entry.
+ * Returns %true if memory consumption of @memcg, and that of all
+ * configurable ancestors up to @root, is below the normal range.
*/
-void mem_cgroup_uncharge_swap(swp_entry_t entry)
+bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
{
- struct mem_cgroup *memcg;
- unsigned short id;
+ if (mem_cgroup_disabled())
+ return false;

- if (!do_swap_account)
- return;
+ /*
+ * The toplevel group doesn't have a configurable range, so
+ * it's never low when looked at directly, and it is not
+ * considered an ancestor when assessing the hierarchy.
+ */

- id = swap_cgroup_record(entry, 0);
- rcu_read_lock();
- memcg = mem_cgroup_lookup(id);
- if (memcg) {
- if (!mem_cgroup_is_root(memcg))
- page_counter_uncharge(&memcg->memsw, 1);
- mem_cgroup_swap_statistics(memcg, false);
- css_put(&memcg->css);
+ if (memcg == root_mem_cgroup)
+ return false;
+
+ if (page_counter_read(&memcg->memory) > memcg->low)
+ return false;
+
+ while (memcg != root) {
+ memcg = parent_mem_cgroup(memcg);
+
+ if (memcg == root_mem_cgroup)
+ break;
+
+ if (page_counter_read(&memcg->memory) > memcg->low)
+ return false;
}
- rcu_read_unlock();
+ return true;
}
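
Illustration (not part of the patch): mem_cgroup_low() is meant to be consulted from the reclaim side, with mem_cgroup_events() recording when the protection has to be ignored. A minimal sketch, assuming a caller that iterates the groups below `root` and a hypothetical `scan_anyway` policy bit:

        /* inside a hypothetical reclaim loop over memcgs under 'root' */
        if (mem_cgroup_low(root, memcg)) {
                /* below the protected range: skip unless reclaim has no other choice */
                if (!scan_anyway)
                        continue;
                mem_cgroup_events(memcg, MEMCG_LOW, 1);
        }
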
-#endif

/**
* mem_cgroup_try_charge - try charging a page
@@ -5684,10 +5719,155 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
*/
static int __init mem_cgroup_init(void)
{
+ int cpu, node;
+
hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
- enable_swap_cgroup();
- mem_cgroup_soft_limit_tree_init();
- memcg_stock_init();
+
+ for_each_possible_cpu(cpu)
+ INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
+ drain_local_stock);
+
+ for_each_node(node) {
+ struct mem_cgroup_tree_per_node *rtpn;
+ int zone;
+
+ rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
+ node_online(node) ? node : NUMA_NO_NODE);
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ struct mem_cgroup_tree_per_zone *rtpz;
+
+ rtpz = &rtpn->rb_tree_per_zone[zone];
+ rtpz->rb_root = RB_ROOT;
+ spin_lock_init(&rtpz->lock);
+ }
+ soft_limit_tree.rb_tree_per_node[node] = rtpn;
+ }
+
return 0;
}
subsys_initcall(mem_cgroup_init);
+
+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+ struct mem_cgroup *memcg;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ if (!do_swap_account)
+ return;
+
+ memcg = page->mem_cgroup;
+
+ /* Readahead page, never charged */
+ if (!memcg)
+ return;
+
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
+
+ page->mem_cgroup = NULL;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
+ /* XXX: caller holds IRQ-safe mapping->tree_lock */
+ VM_BUG_ON(!irqs_disabled());
+
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+ struct mem_cgroup *memcg;
+ unsigned short id;
+
+ if (!do_swap_account)
+ return;
+
+ id = swap_cgroup_record(entry, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
+ if (memcg) {
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memsw, 1);
+ mem_cgroup_swap_statistics(memcg, false);
+ css_put(&memcg->css);
+ }
+ rcu_read_unlock();
+}
+
+/* for remember boot option*/
+#ifdef CONFIG_MEMCG_SWAP_ENABLED
+static int really_do_swap_account __initdata = 1;
+#else
+static int really_do_swap_account __initdata;
+#endif
+
+static int __init enable_swap_account(char *s)
+{
+ if (!strcmp(s, "1"))
+ really_do_swap_account = 1;
+ else if (!strcmp(s, "0"))
+ really_do_swap_account = 0;
+ return 1;
+}
+__setup("swapaccount=", enable_swap_account);
+
+static struct cftype memsw_cgroup_files[] = {
+ {
+ .name = "memsw.usage_in_bytes",
+ .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ {
+ .name = "memsw.max_usage_in_bytes",
+ .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
+ .write = mem_cgroup_reset,
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ {
+ .name = "memsw.limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
+ .write = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ {
+ .name = "memsw.failcnt",
+ .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
+ .write = mem_cgroup_reset,
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ { }, /* terminate */
+};
+
+static int __init mem_cgroup_swap_init(void)
+{
+ if (!mem_cgroup_disabled() && really_do_swap_account) {
+ do_swap_account = 1;
+ WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
+ memsw_cgroup_files));
+ }
+ return 0;
+}
+subsys_initcall(mem_cgroup_swap_init);
+
+#endif /* CONFIG_MEMCG_SWAP */