@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
struct mem_cgroup_tree_per_zone *mctz)
{
- spin_lock(&mctz->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mctz->lock, flags);
__mem_cgroup_remove_exceeded(mz, mctz);
- spin_unlock(&mctz->lock);
+ spin_unlock_irqrestore(&mctz->lock, flags);
}

@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
* mem is over its softlimit.
*/
if (excess || mz->on_tree) {
- spin_lock(&mctz->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mctz->lock, flags);
/* if on-tree, remove it */
if (mz->on_tree)
__mem_cgroup_remove_exceeded(mz, mctz);

@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
* If excess is 0, no tree ops.
*/
__mem_cgroup_insert_exceeded(mz, mctz, excess);
- spin_unlock(&mctz->lock);
+ spin_unlock_irqrestore(&mctz->lock, flags);
}
}
}

@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
{
struct mem_cgroup_per_zone *mz;

- spin_lock(&mctz->lock);
+ spin_lock_irq(&mctz->lock);
mz = __mem_cgroup_largest_soft_limit_node(mctz);
- spin_unlock(&mctz->lock);
+ spin_unlock_irq(&mctz->lock);
return mz;
}
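
An illustrative aside on the locking change above: the soft-limit tree lock is now taken with interrupts disabled. A minimal sketch of the two variants used in these hunks, with a hypothetical example_tree structure standing in for mem_cgroup_tree_per_zone: the _irqsave form is for paths whose IRQ state is unknown, the _irq form for paths known to run with IRQs enabled.

    #include <linux/spinlock.h>
    #include <linux/rbtree.h>

    /* Hypothetical data structure, for illustration only. */
    struct example_tree {
            spinlock_t lock;
            struct rb_root root;
    };

    /* Caller's IRQ state is unknown (it may already run with IRQs off). */
    static void example_remove(struct example_tree *tree)
    {
            unsigned long flags;

            spin_lock_irqsave(&tree->lock, flags);
            /* ... unlink a node from tree->root ... */
            spin_unlock_irqrestore(&tree->lock, flags);
    }

    /* Known process context with IRQs enabled. */
    static void example_pop(struct example_tree *tree)
    {
            spin_lock_irq(&tree->lock);
            /* ... take the leftmost node from tree->root ... */
            spin_unlock_irq(&tree->lock);
    }
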
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
return val;
}

-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
-}
-
static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
enum mem_cgroup_events_index idx)
{

@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,

static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
struct page *page,
- bool anon, int nr_pages)
+ int nr_pages)
{
/*
* Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
* counted as CACHE even if it's on ANON LRU.
*/
- if (anon)
+ if (PageAnon(page))
__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
nr_pages);
else

@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
*/
static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
- preempt_disable();
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {

@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
do_numainfo = mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_NUMAINFO);
#endif
- preempt_enable();
-
mem_cgroup_threshold(memcg);
if (unlikely(do_softlimit))
mem_cgroup_update_tree(memcg, page);

@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
if (unlikely(do_numainfo))
atomic_inc(&memcg->numainfo_events);
#endif
- } else
- preempt_enable();
+ }
}

struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
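
With preempt_disable()/preempt_enable() removed from memcg_check_events(), the per-cpu event counters rely on the callers keeping interrupts disabled across the statistics update and the event check. This is the calling pattern that commit_charge() adopts later in this patch:

    local_irq_disable();
    mem_cgroup_charge_statistics(memcg, page, nr_pages);
    memcg_check_events(memcg, page);  /* thresholds, soft limit, numainfo */
    local_irq_enable();
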
@@ -1347,20 +1340,6 @@ out:
return lruvec;
}

-/*
- * Following LRU functions are allowed to be used without PCG_LOCK.
- * Operations are called by routine of global LRU independently from memcg.
- * What we have to take care of here is validness of pc->mem_cgroup.
- *
- * Changes to pc->mem_cgroup happens when
- * 1. charge
- * 2. moving account
- * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
- * It is added to LRU before charge.
- * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
- * When moving account, the page is not on LRU. It's isolated.
- */
-
/**
* mem_cgroup_page_lruvec - return lruvec for adding an lru page
* @page: the page

@@ -2261,22 +2240,14 @@ cleanup:
*
* Notes: Race condition
*
- * We usually use lock_page_cgroup() for accessing page_cgroup member but
- * it tends to be costly. But considering some conditions, we doesn't need
- * to do so _always_.
- *
- * Considering "charge", lock_page_cgroup() is not required because all
- * file-stat operations happen after a page is attached to radix-tree. There
- * are no race with "charge".
+ * Charging occurs during page instantiation, while the page is
+ * unmapped and locked in page migration, or while the page table is
+ * locked in THP migration. No race is possible.
*
- * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup
- * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
- * if there are race with "uncharge". Statistics itself is properly handled
- * by flags.
+ * Uncharge happens to pages with zero references, no race possible.
*
- * Considering "move", this is an only case we see a race. To make the race
- * small, we check memcg->moving_account and detect there are possibility
- * of race or not. If there is, we take a lock.
+ * Charge moving between groups is protected by checking mm->moving
+ * account and taking the move_lock in the slowpath.
*/

void __mem_cgroup_begin_update_page_stat(struct page *page,
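
The rewritten comment above describes how page-stat updates synchronize against charge moving. For orientation, a sketch of how a stat updater drives this interface; the wrapper names and the condition are assumptions based on callers outside this excerpt, only __mem_cgroup_begin_update_page_stat() itself appears here:

    bool locked;
    unsigned long flags;

    /* Take the per-memcg move_lock only if a charge move may be in flight. */
    mem_cgroup_begin_update_page_stat(page, &locked, &flags);
    if (mapping_changed)  /* illustrative condition */
            mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
    mem_cgroup_end_update_page_stat(page, &locked, &flags);
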
@@ -2689,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
return mem_cgroup_from_id(id);
}

+/*
+ * try_get_mem_cgroup_from_page - look up page's memcg association
+ * @page: the page
+ *
+ * Look up, get a css reference, and return the memcg that owns @page.
+ *
+ * The page must be locked to prevent racing with swap-in and page
+ * cache charges. If coming from an unlocked page table, the caller
+ * must ensure the page is on the LRU or this can race with charging.
+ */
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
struct mem_cgroup *memcg = NULL;

@@ -2699,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
VM_BUG_ON_PAGE(!PageLocked(page), page);

pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
memcg = pc->mem_cgroup;
if (memcg && !css_tryget_online(&memcg->css))

@@ -2713,19 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
memcg = NULL;
rcu_read_unlock();
}
- unlock_page_cgroup(pc);
return memcg;
}

+static void lock_page_lru(struct page *page, int *isolated)
+{
+ struct zone *zone = page_zone(page);
+
+ spin_lock_irq(&zone->lru_lock);
+ if (PageLRU(page)) {
+ struct lruvec *lruvec;
+
+ lruvec = mem_cgroup_page_lruvec(page, zone);
+ ClearPageLRU(page);
+ del_page_from_lru_list(page, lruvec, page_lru(page));
+ *isolated = 1;
+ } else
+ *isolated = 0;
+}
+
+static void unlock_page_lru(struct page *page, int isolated)
+{
+ struct zone *zone = page_zone(page);
+
+ if (isolated) {
+ struct lruvec *lruvec;
+
+ lruvec = mem_cgroup_page_lruvec(page, zone);
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ SetPageLRU(page);
+ add_page_to_lru_list(page, lruvec, page_lru(page));
+ }
+ spin_unlock_irq(&zone->lru_lock);
+}
+
static void commit_charge(struct page *page, struct mem_cgroup *memcg,
- unsigned int nr_pages, bool anon, bool lrucare)
+ unsigned int nr_pages, bool lrucare)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
- struct zone *uninitialized_var(zone);
- struct lruvec *lruvec;
- bool was_on_lru = false;
+ int isolated;

- lock_page_cgroup(pc);
VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
/*
* we don't need page_cgroup_lock about tail pages, becase they are not

@@ -2736,39 +2743,38 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
* In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
* may already be on some other mem_cgroup's LRU. Take care of it.
*/
- if (lrucare) {
- zone = page_zone(page);
- spin_lock_irq(&zone->lru_lock);
- if (PageLRU(page)) {
- lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
- ClearPageLRU(page);
- del_page_from_lru_list(page, lruvec, page_lru(page));
- was_on_lru = true;
- }
- }
+ if (lrucare)
+ lock_page_lru(page, &isolated);

+ /*
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point:
+ *
+ * - the page is uncharged
+ *
+ * - the page is off-LRU
+ *
+ * - an anonymous fault has exclusive page access, except for
+ * a locked page table
+ *
+ * - a page cache insertion, a swapin fault, or a migration
+ * have the page locked
+ */
pc->mem_cgroup = memcg;
- SetPageCgroupUsed(pc);
-
- if (lrucare) {
- if (was_on_lru) {
- lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
- VM_BUG_ON_PAGE(PageLRU(page), page);
- SetPageLRU(page);
- add_page_to_lru_list(page, lruvec, page_lru(page));
- }
- spin_unlock_irq(&zone->lru_lock);
- }
+ pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);

- mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
- unlock_page_cgroup(pc);
+ if (lrucare)
+ unlock_page_lru(page, isolated);

+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, nr_pages);
/*
* "charge_statistics" updated event counter. Then, check it.
* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
* if they exceeds softlimit.
*/
memcg_check_events(memcg, page);
+ local_irq_enable();
}

static DEFINE_MUTEX(set_limit_mutex);
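
commit_charge() is now the one place that binds a page to its memcg, writing pc->mem_cgroup and pc->flags without the page_cgroup bit lock. For context, a rough sketch of the public calling convention built on top of it; the prototypes follow the kerneldoc that appears further down in this patch, and 'installed' and 'lrucare' stand in for caller state:

    struct mem_cgroup *memcg;
    int error;

    error = mem_cgroup_try_charge(page, mm, gfp_mask, &memcg);
    if (error)
            return error;

    /* ... install the page in the page tables or the page cache ... */

    if (installed)
            mem_cgroup_commit_charge(page, memcg, lrucare);
    else
            mem_cgroup_cancel_charge(page, memcg);
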
@@ -3395,7 +3401,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

-#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compound_lock.

@@ -3416,7 +3421,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
for (i = 1; i < HPAGE_PMD_NR; i++) {
pc = head_pc + i;
pc->mem_cgroup = memcg;
- pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+ pc->flags = head_pc->flags;
}
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
HPAGE_PMD_NR);

@@ -3446,7 +3451,6 @@ static int mem_cgroup_move_account(struct page *page,
{
unsigned long flags;
int ret;
- bool anon = PageAnon(page);

VM_BUG_ON(from == to);
VM_BUG_ON_PAGE(PageLRU(page), page);

@@ -3460,15 +3464,21 @@ static int mem_cgroup_move_account(struct page *page,
if (nr_pages > 1 && !PageTransHuge(page))
goto out;

- lock_page_cgroup(pc);
+ /*
+ * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
+ * of its source page while we change it: page migration takes
+ * both pages off the LRU, but page cache replacement doesn't.
+ */
+ if (!trylock_page(page))
+ goto out;

ret = -EINVAL;
if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
- goto unlock;
+ goto out_unlock;

move_lock_mem_cgroup(from, &flags);

- if (!anon && page_mapped(page)) {
+ if (!PageAnon(page) && page_mapped(page)) {
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
nr_pages);
__this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],

@@ -3482,20 +3492,25 @@ static int mem_cgroup_move_account(struct page *page,
nr_pages);
}

- mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
+ /*
+ * It is safe to change pc->mem_cgroup here because the page
+ * is referenced, charged, and isolated - we can't race with
+ * uncharging, charging, migration, or LRU putback.
+ */

/* caller should have done css_get */
pc->mem_cgroup = to;
- mem_cgroup_charge_statistics(to, page, anon, nr_pages);
move_unlock_mem_cgroup(from, &flags);
ret = 0;
-unlock:
- unlock_page_cgroup(pc);
- /*
- * check events
- */
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(to, page, nr_pages);
memcg_check_events(to, page);
+ mem_cgroup_charge_statistics(from, page, -nr_pages);
memcg_check_events(from, page);
+ local_irq_enable();
+out_unlock:
+ unlock_page(page);
out:
return ret;
}
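
mem_cgroup_move_account() now nests three layers of protection in place of lock_page_cgroup(). The ordering established by the hunks above, reduced to a sketch:

    if (!trylock_page(page))            /* 1. page lock vs. migrate/replace */
            goto out;
    move_lock_mem_cgroup(from, &flags); /* 2. move_lock vs. stat updaters */
    /* ... move per-memcg counters, switch pc->mem_cgroup ... */
    move_unlock_mem_cgroup(from, &flags);

    local_irq_disable();                /* 3. IRQs off for stats and events */
    /* ... charge statistics and event checks on both cgroups ... */
    local_irq_enable();
    unlock_page(page);
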
@@ -3566,193 +3581,6 @@ out:
return ret;
}

-static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
- unsigned int nr_pages,
- const enum charge_type ctype)
-{
- struct memcg_batch_info *batch = NULL;
- bool uncharge_memsw = true;
-
- /* If swapout, usage of swap doesn't decrease */
- if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
- uncharge_memsw = false;
-
- batch = &current->memcg_batch;
- /*
- * In usual, we do css_get() when we remember memcg pointer.
- * But in this case, we keep res->usage until end of a series of
- * uncharges. Then, it's ok to ignore memcg's refcnt.
- */
- if (!batch->memcg)
- batch->memcg = memcg;
- /*
- * do_batch > 0 when unmapping pages or inode invalidate/truncate.
- * In those cases, all pages freed continuously can be expected to be in
- * the same cgroup and we have chance to coalesce uncharges.
- * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
- * because we want to do uncharge as soon as possible.
- */
-
- if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
- goto direct_uncharge;
-
- if (nr_pages > 1)
- goto direct_uncharge;
-
- /*
- * In typical case, batch->memcg == mem. This means we can
- * merge a series of uncharges to an uncharge of res_counter.
- * If not, we uncharge res_counter ony by one.
- */
- if (batch->memcg != memcg)
- goto direct_uncharge;
- /* remember freed charge and uncharge it later */
- batch->nr_pages++;
- if (uncharge_memsw)
- batch->memsw_nr_pages++;
- return;
-direct_uncharge:
- res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
- if (uncharge_memsw)
- res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
- if (unlikely(batch->memcg != memcg))
- memcg_oom_recover(memcg);
-}
-
-/*
- * uncharge if !page_mapped(page)
- */
-static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
- bool end_migration)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
- bool anon;
-
- if (mem_cgroup_disabled())
- return NULL;
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- }
- /*
- * Check if our page_cgroup is valid
- */
- pc = lookup_page_cgroup(page);
- if (unlikely(!PageCgroupUsed(pc)))
- return NULL;
-
- lock_page_cgroup(pc);
-
- memcg = pc->mem_cgroup;
-
- if (!PageCgroupUsed(pc))
- goto unlock_out;
-
- anon = PageAnon(page);
-
- switch (ctype) {
- case MEM_CGROUP_CHARGE_TYPE_ANON:
- /*
- * Generally PageAnon tells if it's the anon statistics to be
- * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
- * used before page reached the stage of being marked PageAnon.
- */
- anon = true;
- /* fallthrough */
- case MEM_CGROUP_CHARGE_TYPE_DROP:
- /* See mem_cgroup_prepare_migration() */
- if (page_mapped(page))
- goto unlock_out;
- /*
- * Pages under migration may not be uncharged. But
- * end_migration() /must/ be the one uncharging the
- * unused post-migration page and so it has to call
- * here with the migration bit still set. See the
- * res_counter handling below.
- */
- if (!end_migration && PageCgroupMigration(pc))
- goto unlock_out;
- break;
- case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
- if (!PageAnon(page)) { /* Shared memory */
- if (page->mapping && !page_is_file_cache(page))
- goto unlock_out;
- } else if (page_mapped(page)) /* Anon */
- goto unlock_out;
- break;
- default:
- break;
- }
-
- mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
-
- ClearPageCgroupUsed(pc);
- /*
- * pc->mem_cgroup is not cleared here. It will be accessed when it's
- * freed from LRU. This is safe because uncharged page is expected not
- * to be reused (freed soon). Exception is SwapCache, it's handled by
- * special functions.
- */
-
- unlock_page_cgroup(pc);
- /*
- * even after unlock, we have memcg->res.usage here and this memcg
- * will never be freed, so it's safe to call css_get().
- */
- memcg_check_events(memcg, page);
- if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
- mem_cgroup_swap_statistics(memcg, true);
- css_get(&memcg->css);
- }
- /*
- * Migration does not charge the res_counter for the
- * replacement page, so leave it alone when phasing out the
- * page that is unused after the migration.
- */
- if (!end_migration)
- mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
-
- return memcg;
-
-unlock_out:
- unlock_page_cgroup(pc);
- return NULL;
-}
-
-void mem_cgroup_uncharge_page(struct page *page)
-{
- /* early check. */
- if (page_mapped(page))
- return;
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- /*
- * If the page is in swap cache, uncharge should be deferred
- * to the swap path, which also properly accounts swap usage
- * and handles memcg lifetime.
- *
- * Note that this check is not stable and reclaim may add the
- * page to swap cache at any time after this. However, if the
- * page is not in swap cache by the time page->mapcount hits
- * 0, there won't be any page table references to the swap
- * slot, and reclaim will free it and not actually write the
- * page to disk.
- */
- if (PageSwapCache(page))
- return;
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
-}
-
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping, page);
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
-}
-
/*
* Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
* In that cases, pages are freed continuously and we can expect pages

@@ -3763,6 +3591,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)

void mem_cgroup_uncharge_start(void)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
current->memcg_batch.do_batch++;
/* We can do nest. */
if (current->memcg_batch.do_batch == 1) {

@@ -3770,21 +3601,18 @@ void mem_cgroup_uncharge_start(void)
current->memcg_batch.nr_pages = 0;
current->memcg_batch.memsw_nr_pages = 0;
}
+ local_irq_restore(flags);
}

void mem_cgroup_uncharge_end(void)
{
struct memcg_batch_info *batch = &current->memcg_batch;
+ unsigned long flags;

- if (!batch->do_batch)
- return;
-
- batch->do_batch--;
- if (batch->do_batch) /* If stacked, do nothing. */
- return;
-
- if (!batch->memcg)
- return;
+ local_irq_save(flags);
+ VM_BUG_ON(!batch->do_batch);
+ if (--batch->do_batch) /* If stacked, do nothing */
+ goto out;
/*
* This "batch->memcg" is valid without any css_get/put etc...
* bacause we hide charges behind us.

@@ -3796,61 +3624,16 @@ void mem_cgroup_uncharge_end(void)
res_counter_uncharge(&batch->memcg->memsw,
batch->memsw_nr_pages * PAGE_SIZE);
memcg_oom_recover(batch->memcg);
- /* forget this pointer (for sanity check) */
- batch->memcg = NULL;
-}
-
-#ifdef CONFIG_SWAP
-/*
- * called after __delete_from_swap_cache() and drop "page" account.
- * memcg information is recorded to swap_cgroup of "ent"
- */
-void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
- struct mem_cgroup *memcg;
- int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
-
- if (!swapout) /* this was a swap cache but the swap is unused ! */
- ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
-
- memcg = __mem_cgroup_uncharge_common(page, ctype, false);
-
- /*
- * record memcg information, if swapout && memcg != NULL,
- * css_get() was called in uncharge().
- */
- if (do_swap_account && swapout && memcg)
- swap_cgroup_record(ent, mem_cgroup_id(memcg));
+out:
+ local_irq_restore(flags);
}
-#endif

#ifdef CONFIG_MEMCG_SWAP
-/*
- * called from swap_entry_free(). remove record in swap_cgroup and
- * uncharge "memsw" account.
- */
-void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+ bool charge)
{
- struct mem_cgroup *memcg;
- unsigned short id;
-
- if (!do_swap_account)
- return;
-
- id = swap_cgroup_record(ent, 0);
- rcu_read_lock();
- memcg = mem_cgroup_lookup(id);
- if (memcg) {
- /*
- * We uncharge this because swap is freed. This memcg can
- * be obsolete one. We avoid calling css_tryget_online().
- */
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
- mem_cgroup_swap_statistics(memcg, false);
- css_put(&memcg->css);
- }
- rcu_read_unlock();
+ int val = (charge) ? 1 : -1;
+ this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
}

/**
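
mem_cgroup_uncharge_start() and mem_cgroup_uncharge_end() still bracket bulk frees so that res_counter uncharges of pages from the same cgroup can be coalesced; the batch fields are now manipulated with interrupts disabled to match the uncharge side. A sketch of a caller, where the page list name is illustrative:

    struct page *page, *next;

    mem_cgroup_uncharge_start();
    list_for_each_entry_safe(page, next, &pages_to_free, lru) {
            list_del(&page->lru);
            put_page(page); /* a final put reaches the uncharge path and is batched */
    }
    mem_cgroup_uncharge_end();
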
@@ -3902,169 +3685,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
}
#endif

-/*
- * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
- * page belongs to.
- */
-void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
-
- *memcgp = NULL;
-
- if (mem_cgroup_disabled())
- return;
-
- if (PageTransHuge(page))
- nr_pages <<= compound_order(page);
-
- pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- css_get(&memcg->css);
- /*
- * At migrating an anonymous page, its mapcount goes down
- * to 0 and uncharge() will be called. But, even if it's fully
- * unmapped, migration may fail and this page has to be
- * charged again. We set MIGRATION flag here and delay uncharge
- * until end_migration() is called
- *
- * Corner Case Thinking
- * A)
- * When the old page was mapped as Anon and it's unmap-and-freed
- * while migration was ongoing.
- * If unmap finds the old page, uncharge() of it will be delayed
- * until end_migration(). If unmap finds a new page, it's
- * uncharged when it make mapcount to be 1->0. If unmap code
- * finds swap_migration_entry, the new page will not be mapped
- * and end_migration() will find it(mapcount==0).
- *
- * B)
- * When the old page was mapped but migraion fails, the kernel
- * remaps it. A charge for it is kept by MIGRATION flag even
- * if mapcount goes down to 0. We can do remap successfully
- * without charging it again.
- *
- * C)
- * The "old" page is under lock_page() until the end of
- * migration, so, the old page itself will not be swapped-out.
- * If the new page is swapped out before end_migraton, our
- * hook to usual swap-out path will catch the event.
- */
- if (PageAnon(page))
- SetPageCgroupMigration(pc);
- }
- unlock_page_cgroup(pc);
- /*
- * If the page is not charged at this point,
- * we return here.
- */
- if (!memcg)
- return;
-
- *memcgp = memcg;
- /*
- * We charge new page before it's used/mapped. So, even if unlock_page()
- * is called before end_migration, we can catch all events on this new
- * page. In the case new page is migrated but not remapped, new page's
- * mapcount will be finally 0 and we call uncharge in end_migration().
- */
- /*
- * The page is committed to the memcg, but it's not actually
- * charged to the res_counter since we plan on replacing the
- * old one and only one page is going to be left afterwards.
- */
- commit_charge(newpage, memcg, nr_pages, PageAnon(page), false);
-}
-
-/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok)
-{
- struct page *used, *unused;
- struct page_cgroup *pc;
- bool anon;
-
- if (!memcg)
- return;
-
- if (!migration_ok) {
- used = oldpage;
- unused = newpage;
- } else {
- used = newpage;
- unused = oldpage;
- }
- anon = PageAnon(used);
- __mem_cgroup_uncharge_common(unused,
- anon ? MEM_CGROUP_CHARGE_TYPE_ANON
- : MEM_CGROUP_CHARGE_TYPE_CACHE,
- true);
- css_put(&memcg->css);
- /*
- * We disallowed uncharge of pages under migration because mapcount
- * of the page goes down to zero, temporarly.
- * Clear the flag and check the page should be charged.
- */
- pc = lookup_page_cgroup(oldpage);
- lock_page_cgroup(pc);
- ClearPageCgroupMigration(pc);
- unlock_page_cgroup(pc);
-
- /*
- * If a page is a file cache, radix-tree replacement is very atomic
- * and we can skip this check. When it was an Anon page, its mapcount
- * goes down to 0. But because we added MIGRATION flage, it's not
- * uncharged yet. There are several case but page->mapcount check
- * and USED bit check in mem_cgroup_uncharge_page() will do enough
- * check. (see prepare_charge() also)
- */
- if (anon)
- mem_cgroup_uncharge_page(used);
-}
-
-/*
- * At replace page cache, newpage is not under any memcg but it's on
- * LRU. So, this function doesn't touch res_counter but handles LRU
- * in correct way. Both pages are locked so we cannot race with uncharge.
- */
-void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage)
-{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
-
- if (mem_cgroup_disabled())
- return;
-
- pc = lookup_page_cgroup(oldpage);
- /* fix accounting on old pages */
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
-
- /*
- * When called from shmem_replace_page(), in some cases the
- * oldpage has already been charged, and in some cases not.
- */
- if (!memcg)
- return;
- /*
- * Even if newpage->mapping was NULL before starting replacement,
- * the newpage may be on LRU(or pagevec for LRU) already. We lock
- * LRU while we overwrite pc->mem_cgroup.
- */
- commit_charge(newpage, memcg, 1, false, true);
-}
-
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{

@@ -4263,7 +3883,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_mask, &nr_scanned);
nr_reclaimed += reclaimed;
*total_scanned += nr_scanned;
- spin_lock(&mctz->lock);
+ spin_lock_irq(&mctz->lock);

/*
* If we failed to reclaim anything from this memory cgroup

@@ -4303,7 +3923,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
*/
/* If excess == 0, no tree ops */
__mem_cgroup_insert_exceeded(mz, mctz, excess);
- spin_unlock(&mctz->lock);
+ spin_unlock_irq(&mctz->lock);
css_put(&mz->memcg->css);
loop++;
/*

@@ -6265,9 +5885,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
if (page) {
pc = lookup_page_cgroup(page);
/*
- * Do only loose check w/o page_cgroup lock.
- * mem_cgroup_move_account() checks the pc is valid or not under
- * the lock.
+ * Do only loose check w/o serialization.
+ * mem_cgroup_move_account() checks the pc is valid or
+ * not under LRU exclusion.
*/
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;

@@ -6729,6 +6349,67 @@ static void __init enable_swap_cgroup(void)
}
#endif

+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+ struct page_cgroup *pc;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ if (!do_swap_account)
+ return;
+
+ pc = lookup_page_cgroup(page);
+
+ /* Readahead page, never charged */
+ if (!PageCgroupUsed(pc))
+ return;
+
+ VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ VM_BUG_ON_PAGE(oldid, page);
+
+ pc->flags &= ~PCG_MEMSW;
+ css_get(&pc->mem_cgroup->css);
+ mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+ struct mem_cgroup *memcg;
+ unsigned short id;
+
+ if (!do_swap_account)
+ return;
+
+ id = swap_cgroup_record(entry, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
+ if (memcg) {
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ mem_cgroup_swap_statistics(memcg, false);
+ css_put(&memcg->css);
+ }
+ rcu_read_unlock();
+}
+#endif
+
/**
* mem_cgroup_try_charge - try charging a page
* @page: page to charge
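
mem_cgroup_swapout() and mem_cgroup_uncharge_swap() form a pair around the swap_cgroup record: swap-out stamps the entry with the owning memcg's id and keeps the memsw charge, and freeing the swap entry later clears the record and drops that charge. The handoff, condensed from the two functions above (reference counting and statistics omitted):

    unsigned short oldid, id;
    struct mem_cgroup *memcg;

    /* swap-out side: remember the owner on the swap entry */
    oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));

    /* swap-free side: clear the record, look the owner back up, drop memsw */
    id = swap_cgroup_record(entry, 0);
    memcg = mem_cgroup_lookup(id);
    if (memcg)
            res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
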
@@ -6831,7 +6512,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
}

- commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare);
+ commit_charge(page, memcg, nr_pages, lrucare);

if (do_swap_account && PageSwapCache(page)) {
swp_entry_t entry = { .val = page_private(page) };

@@ -6873,6 +6554,139 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
cancel_charge(memcg, nr_pages);
}

+/**
+ * mem_cgroup_uncharge - uncharge a page
+ * @page: page to uncharge
+ *
+ * Uncharge a page previously charged with mem_cgroup_try_charge() and
+ * mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge(struct page *page)
+{
+ struct memcg_batch_info *batch;
+ unsigned int nr_pages = 1;
+ struct mem_cgroup *memcg;
+ struct page_cgroup *pc;
+ unsigned long pc_flags;
+ unsigned long flags;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ pc = lookup_page_cgroup(page);
+
+ /* Every final put_page() ends up here */
+ if (!PageCgroupUsed(pc))
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+ /*
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point, we have fully
+ * exclusive access to the page.
+ */
+ memcg = pc->mem_cgroup;
+ pc_flags = pc->flags;
+ pc->flags = 0;
+
+ local_irq_save(flags);
+
+ if (nr_pages > 1)
+ goto direct;
+ if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ goto direct;
+ batch = &current->memcg_batch;
+ if (!batch->do_batch)
+ goto direct;
+ if (batch->memcg && batch->memcg != memcg)
+ goto direct;
+ if (!batch->memcg)
+ batch->memcg = memcg;
+ if (pc_flags & PCG_MEM)
+ batch->nr_pages++;
+ if (pc_flags & PCG_MEMSW)
+ batch->memsw_nr_pages++;
+ goto out;
+direct:
+ if (pc_flags & PCG_MEM)
+ res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
+ if (pc_flags & PCG_MEMSW)
+ res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
+ memcg_oom_recover(memcg);
+out:
+ mem_cgroup_charge_statistics(memcg, page, -nr_pages);
+ memcg_check_events(memcg, page);
+
+ local_irq_restore(flags);
+}
+
+/**
+ * mem_cgroup_migrate - migrate a charge to another page
+ * @oldpage: currently charged page
+ * @newpage: page to transfer the charge to
+ * @lrucare: both pages might be on the LRU already
+ *
+ * Migrate the charge from @oldpage to @newpage.
+ *
+ * Both pages must be locked, @newpage->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+ bool lrucare)
+{
+ unsigned int nr_pages = 1;
+ struct page_cgroup *pc;
+ int isolated;
+
+ VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+ VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage);
+ VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ /* Page cache replacement: new page already charged? */
+ pc = lookup_page_cgroup(newpage);
+ if (PageCgroupUsed(pc))
+ return;
+
+ /* Re-entrant migration: old page already uncharged? */
+ pc = lookup_page_cgroup(oldpage);
+ if (!PageCgroupUsed(pc))
+ return;
+
+ VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
+ VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
+
+ if (PageTransHuge(oldpage)) {
+ nr_pages <<= compound_order(oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage);
+ }
+
+ if (lrucare)
+ lock_page_lru(oldpage, &isolated);
+
+ pc->flags = 0;
+
+ if (lrucare)
+ unlock_page_lru(oldpage, isolated);
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages);
+ memcg_check_events(pc->mem_cgroup, oldpage);
+ local_irq_enable();
+
+ commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare);
+}
+
/*
* subsys_initcall() for memory controller.
*
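
mem_cgroup_migrate() replaces the old prepare/end migration pair: once the new page is locked and has its mapping set up, the charge is simply moved over in one step. A hypothetical call-site sketch; the surrounding migration code is not part of this excerpt:

    /* both pages locked, newpage->mapping already set up */
    mem_cgroup_migrate(oldpage, newpage, false);

    /* a caller that may find either page on the LRU, such as page
     * cache replacement, passes lrucare instead */
    mem_cgroup_migrate(oldpage, newpage, true);
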