@@ -25,7 +25,7 @@
  * GNU General Public License for more details.
  */
-#include <linux/res_counter.h>
+#include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
@@ -165,7 +165,7 @@ struct mem_cgroup_per_zone {
|
|
|
struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
|
|
|
|
|
|
struct rb_node tree_node; /* RB tree node */
|
|
|
- unsigned long long usage_in_excess;/* Set to the value by which */
|
|
|
+ unsigned long usage_in_excess;/* Set to the value by which */
|
|
|
/* the soft limit is exceeded*/
|
|
|
bool on_tree;
|
|
|
struct mem_cgroup *memcg; /* Back pointer, we cannot */
|
|
@@ -198,7 +198,7 @@ static struct mem_cgroup_tree soft_limit_tree __read_mostly;
|
|
|
|
|
|
struct mem_cgroup_threshold {
|
|
|
struct eventfd_ctx *eventfd;
|
|
|
- u64 threshold;
|
|
|
+ unsigned long threshold;
|
|
|
};
|
|
|
|
|
|
/* For threshold */
|
|
@@ -284,10 +284,13 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
|
|
|
*/
|
|
|
struct mem_cgroup {
|
|
|
struct cgroup_subsys_state css;
|
|
|
- /*
|
|
|
- * the counter to account for memory usage
|
|
|
- */
|
|
|
- struct res_counter res;
|
|
|
+
|
|
|
+ /* Accounted resources */
|
|
|
+ struct page_counter memory;
|
|
|
+ struct page_counter memsw;
|
|
|
+ struct page_counter kmem;
|
|
|
+
|
|
|
+ unsigned long soft_limit;
|
|
|
|
|
|
/* vmpressure notifications */
|
|
|
struct vmpressure vmpressure;
|
|
@@ -295,15 +298,6 @@ struct mem_cgroup {
|
|
|
/* css_online() has been completed */
|
|
|
int initialized;
|
|
|
|
|
|
- /*
|
|
|
- * the counter to account for mem+swap usage.
|
|
|
- */
|
|
|
- struct res_counter memsw;
|
|
|
-
|
|
|
- /*
|
|
|
- * the counter to account for kernel memory usage.
|
|
|
- */
|
|
|
- struct res_counter kmem;
|
|
|
/*
|
|
|
* Should the accounting and control be hierarchical, per subtree?
|
|
|
*/
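
The two hunks above replace the three res_counters (res, memsw, kmem) with page_counters named memory, memsw and kmem, plus a plain soft_limit field; the new counters account in pages rather than bytes. As a rough mental model only, not the kernel's lockless implementation in mm/page_counter.c, and with invented toy_* names, the fields this file relies on look roughly like this:

/*
 * Toy, single-threaded model of the page_counter fields used below.
 * The real counter lives in mm/page_counter.c and uses atomic ops;
 * everything here (toy_* names, TOY_COUNTER_MAX) is invented for
 * illustration.  All units are pages.
 */
#include <stdio.h>

#define TOY_COUNTER_MAX (~0UL)

struct toy_counter {
        unsigned long usage;            /* pages currently charged */
        unsigned long limit;            /* hard limit, in pages */
        unsigned long watermark;        /* historical peak of usage */
        unsigned long failcnt;          /* how many charges hit the limit */
        struct toy_counter *parent;     /* hierarchical parent, or NULL */
};

static void toy_counter_init(struct toy_counter *c, struct toy_counter *parent)
{
        c->usage = 0;
        c->limit = TOY_COUNTER_MAX;
        c->watermark = 0;
        c->failcnt = 0;
        c->parent = parent;
}

int main(void)
{
        struct toy_counter root, child;

        toy_counter_init(&root, NULL);
        toy_counter_init(&child, &root);
        printf("child starts unlimited: %lu\n", child.limit);
        return 0;
}

The parent pointer is what parent_mem_cgroup() walks in the hunks further down.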
@@ -650,7 +644,7 @@ static void disarm_kmem_keys(struct mem_cgroup *memcg)
|
|
|
* This check can't live in kmem destruction function,
|
|
|
* since the charges will outlive the cgroup
|
|
|
*/
|
|
|
- WARN_ON(res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0);
|
|
|
+ WARN_ON(page_counter_read(&memcg->kmem));
|
|
|
}
|
|
|
#else
|
|
|
static void disarm_kmem_keys(struct mem_cgroup *memcg)
|
|
@@ -706,7 +700,7 @@ soft_limit_tree_from_page(struct page *page)
|
|
|
|
|
|
static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
|
|
|
struct mem_cgroup_tree_per_zone *mctz,
|
|
|
- unsigned long long new_usage_in_excess)
|
|
|
+ unsigned long new_usage_in_excess)
|
|
|
{
|
|
|
struct rb_node **p = &mctz->rb_root.rb_node;
|
|
|
struct rb_node *parent = NULL;
|
|
@@ -755,10 +749,21 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
|
|
|
spin_unlock_irqrestore(&mctz->lock, flags);
|
|
|
}
|
|
|
|
|
|
+static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
|
|
|
+{
|
|
|
+ unsigned long nr_pages = page_counter_read(&memcg->memory);
|
|
|
+ unsigned long soft_limit = ACCESS_ONCE(memcg->soft_limit);
|
|
|
+ unsigned long excess = 0;
|
|
|
+
|
|
|
+ if (nr_pages > soft_limit)
|
|
|
+ excess = nr_pages - soft_limit;
|
|
|
+
|
|
|
+ return excess;
|
|
|
+}
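
The new soft_limit_excess() helper replaces res_counter_soft_limit_excess() and works in pages, so the >> PAGE_SHIFT conversions at its callers disappear in later hunks. The explicit comparison before subtracting matters because both values are unsigned; a minimal standalone sketch of the same clamp, with made-up names:

#include <stdio.h>

static unsigned long excess_pages(unsigned long usage, unsigned long soft_limit)
{
        /* Unsigned subtraction would wrap if usage <= soft_limit. */
        return usage > soft_limit ? usage - soft_limit : 0;
}

int main(void)
{
        printf("%lu\n", excess_pages(300, 256));        /* 44 */
        printf("%lu\n", excess_pages(100, 256));        /* 0  */
        return 0;
}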
|
|
|
|
|
|
static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
|
|
|
{
|
|
|
- unsigned long long excess;
|
|
|
+ unsigned long excess;
|
|
|
struct mem_cgroup_per_zone *mz;
|
|
|
struct mem_cgroup_tree_per_zone *mctz;
|
|
|
|
|
@@ -769,7 +774,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
|
|
|
*/
|
|
|
for (; memcg; memcg = parent_mem_cgroup(memcg)) {
|
|
|
mz = mem_cgroup_page_zoneinfo(memcg, page);
|
|
|
- excess = res_counter_soft_limit_excess(&memcg->res);
|
|
|
+ excess = soft_limit_excess(memcg);
|
|
|
/*
|
|
|
* We have to update the tree if mz is on RB-tree or
|
|
|
* mem is over its softlimit.
|
|
@@ -825,7 +830,7 @@ retry:
|
|
|
* position in the tree.
|
|
|
*/
|
|
|
__mem_cgroup_remove_exceeded(mz, mctz);
|
|
|
- if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
|
|
|
+ if (!soft_limit_excess(mz->memcg) ||
|
|
|
!css_tryget_online(&mz->memcg->css))
|
|
|
goto retry;
|
|
|
done:
|
|
@@ -1492,7 +1497,7 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
|
|
|
return inactive * inactive_ratio < active;
|
|
|
}
|
|
|
|
|
|
-#define mem_cgroup_from_res_counter(counter, member) \
|
|
|
+#define mem_cgroup_from_counter(counter, member) \
|
|
|
container_of(counter, struct mem_cgroup, member)
|
|
|
|
|
|
/**
|
|
@@ -1504,12 +1509,23 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
|
|
|
*/
|
|
|
static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
|
|
|
{
|
|
|
- unsigned long long margin;
|
|
|
+ unsigned long margin = 0;
|
|
|
+ unsigned long count;
|
|
|
+ unsigned long limit;
|
|
|
|
|
|
- margin = res_counter_margin(&memcg->res);
|
|
|
- if (do_swap_account)
|
|
|
- margin = min(margin, res_counter_margin(&memcg->memsw));
|
|
|
- return margin >> PAGE_SHIFT;
|
|
|
+ count = page_counter_read(&memcg->memory);
|
|
|
+ limit = ACCESS_ONCE(memcg->memory.limit);
|
|
|
+ if (count < limit)
|
|
|
+ margin = limit - count;
|
|
|
+
|
|
|
+ if (do_swap_account) {
|
|
|
+ count = page_counter_read(&memcg->memsw);
|
|
|
+ limit = ACCESS_ONCE(memcg->memsw.limit);
|
|
|
+ if (count <= limit)
|
|
|
+ margin = min(margin, limit - count);
|
|
|
+ }
|
|
|
+
|
|
|
+ return margin;
|
|
|
}
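
mem_cgroup_margin() now computes headroom directly from the counters and returns pages, where the old version read byte-based res_counter margins and shifted by PAGE_SHIFT at the end; with swap accounting enabled the result is the smaller of the memory and memory+swap headrooms. A simplified sketch of the same arithmetic, with invented names and no ACCESS_ONCE():

#include <stdio.h>

static unsigned long headroom(unsigned long count, unsigned long limit)
{
        return count < limit ? limit - count : 0;
}

static unsigned long toy_margin(unsigned long mem_count, unsigned long mem_limit,
                                unsigned long memsw_count, unsigned long memsw_limit,
                                int do_swap_account)
{
        unsigned long margin = headroom(mem_count, mem_limit);

        if (do_swap_account) {
                unsigned long sw = headroom(memsw_count, memsw_limit);

                if (sw < margin)
                        margin = sw;
        }
        return margin;
}

int main(void)
{
        /* memory has 100 pages of room, memory+swap only 40 -> margin is 40 */
        printf("%lu\n", toy_margin(400, 500, 760, 800, 1));
        return 0;
}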
|
|
|
|
|
|
int mem_cgroup_swappiness(struct mem_cgroup *memcg)
|
|
@@ -1644,18 +1660,15 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
- pr_info("memory: usage %llukB, limit %llukB, failcnt %llu\n",
|
|
|
- res_counter_read_u64(&memcg->res, RES_USAGE) >> 10,
|
|
|
- res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10,
|
|
|
- res_counter_read_u64(&memcg->res, RES_FAILCNT));
|
|
|
- pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %llu\n",
|
|
|
- res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10,
|
|
|
- res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10,
|
|
|
- res_counter_read_u64(&memcg->memsw, RES_FAILCNT));
|
|
|
- pr_info("kmem: usage %llukB, limit %llukB, failcnt %llu\n",
|
|
|
- res_counter_read_u64(&memcg->kmem, RES_USAGE) >> 10,
|
|
|
- res_counter_read_u64(&memcg->kmem, RES_LIMIT) >> 10,
|
|
|
- res_counter_read_u64(&memcg->kmem, RES_FAILCNT));
|
|
|
+ pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
|
|
|
+ K((u64)page_counter_read(&memcg->memory)),
|
|
|
+ K((u64)memcg->memory.limit), memcg->memory.failcnt);
|
|
|
+ pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n",
|
|
|
+ K((u64)page_counter_read(&memcg->memsw)),
|
|
|
+ K((u64)memcg->memsw.limit), memcg->memsw.failcnt);
|
|
|
+ pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n",
|
|
|
+ K((u64)page_counter_read(&memcg->kmem)),
|
|
|
+ K((u64)memcg->kmem.limit), memcg->kmem.failcnt);
|
|
|
|
|
|
for_each_mem_cgroup_tree(iter, memcg) {
|
|
|
pr_info("Memory cgroup stats for ");
|
|
@@ -1695,28 +1708,17 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
|
|
|
/*
|
|
|
* Return the memory (and swap, if configured) limit for a memcg.
|
|
|
*/
|
|
|
-static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
|
|
|
+static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
|
|
|
{
|
|
|
- u64 limit;
|
|
|
+ unsigned long limit;
|
|
|
|
|
|
- limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
|
|
|
-
|
|
|
- /*
|
|
|
- * Do not consider swap space if we cannot swap due to swappiness
|
|
|
- */
|
|
|
+ limit = memcg->memory.limit;
|
|
|
if (mem_cgroup_swappiness(memcg)) {
|
|
|
- u64 memsw;
|
|
|
+ unsigned long memsw_limit;
|
|
|
|
|
|
- limit += total_swap_pages << PAGE_SHIFT;
|
|
|
- memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
|
|
|
-
|
|
|
- /*
|
|
|
- * If memsw is finite and limits the amount of swap space
|
|
|
- * available to this memcg, return that limit.
|
|
|
- */
|
|
|
- limit = min(limit, memsw);
|
|
|
+ memsw_limit = memcg->memsw.limit;
|
|
|
+ limit = min(limit + total_swap_pages, memsw_limit);
|
|
|
}
|
|
|
-
|
|
|
return limit;
|
|
|
}
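
mem_cgroup_get_limit() now returns a page count, which is why the caller in the next hunk drops its >> PAGE_SHIFT. When the cgroup may swap, the effective ceiling is the smaller of "memory limit plus all of swap" and the memsw limit. A standalone sketch of that calculation with hypothetical names and example numbers (4K pages assumed):

#include <stdio.h>

static unsigned long toy_oom_limit(unsigned long mem_limit,
                                   unsigned long memsw_limit,
                                   unsigned long total_swap_pages,
                                   int swappiness)
{
        unsigned long limit = mem_limit;

        if (swappiness) {
                unsigned long with_swap = mem_limit + total_swap_pages;

                limit = with_swap < memsw_limit ? with_swap : memsw_limit;
        }
        return limit;
}

int main(void)
{
        /* 512M memory limit, 768M memsw limit, 1G of swap, 4K pages */
        printf("%lu pages\n", toy_oom_limit(131072, 196608, 262144, 60));
        return 0;
}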
@@ -1740,7 +1742,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
|
|
}
|
|
|
|
|
|
check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
|
|
|
- totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
|
|
|
+ totalpages = mem_cgroup_get_limit(memcg) ? : 1;
|
|
|
for_each_mem_cgroup_tree(iter, memcg) {
|
|
|
struct css_task_iter it;
|
|
|
struct task_struct *task;
|
|
@@ -1943,7 +1945,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
|
|
|
.priority = 0,
|
|
|
};
|
|
|
|
|
|
- excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
|
|
|
+ excess = soft_limit_excess(root_memcg);
|
|
|
|
|
|
while (1) {
|
|
|
victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
|
|
@@ -1974,7 +1976,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
|
|
|
total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
|
|
|
zone, &nr_scanned);
|
|
|
*total_scanned += nr_scanned;
|
|
|
- if (!res_counter_soft_limit_excess(&root_memcg->res))
|
|
|
+ if (!soft_limit_excess(root_memcg))
|
|
|
break;
|
|
|
}
|
|
|
mem_cgroup_iter_break(root_memcg, victim);
|
|
@@ -2316,33 +2318,31 @@ static DEFINE_MUTEX(percpu_charge_mutex);
|
|
|
static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
|
|
|
{
|
|
|
struct memcg_stock_pcp *stock;
|
|
|
- bool ret = true;
|
|
|
+ bool ret = false;
|
|
|
|
|
|
if (nr_pages > CHARGE_BATCH)
|
|
|
- return false;
|
|
|
+ return ret;
|
|
|
|
|
|
stock = &get_cpu_var(memcg_stock);
|
|
|
- if (memcg == stock->cached && stock->nr_pages >= nr_pages)
|
|
|
+ if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
|
|
|
stock->nr_pages -= nr_pages;
|
|
|
- else /* need to call res_counter_charge */
|
|
|
- ret = false;
|
|
|
+ ret = true;
|
|
|
+ }
|
|
|
put_cpu_var(memcg_stock);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * Returns stocks cached in percpu to res_counter and reset cached information.
|
|
|
+ * Returns stocks cached in percpu and reset cached information.
|
|
|
*/
|
|
|
static void drain_stock(struct memcg_stock_pcp *stock)
|
|
|
{
|
|
|
struct mem_cgroup *old = stock->cached;
|
|
|
|
|
|
if (stock->nr_pages) {
|
|
|
- unsigned long bytes = stock->nr_pages * PAGE_SIZE;
|
|
|
-
|
|
|
- res_counter_uncharge(&old->res, bytes);
|
|
|
+ page_counter_uncharge(&old->memory, stock->nr_pages);
|
|
|
if (do_swap_account)
|
|
|
- res_counter_uncharge(&old->memsw, bytes);
|
|
|
+ page_counter_uncharge(&old->memsw, stock->nr_pages);
|
|
|
stock->nr_pages = 0;
|
|
|
}
|
|
|
stock->cached = NULL;
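
consume_stock() keeps its behaviour but inverts the flag: ret starts out false and is only set when the cached per-CPU stock belongs to this memcg and holds enough pre-charged pages; drain_stock() then hands leftover pages straight back with page_counter_uncharge() instead of converting to bytes first. A single-threaded sketch of the fast path (toy_* names are invented; the real code pins the CPU with get_cpu_var()):

#include <stdbool.h>
#include <stdio.h>

#define TOY_BATCH 32U

struct toy_stock {
        void *cached;                   /* which group the pages belong to */
        unsigned int nr_pages;          /* pre-charged pages available */
};

static bool toy_consume_stock(struct toy_stock *stock, void *memcg,
                              unsigned int nr_pages)
{
        bool ret = false;

        if (nr_pages > TOY_BATCH)
                return ret;

        if (stock->cached == memcg && stock->nr_pages >= nr_pages) {
                stock->nr_pages -= nr_pages;
                ret = true;
        }
        return ret;
}

int main(void)
{
        int group;
        struct toy_stock stock = { .cached = &group, .nr_pages = 8 };

        printf("%d\n", toy_consume_stock(&stock, &group, 4));  /* 1 */
        printf("%u left\n", stock.nr_pages);                   /* 4 left */
        return 0;
}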
@@ -2371,7 +2371,7 @@ static void __init memcg_stock_init(void)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * Cache charges(val) which is from res_counter, to local per_cpu area.
|
|
|
+ * Cache charges(val) to local per_cpu area.
|
|
|
* This will be consumed by consume_stock() function, later.
|
|
|
*/
|
|
|
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
|
|
@@ -2431,8 +2431,7 @@ out:
|
|
|
/*
|
|
|
* Tries to drain stocked charges in other cpus. This function is asynchronous
|
|
|
* and just put a work per cpu for draining localy on each cpu. Caller can
|
|
|
- * expects some charges will be back to res_counter later but cannot wait for
|
|
|
- * it.
|
|
|
+ * expects some charges will be back later but cannot wait for it.
|
|
|
*/
|
|
|
static void drain_all_stock_async(struct mem_cgroup *root_memcg)
|
|
|
{
|
|
@@ -2506,9 +2505,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
|
|
unsigned int batch = max(CHARGE_BATCH, nr_pages);
|
|
|
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
|
|
|
struct mem_cgroup *mem_over_limit;
|
|
|
- struct res_counter *fail_res;
|
|
|
+ struct page_counter *counter;
|
|
|
unsigned long nr_reclaimed;
|
|
|
- unsigned long long size;
|
|
|
bool may_swap = true;
|
|
|
bool drained = false;
|
|
|
int ret = 0;
|
|
@@ -2519,16 +2517,15 @@ retry:
|
|
|
if (consume_stock(memcg, nr_pages))
|
|
|
goto done;
|
|
|
|
|
|
- size = batch * PAGE_SIZE;
|
|
|
if (!do_swap_account ||
|
|
|
- !res_counter_charge(&memcg->memsw, size, &fail_res)) {
|
|
|
- if (!res_counter_charge(&memcg->res, size, &fail_res))
|
|
|
+ !page_counter_try_charge(&memcg->memsw, batch, &counter)) {
|
|
|
+ if (!page_counter_try_charge(&memcg->memory, batch, &counter))
|
|
|
goto done_restock;
|
|
|
if (do_swap_account)
|
|
|
- res_counter_uncharge(&memcg->memsw, size);
|
|
|
- mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
|
|
|
+ page_counter_uncharge(&memcg->memsw, batch);
|
|
|
+ mem_over_limit = mem_cgroup_from_counter(counter, memory);
|
|
|
} else {
|
|
|
- mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
|
|
|
+ mem_over_limit = mem_cgroup_from_counter(counter, memsw);
|
|
|
may_swap = false;
|
|
|
}
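
The charge order in try_charge() is preserved: memsw is tried first, then memory, and a failed memory charge rolls the memsw charge back; whichever counter refused identifies mem_over_limit via container_of() (the new mem_cgroup_from_counter). A toy model of just that ordering, not the kernel API and without batching, reclaim or OOM handling:

#include <stdbool.h>
#include <stdio.h>

struct toy_counter {
        unsigned long usage;
        unsigned long limit;
};

static bool toy_try_charge(struct toy_counter *c, unsigned long nr_pages)
{
        if (c->usage + nr_pages > c->limit)
                return false;           /* over limit, nothing charged */
        c->usage += nr_pages;
        return true;
}

static void toy_uncharge(struct toy_counter *c, unsigned long nr_pages)
{
        c->usage -= nr_pages;
}

/* Returns 0 on success, -1 if "memory" was the bottleneck, -2 for "memsw". */
static int toy_charge(struct toy_counter *memory, struct toy_counter *memsw,
                      unsigned long nr_pages, bool do_swap_account)
{
        if (do_swap_account && !toy_try_charge(memsw, nr_pages))
                return -2;
        if (!toy_try_charge(memory, nr_pages)) {
                if (do_swap_account)
                        toy_uncharge(memsw, nr_pages);  /* roll back */
                return -1;
        }
        return 0;
}

int main(void)
{
        struct toy_counter memory = { 90, 100 }, memsw = { 90, 200 };

        printf("%d\n", toy_charge(&memory, &memsw, 32, true)); /* -1 */
        printf("memsw usage %lu\n", memsw.usage);               /* still 90 */
        return 0;
}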
@@ -2611,32 +2608,12 @@ done:
|
|
|
|
|
|
static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
|
|
|
{
|
|
|
- unsigned long bytes = nr_pages * PAGE_SIZE;
|
|
|
-
|
|
|
if (mem_cgroup_is_root(memcg))
|
|
|
return;
|
|
|
|
|
|
- res_counter_uncharge(&memcg->res, bytes);
|
|
|
+ page_counter_uncharge(&memcg->memory, nr_pages);
|
|
|
if (do_swap_account)
|
|
|
- res_counter_uncharge(&memcg->memsw, bytes);
|
|
|
-}
|
|
|
-
|
|
|
-/*
|
|
|
- * Cancel chrages in this cgroup....doesn't propagate to parent cgroup.
|
|
|
- * This is useful when moving usage to parent cgroup.
|
|
|
- */
|
|
|
-static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
|
|
|
- unsigned int nr_pages)
|
|
|
-{
|
|
|
- unsigned long bytes = nr_pages * PAGE_SIZE;
|
|
|
-
|
|
|
- if (mem_cgroup_is_root(memcg))
|
|
|
- return;
|
|
|
-
|
|
|
- res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
|
|
|
- if (do_swap_account)
|
|
|
- res_counter_uncharge_until(&memcg->memsw,
|
|
|
- memcg->memsw.parent, bytes);
|
|
|
+ page_counter_uncharge(&memcg->memsw, nr_pages);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2760,8 +2737,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
|
|
|
unlock_page_lru(page, isolated);
|
|
|
}
|
|
|
|
|
|
-static DEFINE_MUTEX(set_limit_mutex);
|
|
|
-
|
|
|
#ifdef CONFIG_MEMCG_KMEM
|
|
|
/*
|
|
|
* The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
|
|
@@ -2804,16 +2779,17 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
-static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
|
|
|
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
|
|
|
+ unsigned long nr_pages)
|
|
|
{
|
|
|
- struct res_counter *fail_res;
|
|
|
+ struct page_counter *counter;
|
|
|
int ret = 0;
|
|
|
|
|
|
- ret = res_counter_charge(&memcg->kmem, size, &fail_res);
|
|
|
- if (ret)
|
|
|
+ ret = page_counter_try_charge(&memcg->kmem, nr_pages, &counter);
|
|
|
+ if (ret < 0)
|
|
|
return ret;
|
|
|
|
|
|
- ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
|
|
|
+ ret = try_charge(memcg, gfp, nr_pages);
|
|
|
if (ret == -EINTR) {
|
|
|
/*
|
|
|
* try_charge() chose to bypass to root due to OOM kill or
|
|
@@ -2830,25 +2806,25 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
|
|
|
* when the allocation triggers should have been already
|
|
|
* directed to the root cgroup in memcontrol.h
|
|
|
*/
|
|
|
- res_counter_charge_nofail(&memcg->res, size, &fail_res);
|
|
|
+ page_counter_charge(&memcg->memory, nr_pages);
|
|
|
if (do_swap_account)
|
|
|
- res_counter_charge_nofail(&memcg->memsw, size,
|
|
|
- &fail_res);
|
|
|
+ page_counter_charge(&memcg->memsw, nr_pages);
|
|
|
ret = 0;
|
|
|
} else if (ret)
|
|
|
- res_counter_uncharge(&memcg->kmem, size);
|
|
|
+ page_counter_uncharge(&memcg->kmem, nr_pages);
|
|
|
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
|
|
|
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
|
|
|
+ unsigned long nr_pages)
|
|
|
{
|
|
|
- res_counter_uncharge(&memcg->res, size);
|
|
|
+ page_counter_uncharge(&memcg->memory, nr_pages);
|
|
|
if (do_swap_account)
|
|
|
- res_counter_uncharge(&memcg->memsw, size);
|
|
|
+ page_counter_uncharge(&memcg->memsw, nr_pages);
|
|
|
|
|
|
/* Not down to 0 */
|
|
|
- if (res_counter_uncharge(&memcg->kmem, size))
|
|
|
+ if (page_counter_uncharge(&memcg->kmem, nr_pages))
|
|
|
return;
|
|
|
|
|
|
/*
|
|
@@ -3124,19 +3100,21 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
|
|
|
|
|
|
int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
|
|
|
{
|
|
|
+ unsigned int nr_pages = 1 << order;
|
|
|
int res;
|
|
|
|
|
|
- res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp,
|
|
|
- PAGE_SIZE << order);
|
|
|
+ res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
|
|
|
if (!res)
|
|
|
- atomic_add(1 << order, &cachep->memcg_params->nr_pages);
|
|
|
+ atomic_add(nr_pages, &cachep->memcg_params->nr_pages);
|
|
|
return res;
|
|
|
}
|
|
|
|
|
|
void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
|
|
|
{
|
|
|
- memcg_uncharge_kmem(cachep->memcg_params->memcg, PAGE_SIZE << order);
|
|
|
- atomic_sub(1 << order, &cachep->memcg_params->nr_pages);
|
|
|
+ unsigned int nr_pages = 1 << order;
|
|
|
+
|
|
|
+ memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
|
|
|
+ atomic_sub(nr_pages, &cachep->memcg_params->nr_pages);
|
|
|
}
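
Since memcg_charge_kmem() and memcg_uncharge_kmem() now take page counts, the slab and page-allocation callers pass 1 << order instead of PAGE_SIZE << order. A trivial sketch of the unit change (TOY_PAGE_SIZE assumed to be 4K):

#include <stdio.h>

#define TOY_PAGE_SIZE 4096UL

int main(void)
{
        int order = 3;
        unsigned long nr_pages = 1UL << order;            /* what the counter gets now */
        unsigned long nr_bytes = TOY_PAGE_SIZE << order;  /* what res_counter used to get */

        printf("%lu pages, %lu bytes\n", nr_pages, nr_bytes);
        return 0;
}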
|
|
|
|
|
|
/*
|
|
@@ -3257,7 +3235,7 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
- ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
|
|
|
+ ret = memcg_charge_kmem(memcg, gfp, 1 << order);
|
|
|
if (!ret)
|
|
|
*_memcg = memcg;
|
|
|
|
|
@@ -3274,7 +3252,7 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
|
|
|
|
|
|
/* The page allocation failed. Revert */
|
|
|
if (!page) {
|
|
|
- memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
|
|
|
+ memcg_uncharge_kmem(memcg, 1 << order);
|
|
|
return;
|
|
|
}
|
|
|
/*
|
|
@@ -3307,7 +3285,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
|
|
|
return;
|
|
|
|
|
|
VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
|
|
|
- memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
|
|
|
+ memcg_uncharge_kmem(memcg, 1 << order);
|
|
|
}
|
|
|
#else
|
|
|
static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
|
|
@@ -3485,8 +3463,12 @@ static int mem_cgroup_move_parent(struct page *page,
|
|
|
|
|
|
ret = mem_cgroup_move_account(page, nr_pages,
|
|
|
pc, child, parent);
|
|
|
- if (!ret)
|
|
|
- __mem_cgroup_cancel_local_charge(child, nr_pages);
|
|
|
+ if (!ret) {
|
|
|
+ /* Take charge off the local counters */
|
|
|
+ page_counter_cancel(&child->memory, nr_pages);
|
|
|
+ if (do_swap_account)
|
|
|
+ page_counter_cancel(&child->memsw, nr_pages);
|
|
|
+ }
|
|
|
|
|
|
if (nr_pages > 1)
|
|
|
compound_unlock_irqrestore(page, flags);
|
|
@@ -3516,7 +3498,7 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
|
|
|
*
|
|
|
* Returns 0 on success, -EINVAL on failure.
|
|
|
*
|
|
|
- * The caller must have charged to @to, IOW, called res_counter_charge() about
|
|
|
+ * The caller must have charged to @to, IOW, called page_counter_charge() about
|
|
|
* both res and memsw, and called css_get().
|
|
|
*/
|
|
|
static int mem_cgroup_move_swap_account(swp_entry_t entry,
|
|
@@ -3532,7 +3514,7 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
|
|
|
mem_cgroup_swap_statistics(to, true);
|
|
|
/*
|
|
|
* This function is only called from task migration context now.
|
|
|
- * It postpones res_counter and refcount handling till the end
|
|
|
+ * It postpones page_counter and refcount handling till the end
|
|
|
* of task migration(mem_cgroup_clear_mc()) for performance
|
|
|
* improvement. But we cannot postpone css_get(to) because if
|
|
|
* the process that has been moved to @to does swap-in, the
|
|
@@ -3590,60 +3572,57 @@ void mem_cgroup_print_bad_page(struct page *page)
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
+static DEFINE_MUTEX(memcg_limit_mutex);
|
|
|
+
|
|
|
static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
|
|
|
- unsigned long long val)
|
|
|
+ unsigned long limit)
|
|
|
{
|
|
|
+ unsigned long curusage;
|
|
|
+ unsigned long oldusage;
|
|
|
+ bool enlarge = false;
|
|
|
int retry_count;
|
|
|
- int ret = 0;
|
|
|
- int children = mem_cgroup_count_children(memcg);
|
|
|
- u64 curusage, oldusage;
|
|
|
- int enlarge;
|
|
|
+ int ret;
|
|
|
|
|
|
/*
|
|
|
* For keeping hierarchical_reclaim simple, how long we should retry
|
|
|
* is depends on callers. We set our retry-count to be function
|
|
|
* of # of children which we should visit in this loop.
|
|
|
*/
|
|
|
- retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
|
|
|
+ retry_count = MEM_CGROUP_RECLAIM_RETRIES *
|
|
|
+ mem_cgroup_count_children(memcg);
|
|
|
|
|
|
- oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
|
|
|
+ oldusage = page_counter_read(&memcg->memory);
|
|
|
|
|
|
- enlarge = 0;
|
|
|
- while (retry_count) {
|
|
|
+ do {
|
|
|
if (signal_pending(current)) {
|
|
|
ret = -EINTR;
|
|
|
break;
|
|
|
}
|
|
|
- /*
|
|
|
- * Rather than hide all in some function, I do this in
|
|
|
- * open coded manner. You see what this really does.
|
|
|
- * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
|
|
|
- */
|
|
|
- mutex_lock(&set_limit_mutex);
|
|
|
- if (res_counter_read_u64(&memcg->memsw, RES_LIMIT) < val) {
|
|
|
+
|
|
|
+ mutex_lock(&memcg_limit_mutex);
|
|
|
+ if (limit > memcg->memsw.limit) {
|
|
|
+ mutex_unlock(&memcg_limit_mutex);
|
|
|
ret = -EINVAL;
|
|
|
- mutex_unlock(&set_limit_mutex);
|
|
|
break;
|
|
|
}
|
|
|
-
|
|
|
- if (res_counter_read_u64(&memcg->res, RES_LIMIT) < val)
|
|
|
- enlarge = 1;
|
|
|
-
|
|
|
- ret = res_counter_set_limit(&memcg->res, val);
|
|
|
- mutex_unlock(&set_limit_mutex);
|
|
|
+ if (limit > memcg->memory.limit)
|
|
|
+ enlarge = true;
|
|
|
+ ret = page_counter_limit(&memcg->memory, limit);
|
|
|
+ mutex_unlock(&memcg_limit_mutex);
|
|
|
|
|
|
if (!ret)
|
|
|
break;
|
|
|
|
|
|
try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true);
|
|
|
|
|
|
- curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
|
|
|
+ curusage = page_counter_read(&memcg->memory);
|
|
|
/* Usage is reduced ? */
|
|
|
if (curusage >= oldusage)
|
|
|
retry_count--;
|
|
|
else
|
|
|
oldusage = curusage;
|
|
|
- }
|
|
|
+ } while (retry_count);
|
|
|
+
|
|
|
if (!ret && enlarge)
|
|
|
memcg_oom_recover(memcg);
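
The resize path keeps its shape: take the new memcg_limit_mutex (which replaces set_limit_mutex), refuse limits above the memsw limit, otherwise try to install the new limit and reclaim until usage fits or the retries run out. The skeleton below is a deliberately simplified, single-threaded sketch; the invariant check and the "only count a retry when usage did not shrink" detail of the real loop are omitted, and all names and helpers are invented:

#include <stdio.h>

static unsigned long usage = 120;               /* pages currently charged */

static int toy_set_limit(unsigned long *limit, unsigned long new_limit)
{
        if (usage > new_limit)
                return -1;                      /* still too big, caller must reclaim */
        *limit = new_limit;
        return 0;
}

static void toy_reclaim(void)
{
        if (usage >= 10)
                usage -= 10;                    /* pretend we freed 10 pages */
}

int main(void)
{
        unsigned long limit = ~0UL;
        int retries = 5;
        int ret;

        do {
                ret = toy_set_limit(&limit, 100);
                if (!ret)
                        break;
                toy_reclaim();
        } while (retries--);

        printf("ret=%d usage=%lu limit=%lu\n", ret, usage, limit);
        return 0;
}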
@@ -3651,52 +3630,53 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
|
|
|
}
|
|
|
|
|
|
static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
|
|
|
- unsigned long long val)
|
|
|
+ unsigned long limit)
|
|
|
{
|
|
|
+ unsigned long curusage;
|
|
|
+ unsigned long oldusage;
|
|
|
+ bool enlarge = false;
|
|
|
int retry_count;
|
|
|
- u64 oldusage, curusage;
|
|
|
- int children = mem_cgroup_count_children(memcg);
|
|
|
- int ret = -EBUSY;
|
|
|
- int enlarge = 0;
|
|
|
+ int ret;
|
|
|
|
|
|
/* see mem_cgroup_resize_res_limit */
|
|
|
- retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
|
|
|
- oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
|
|
|
- while (retry_count) {
|
|
|
+ retry_count = MEM_CGROUP_RECLAIM_RETRIES *
|
|
|
+ mem_cgroup_count_children(memcg);
|
|
|
+
|
|
|
+ oldusage = page_counter_read(&memcg->memsw);
|
|
|
+
|
|
|
+ do {
|
|
|
if (signal_pending(current)) {
|
|
|
ret = -EINTR;
|
|
|
break;
|
|
|
}
|
|
|
- /*
|
|
|
- * Rather than hide all in some function, I do this in
|
|
|
- * open coded manner. You see what this really does.
|
|
|
- * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
|
|
|
- */
|
|
|
- mutex_lock(&set_limit_mutex);
|
|
|
- if (res_counter_read_u64(&memcg->res, RES_LIMIT) > val) {
|
|
|
+
|
|
|
+ mutex_lock(&memcg_limit_mutex);
|
|
|
+ if (limit < memcg->memory.limit) {
|
|
|
+ mutex_unlock(&memcg_limit_mutex);
|
|
|
ret = -EINVAL;
|
|
|
- mutex_unlock(&set_limit_mutex);
|
|
|
break;
|
|
|
}
|
|
|
- if (res_counter_read_u64(&memcg->memsw, RES_LIMIT) < val)
|
|
|
- enlarge = 1;
|
|
|
- ret = res_counter_set_limit(&memcg->memsw, val);
|
|
|
- mutex_unlock(&set_limit_mutex);
|
|
|
+ if (limit > memcg->memsw.limit)
|
|
|
+ enlarge = true;
|
|
|
+ ret = page_counter_limit(&memcg->memsw, limit);
|
|
|
+ mutex_unlock(&memcg_limit_mutex);
|
|
|
|
|
|
if (!ret)
|
|
|
break;
|
|
|
|
|
|
try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, false);
|
|
|
|
|
|
- curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
|
|
|
+ curusage = page_counter_read(&memcg->memsw);
|
|
|
/* Usage is reduced ? */
|
|
|
if (curusage >= oldusage)
|
|
|
retry_count--;
|
|
|
else
|
|
|
oldusage = curusage;
|
|
|
- }
|
|
|
+ } while (retry_count);
|
|
|
+
|
|
|
if (!ret && enlarge)
|
|
|
memcg_oom_recover(memcg);
|
|
|
+
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -3709,7 +3689,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
|
|
unsigned long reclaimed;
|
|
|
int loop = 0;
|
|
|
struct mem_cgroup_tree_per_zone *mctz;
|
|
|
- unsigned long long excess;
|
|
|
+ unsigned long excess;
|
|
|
unsigned long nr_scanned;
|
|
|
|
|
|
if (order > 0)
|
|
@@ -3763,7 +3743,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
|
|
} while (1);
|
|
|
}
|
|
|
__mem_cgroup_remove_exceeded(mz, mctz);
|
|
|
- excess = res_counter_soft_limit_excess(&mz->memcg->res);
|
|
|
+ excess = soft_limit_excess(mz->memcg);
|
|
|
/*
|
|
|
* One school of thought says that we should not add
|
|
|
* back the node to the tree if reclaim returns 0.
|
|
@@ -3856,7 +3836,6 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
|
|
|
static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
|
|
|
{
|
|
|
int node, zid;
|
|
|
- u64 usage;
|
|
|
|
|
|
do {
|
|
|
/* This is for making all *used* pages to be on LRU. */
|
|
@@ -3888,9 +3867,8 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
|
|
|
* right after the check. RES_USAGE should be safe as we always
|
|
|
* charge before adding to the LRU.
|
|
|
*/
|
|
|
- usage = res_counter_read_u64(&memcg->res, RES_USAGE) -
|
|
|
- res_counter_read_u64(&memcg->kmem, RES_USAGE);
|
|
|
- } while (usage > 0);
|
|
|
+ } while (page_counter_read(&memcg->memory) -
|
|
|
+ page_counter_read(&memcg->kmem) > 0);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -3930,7 +3908,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
|
|
|
/* we call try-to-free pages for make this cgroup empty */
|
|
|
lru_add_drain_all();
|
|
|
/* try to free all pages in this cgroup */
|
|
|
- while (nr_retries && res_counter_read_u64(&memcg->res, RES_USAGE) > 0) {
|
|
|
+ while (nr_retries && page_counter_read(&memcg->memory)) {
|
|
|
int progress;
|
|
|
|
|
|
if (signal_pending(current))
|
|
@@ -4001,8 +3979,8 @@ out:
|
|
|
return retval;
|
|
|
}
|
|
|
|
|
|
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
|
|
|
- enum mem_cgroup_stat_index idx)
|
|
|
+static unsigned long tree_stat(struct mem_cgroup *memcg,
|
|
|
+ enum mem_cgroup_stat_index idx)
|
|
|
{
|
|
|
struct mem_cgroup *iter;
|
|
|
long val = 0;
|
|
@@ -4020,55 +3998,72 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
|
|
|
{
|
|
|
u64 val;
|
|
|
|
|
|
- if (!mem_cgroup_is_root(memcg)) {
|
|
|
+ if (mem_cgroup_is_root(memcg)) {
|
|
|
+ val = tree_stat(memcg, MEM_CGROUP_STAT_CACHE);
|
|
|
+ val += tree_stat(memcg, MEM_CGROUP_STAT_RSS);
|
|
|
+ if (swap)
|
|
|
+ val += tree_stat(memcg, MEM_CGROUP_STAT_SWAP);
|
|
|
+ } else {
|
|
|
if (!swap)
|
|
|
- return res_counter_read_u64(&memcg->res, RES_USAGE);
|
|
|
+ val = page_counter_read(&memcg->memory);
|
|
|
else
|
|
|
- return res_counter_read_u64(&memcg->memsw, RES_USAGE);
|
|
|
+ val = page_counter_read(&memcg->memsw);
|
|
|
}
|
|
|
-
|
|
|
- /*
|
|
|
- * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
|
|
|
- * as well as in MEM_CGROUP_STAT_RSS_HUGE.
|
|
|
- */
|
|
|
- val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
|
|
|
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
|
|
|
-
|
|
|
- if (swap)
|
|
|
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
|
|
|
-
|
|
|
return val << PAGE_SHIFT;
|
|
|
}
|
|
|
|
|
|
+enum {
|
|
|
+ RES_USAGE,
|
|
|
+ RES_LIMIT,
|
|
|
+ RES_MAX_USAGE,
|
|
|
+ RES_FAILCNT,
|
|
|
+ RES_SOFT_LIMIT,
|
|
|
+};
|
|
|
|
|
|
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
|
|
|
struct cftype *cft)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
|
|
- enum res_type type = MEMFILE_TYPE(cft->private);
|
|
|
- int name = MEMFILE_ATTR(cft->private);
|
|
|
+ struct page_counter *counter;
|
|
|
|
|
|
- switch (type) {
|
|
|
+ switch (MEMFILE_TYPE(cft->private)) {
|
|
|
case _MEM:
|
|
|
- if (name == RES_USAGE)
|
|
|
- return mem_cgroup_usage(memcg, false);
|
|
|
- return res_counter_read_u64(&memcg->res, name);
|
|
|
+ counter = &memcg->memory;
|
|
|
+ break;
|
|
|
case _MEMSWAP:
|
|
|
- if (name == RES_USAGE)
|
|
|
- return mem_cgroup_usage(memcg, true);
|
|
|
- return res_counter_read_u64(&memcg->memsw, name);
|
|
|
+ counter = &memcg->memsw;
|
|
|
+ break;
|
|
|
case _KMEM:
|
|
|
- return res_counter_read_u64(&memcg->kmem, name);
|
|
|
+ counter = &memcg->kmem;
|
|
|
break;
|
|
|
default:
|
|
|
BUG();
|
|
|
}
|
|
|
+
|
|
|
+ switch (MEMFILE_ATTR(cft->private)) {
|
|
|
+ case RES_USAGE:
|
|
|
+ if (counter == &memcg->memory)
|
|
|
+ return mem_cgroup_usage(memcg, false);
|
|
|
+ if (counter == &memcg->memsw)
|
|
|
+ return mem_cgroup_usage(memcg, true);
|
|
|
+ return (u64)page_counter_read(counter) * PAGE_SIZE;
|
|
|
+ case RES_LIMIT:
|
|
|
+ return (u64)counter->limit * PAGE_SIZE;
|
|
|
+ case RES_MAX_USAGE:
|
|
|
+ return (u64)counter->watermark * PAGE_SIZE;
|
|
|
+ case RES_FAILCNT:
|
|
|
+ return counter->failcnt;
|
|
|
+ case RES_SOFT_LIMIT:
|
|
|
+ return (u64)memcg->soft_limit * PAGE_SIZE;
|
|
|
+ default:
|
|
|
+ BUG();
|
|
|
+ }
|
|
|
}
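
mem_cgroup_read_u64() now dispatches first on the counter and then on the attribute, scaling page counts back to bytes for the user-visible files; the RES_* attribute names become a local enum since res_counter.h is gone. Casting to u64 before multiplying is what keeps a large page count from overflowing a 32-bit unsigned long, for example:

/* Pages are scaled to bytes for the cgroup files; the cast matters on
 * 32-bit, where unsigned long is 32 bits.  Sketch with made-up names. */
#include <stdio.h>
#include <stdint.h>

#define TOY_PAGE_SIZE 4096ULL

int main(void)
{
        unsigned long nr_pages = 2097152;       /* 8 GB worth of 4K pages */
        uint64_t bytes = (uint64_t)nr_pages * TOY_PAGE_SIZE;

        printf("%llu bytes\n", (unsigned long long)bytes);
        return 0;
}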
|
|
|
|
|
|
#ifdef CONFIG_MEMCG_KMEM
|
|
|
/* should be called with activate_kmem_mutex held */
|
|
|
static int __memcg_activate_kmem(struct mem_cgroup *memcg,
|
|
|
- unsigned long long limit)
|
|
|
+ unsigned long nr_pages)
|
|
|
{
|
|
|
int err = 0;
|
|
|
int memcg_id;
|
|
@@ -4115,7 +4110,7 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
|
|
|
* We couldn't have accounted to this cgroup, because it hasn't got the
|
|
|
* active bit set yet, so this should succeed.
|
|
|
*/
|
|
|
- err = res_counter_set_limit(&memcg->kmem, limit);
|
|
|
+ err = page_counter_limit(&memcg->kmem, nr_pages);
|
|
|
VM_BUG_ON(err);
|
|
|
|
|
|
static_key_slow_inc(&memcg_kmem_enabled_key);
|
|
@@ -4131,25 +4126,27 @@ out:
|
|
|
}
|
|
|
|
|
|
static int memcg_activate_kmem(struct mem_cgroup *memcg,
|
|
|
- unsigned long long limit)
|
|
|
+ unsigned long nr_pages)
|
|
|
{
|
|
|
int ret;
|
|
|
|
|
|
mutex_lock(&activate_kmem_mutex);
|
|
|
- ret = __memcg_activate_kmem(memcg, limit);
|
|
|
+ ret = __memcg_activate_kmem(memcg, nr_pages);
|
|
|
mutex_unlock(&activate_kmem_mutex);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
|
|
|
- unsigned long long val)
|
|
|
+ unsigned long limit)
|
|
|
{
|
|
|
int ret;
|
|
|
|
|
|
+ mutex_lock(&memcg_limit_mutex);
|
|
|
if (!memcg_kmem_is_active(memcg))
|
|
|
- ret = memcg_activate_kmem(memcg, val);
|
|
|
+ ret = memcg_activate_kmem(memcg, limit);
|
|
|
else
|
|
|
- ret = res_counter_set_limit(&memcg->kmem, val);
|
|
|
+ ret = page_counter_limit(&memcg->kmem, limit);
|
|
|
+ mutex_unlock(&memcg_limit_mutex);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -4167,13 +4164,13 @@ static int memcg_propagate_kmem(struct mem_cgroup *memcg)
|
|
|
* after this point, because it has at least one child already.
|
|
|
*/
|
|
|
if (memcg_kmem_is_active(parent))
|
|
|
- ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX);
|
|
|
+ ret = __memcg_activate_kmem(memcg, PAGE_COUNTER_MAX);
|
|
|
mutex_unlock(&activate_kmem_mutex);
|
|
|
return ret;
|
|
|
}
|
|
|
#else
|
|
|
static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
|
|
|
- unsigned long long val)
|
|
|
+ unsigned long limit)
|
|
|
{
|
|
|
return -EINVAL;
|
|
|
}
|
|
@@ -4187,110 +4184,69 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
|
|
|
char *buf, size_t nbytes, loff_t off)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
|
|
|
- enum res_type type;
|
|
|
- int name;
|
|
|
- unsigned long long val;
|
|
|
+ unsigned long nr_pages;
|
|
|
int ret;
|
|
|
|
|
|
buf = strstrip(buf);
|
|
|
- type = MEMFILE_TYPE(of_cft(of)->private);
|
|
|
- name = MEMFILE_ATTR(of_cft(of)->private);
|
|
|
+ ret = page_counter_memparse(buf, &nr_pages);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
|
|
|
- switch (name) {
|
|
|
+ switch (MEMFILE_ATTR(of_cft(of)->private)) {
|
|
|
case RES_LIMIT:
|
|
|
if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
|
|
|
ret = -EINVAL;
|
|
|
break;
|
|
|
}
|
|
|
- /* This function does all necessary parse...reuse it */
|
|
|
- ret = res_counter_memparse_write_strategy(buf, &val);
|
|
|
- if (ret)
|
|
|
+ switch (MEMFILE_TYPE(of_cft(of)->private)) {
|
|
|
+ case _MEM:
|
|
|
+ ret = mem_cgroup_resize_limit(memcg, nr_pages);
|
|
|
break;
|
|
|
- if (type == _MEM)
|
|
|
- ret = mem_cgroup_resize_limit(memcg, val);
|
|
|
- else if (type == _MEMSWAP)
|
|
|
- ret = mem_cgroup_resize_memsw_limit(memcg, val);
|
|
|
- else if (type == _KMEM)
|
|
|
- ret = memcg_update_kmem_limit(memcg, val);
|
|
|
- else
|
|
|
- return -EINVAL;
|
|
|
- break;
|
|
|
- case RES_SOFT_LIMIT:
|
|
|
- ret = res_counter_memparse_write_strategy(buf, &val);
|
|
|
- if (ret)
|
|
|
+ case _MEMSWAP:
|
|
|
+ ret = mem_cgroup_resize_memsw_limit(memcg, nr_pages);
|
|
|
break;
|
|
|
- /*
|
|
|
- * For memsw, soft limits are hard to implement in terms
|
|
|
- * of semantics, for now, we support soft limits for
|
|
|
- * control without swap
|
|
|
- */
|
|
|
- if (type == _MEM)
|
|
|
- ret = res_counter_set_soft_limit(&memcg->res, val);
|
|
|
- else
|
|
|
- ret = -EINVAL;
|
|
|
+ case _KMEM:
|
|
|
+ ret = memcg_update_kmem_limit(memcg, nr_pages);
|
|
|
+ break;
|
|
|
+ }
|
|
|
break;
|
|
|
- default:
|
|
|
- ret = -EINVAL; /* should be BUG() ? */
|
|
|
+ case RES_SOFT_LIMIT:
|
|
|
+ memcg->soft_limit = nr_pages;
|
|
|
+ ret = 0;
|
|
|
break;
|
|
|
}
|
|
|
return ret ?: nbytes;
|
|
|
}
|
|
|
|
|
|
-static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
|
|
|
- unsigned long long *mem_limit, unsigned long long *memsw_limit)
|
|
|
-{
|
|
|
- unsigned long long min_limit, min_memsw_limit, tmp;
|
|
|
-
|
|
|
- min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
|
|
|
- min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
|
|
|
- if (!memcg->use_hierarchy)
|
|
|
- goto out;
|
|
|
-
|
|
|
- while (memcg->css.parent) {
|
|
|
- memcg = mem_cgroup_from_css(memcg->css.parent);
|
|
|
- if (!memcg->use_hierarchy)
|
|
|
- break;
|
|
|
- tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
|
|
|
- min_limit = min(min_limit, tmp);
|
|
|
- tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
|
|
|
- min_memsw_limit = min(min_memsw_limit, tmp);
|
|
|
- }
|
|
|
-out:
|
|
|
- *mem_limit = min_limit;
|
|
|
- *memsw_limit = min_memsw_limit;
|
|
|
-}
|
|
|
-
|
|
|
static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
|
|
|
size_t nbytes, loff_t off)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
|
|
|
- int name;
|
|
|
- enum res_type type;
|
|
|
+ struct page_counter *counter;
|
|
|
|
|
|
- type = MEMFILE_TYPE(of_cft(of)->private);
|
|
|
- name = MEMFILE_ATTR(of_cft(of)->private);
|
|
|
+ switch (MEMFILE_TYPE(of_cft(of)->private)) {
|
|
|
+ case _MEM:
|
|
|
+ counter = &memcg->memory;
|
|
|
+ break;
|
|
|
+ case _MEMSWAP:
|
|
|
+ counter = &memcg->memsw;
|
|
|
+ break;
|
|
|
+ case _KMEM:
|
|
|
+ counter = &memcg->kmem;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ BUG();
|
|
|
+ }
|
|
|
|
|
|
- switch (name) {
|
|
|
+ switch (MEMFILE_ATTR(of_cft(of)->private)) {
|
|
|
case RES_MAX_USAGE:
|
|
|
- if (type == _MEM)
|
|
|
- res_counter_reset_max(&memcg->res);
|
|
|
- else if (type == _MEMSWAP)
|
|
|
- res_counter_reset_max(&memcg->memsw);
|
|
|
- else if (type == _KMEM)
|
|
|
- res_counter_reset_max(&memcg->kmem);
|
|
|
- else
|
|
|
- return -EINVAL;
|
|
|
+ page_counter_reset_watermark(counter);
|
|
|
break;
|
|
|
case RES_FAILCNT:
|
|
|
- if (type == _MEM)
|
|
|
- res_counter_reset_failcnt(&memcg->res);
|
|
|
- else if (type == _MEMSWAP)
|
|
|
- res_counter_reset_failcnt(&memcg->memsw);
|
|
|
- else if (type == _KMEM)
|
|
|
- res_counter_reset_failcnt(&memcg->kmem);
|
|
|
- else
|
|
|
- return -EINVAL;
|
|
|
+ counter->failcnt = 0;
|
|
|
break;
|
|
|
+ default:
|
|
|
+ BUG();
|
|
|
}
|
|
|
|
|
|
return nbytes;
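
mem_cgroup_reset() now picks the counter once and resets the requested field directly: page_counter_reset_watermark() for max_usage_in_bytes and a plain failcnt = 0 for failcnt. A toy illustration of what the watermark tracks (the sketch assumes a reset simply pulls the recorded peak back down to the current usage):

#include <stdio.h>

struct toy_counter {
        unsigned long usage;
        unsigned long watermark;
        unsigned long failcnt;
};

static void toy_charge(struct toy_counter *c, unsigned long nr_pages)
{
        c->usage += nr_pages;
        if (c->usage > c->watermark)
                c->watermark = c->usage;        /* remember the historical peak */
}

int main(void)
{
        struct toy_counter c = { 0, 0, 0 };

        toy_charge(&c, 50);
        c.usage -= 30;
        toy_charge(&c, 10);
        printf("usage %lu, peak %lu\n", c.usage, c.watermark); /* 30, 50 */
        c.watermark = c.usage;          /* what a max-usage reset does here */
        printf("peak after reset %lu\n", c.watermark);          /* 30 */
        return 0;
}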
@@ -4387,6 +4343,7 @@ static inline void mem_cgroup_lru_names_not_uptodate(void)
|
|
|
static int memcg_stat_show(struct seq_file *m, void *v)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
|
|
|
+ unsigned long memory, memsw;
|
|
|
struct mem_cgroup *mi;
|
|
|
unsigned int i;
|
|
|
|
|
@@ -4406,14 +4363,16 @@ static int memcg_stat_show(struct seq_file *m, void *v)
|
|
|
mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
|
|
|
|
|
|
/* Hierarchical information */
|
|
|
- {
|
|
|
- unsigned long long limit, memsw_limit;
|
|
|
- memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit);
|
|
|
- seq_printf(m, "hierarchical_memory_limit %llu\n", limit);
|
|
|
- if (do_swap_account)
|
|
|
- seq_printf(m, "hierarchical_memsw_limit %llu\n",
|
|
|
- memsw_limit);
|
|
|
+ memory = memsw = PAGE_COUNTER_MAX;
|
|
|
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) {
|
|
|
+ memory = min(memory, mi->memory.limit);
|
|
|
+ memsw = min(memsw, mi->memsw.limit);
|
|
|
}
|
|
|
+ seq_printf(m, "hierarchical_memory_limit %llu\n",
|
|
|
+ (u64)memory * PAGE_SIZE);
|
|
|
+ if (do_swap_account)
|
|
|
+ seq_printf(m, "hierarchical_memsw_limit %llu\n",
|
|
|
+ (u64)memsw * PAGE_SIZE);
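
The removed memcg_get_hierarchical_limit() helper is folded into memcg_stat_show() as a plain walk up the parents, taking the minimum of each level's limit; the hierarchy linkage comes from the counters' parent pointers set up in page_counter_init(). A sketch of the same min-walk with invented names:

#include <stdio.h>

struct toy_group {
        unsigned long limit;            /* pages */
        struct toy_group *parent;
};

static unsigned long toy_hierarchical_limit(struct toy_group *g)
{
        unsigned long min_limit = ~0UL;

        for (; g; g = g->parent)
                if (g->limit < min_limit)
                        min_limit = g->limit;
        return min_limit;
}

int main(void)
{
        struct toy_group root = { 262144, NULL };       /* 1G of 4K pages */
        struct toy_group child = { 524288, &root };     /* 2G, capped by root */

        printf("%lu pages\n", toy_hierarchical_limit(&child)); /* 262144 */
        return 0;
}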
|
|
|
|
|
|
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
|
|
|
long long val = 0;
|
|
@@ -4497,7 +4456,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
|
|
|
static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
|
|
|
{
|
|
|
struct mem_cgroup_threshold_ary *t;
|
|
|
- u64 usage;
|
|
|
+ unsigned long usage;
|
|
|
int i;
|
|
|
|
|
|
rcu_read_lock();
|
|
@@ -4596,10 +4555,11 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
|
|
|
{
|
|
|
struct mem_cgroup_thresholds *thresholds;
|
|
|
struct mem_cgroup_threshold_ary *new;
|
|
|
- u64 threshold, usage;
|
|
|
+ unsigned long threshold;
|
|
|
+ unsigned long usage;
|
|
|
int i, size, ret;
|
|
|
|
|
|
- ret = res_counter_memparse_write_strategy(args, &threshold);
|
|
|
+ ret = page_counter_memparse(args, &threshold);
|
|
|
if (ret)
|
|
|
return ret;
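
Threshold values (and, in the earlier mem_cgroup_write() hunk, limits) written from userspace are still given in bytes but are now parsed into a page count by page_counter_memparse(). The sketch below only approximates that: the k/m/g suffix handling and the division by the page size are assumptions about its behaviour, and the real helper also understands "-1" as "no limit":

#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE 4096UL

static int toy_memparse(const char *buf, unsigned long *nr_pages)
{
        char *end;
        unsigned long long bytes = strtoull(buf, &end, 10);

        switch (*end) {
        case 'k': case 'K': bytes <<= 10; break;
        case 'm': case 'M': bytes <<= 20; break;
        case 'g': case 'G': bytes <<= 30; break;
        case '\0': break;
        default: return -1;
        }
        *nr_pages = bytes / TOY_PAGE_SIZE;
        return 0;
}

int main(void)
{
        unsigned long pages;

        if (!toy_memparse("512M", &pages))
                printf("%lu pages\n", pages);   /* 131072 */
        return 0;
}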
@@ -4689,7 +4649,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
|
|
|
{
|
|
|
struct mem_cgroup_thresholds *thresholds;
|
|
|
struct mem_cgroup_threshold_ary *new;
|
|
|
- u64 usage;
|
|
|
+ unsigned long usage;
|
|
|
int i, j, size;
|
|
|
|
|
|
mutex_lock(&memcg->thresholds_lock);
|
|
@@ -4883,7 +4843,7 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
|
|
|
|
|
|
memcg_kmem_mark_dead(memcg);
|
|
|
|
|
|
- if (res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0)
|
|
|
+ if (page_counter_read(&memcg->kmem))
|
|
|
return;
|
|
|
|
|
|
if (memcg_kmem_test_and_clear_dead(memcg))
|
|
@@ -5363,9 +5323,9 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
|
|
|
*/
|
|
|
struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
|
|
|
{
|
|
|
- if (!memcg->res.parent)
|
|
|
+ if (!memcg->memory.parent)
|
|
|
return NULL;
|
|
|
- return mem_cgroup_from_res_counter(memcg->res.parent, res);
|
|
|
+ return mem_cgroup_from_counter(memcg->memory.parent, memory);
|
|
|
}
|
|
|
EXPORT_SYMBOL(parent_mem_cgroup);
|
|
|
|
|
@@ -5410,9 +5370,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
|
/* root ? */
|
|
|
if (parent_css == NULL) {
|
|
|
root_mem_cgroup = memcg;
|
|
|
- res_counter_init(&memcg->res, NULL);
|
|
|
- res_counter_init(&memcg->memsw, NULL);
|
|
|
- res_counter_init(&memcg->kmem, NULL);
|
|
|
+ page_counter_init(&memcg->memory, NULL);
|
|
|
+ page_counter_init(&memcg->memsw, NULL);
|
|
|
+ page_counter_init(&memcg->kmem, NULL);
|
|
|
}
|
|
|
|
|
|
memcg->last_scanned_node = MAX_NUMNODES;
|
|
@@ -5451,18 +5411,18 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
|
|
|
memcg->swappiness = mem_cgroup_swappiness(parent);
|
|
|
|
|
|
if (parent->use_hierarchy) {
|
|
|
- res_counter_init(&memcg->res, &parent->res);
|
|
|
- res_counter_init(&memcg->memsw, &parent->memsw);
|
|
|
- res_counter_init(&memcg->kmem, &parent->kmem);
|
|
|
+ page_counter_init(&memcg->memory, &parent->memory);
|
|
|
+ page_counter_init(&memcg->memsw, &parent->memsw);
|
|
|
+ page_counter_init(&memcg->kmem, &parent->kmem);
|
|
|
|
|
|
/*
|
|
|
* No need to take a reference to the parent because cgroup
|
|
|
* core guarantees its existence.
|
|
|
*/
|
|
|
} else {
|
|
|
- res_counter_init(&memcg->res, NULL);
|
|
|
- res_counter_init(&memcg->memsw, NULL);
|
|
|
- res_counter_init(&memcg->kmem, NULL);
|
|
|
+ page_counter_init(&memcg->memory, NULL);
|
|
|
+ page_counter_init(&memcg->memsw, NULL);
|
|
|
+ page_counter_init(&memcg->kmem, NULL);
|
|
|
/*
|
|
|
* Deeper hierachy with use_hierarchy == false doesn't make
|
|
|
* much sense so let cgroup subsystem know about this
|
|
@@ -5544,7 +5504,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
|
|
|
/*
|
|
|
* XXX: css_offline() would be where we should reparent all
|
|
|
* memory to prepare the cgroup for destruction. However,
|
|
|
- * memcg does not do css_tryget_online() and res_counter charging
|
|
|
+ * memcg does not do css_tryget_online() and page_counter charging
|
|
|
* under the same RCU lock region, which means that charging
|
|
|
* could race with offlining. Offlining only happens to
|
|
|
* cgroups with no tasks in them but charges can show up
|
|
@@ -5564,7 +5524,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
|
|
|
* call_rcu()
|
|
|
* offline_css()
|
|
|
* reparent_charges()
|
|
|
- * res_counter_charge()
|
|
|
+ * page_counter_try_charge()
|
|
|
* css_put()
|
|
|
* css_free()
|
|
|
* pc->mem_cgroup = dead memcg
|
|
@@ -5599,10 +5559,10 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
|
|
|
|
|
- mem_cgroup_resize_limit(memcg, ULLONG_MAX);
|
|
|
- mem_cgroup_resize_memsw_limit(memcg, ULLONG_MAX);
|
|
|
- memcg_update_kmem_limit(memcg, ULLONG_MAX);
|
|
|
- res_counter_set_soft_limit(&memcg->res, ULLONG_MAX);
|
|
|
+ mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX);
|
|
|
+ mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX);
|
|
|
+ memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX);
|
|
|
+ memcg->soft_limit = 0;
|
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_MMU
|
|
@@ -5916,19 +5876,18 @@ static void __mem_cgroup_clear_mc(void)
|
|
|
if (mc.moved_swap) {
|
|
|
/* uncharge swap account from the old cgroup */
|
|
|
if (!mem_cgroup_is_root(mc.from))
|
|
|
- res_counter_uncharge(&mc.from->memsw,
|
|
|
- PAGE_SIZE * mc.moved_swap);
|
|
|
-
|
|
|
- for (i = 0; i < mc.moved_swap; i++)
|
|
|
- css_put(&mc.from->css);
|
|
|
+ page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
|
|
|
|
|
|
/*
|
|
|
- * we charged both to->res and to->memsw, so we should
|
|
|
- * uncharge to->res.
|
|
|
+ * we charged both to->memory and to->memsw, so we
|
|
|
+ * should uncharge to->memory.
|
|
|
*/
|
|
|
if (!mem_cgroup_is_root(mc.to))
|
|
|
- res_counter_uncharge(&mc.to->res,
|
|
|
- PAGE_SIZE * mc.moved_swap);
|
|
|
+ page_counter_uncharge(&mc.to->memory, mc.moved_swap);
|
|
|
+
|
|
|
+ for (i = 0; i < mc.moved_swap; i++)
|
|
|
+ css_put(&mc.from->css);
|
|
|
+
|
|
|
/* we've already done css_get(mc.to) */
|
|
|
mc.moved_swap = 0;
|
|
|
}
|
|
@@ -6294,7 +6253,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
|
|
|
memcg = mem_cgroup_lookup(id);
|
|
|
if (memcg) {
|
|
|
if (!mem_cgroup_is_root(memcg))
|
|
|
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
|
|
|
+ page_counter_uncharge(&memcg->memsw, 1);
|
|
|
mem_cgroup_swap_statistics(memcg, false);
|
|
|
css_put(&memcg->css);
|
|
|
}
|
|
@@ -6460,11 +6419,9 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
|
|
|
|
|
|
if (!mem_cgroup_is_root(memcg)) {
|
|
|
if (nr_mem)
|
|
|
- res_counter_uncharge(&memcg->res,
|
|
|
- nr_mem * PAGE_SIZE);
|
|
|
+ page_counter_uncharge(&memcg->memory, nr_mem);
|
|
|
if (nr_memsw)
|
|
|
- res_counter_uncharge(&memcg->memsw,
|
|
|
- nr_memsw * PAGE_SIZE);
|
|
|
+ page_counter_uncharge(&memcg->memsw, nr_memsw);
|
|
|
memcg_oom_recover(memcg);
|
|
|
}