@@ -4275,6 +4275,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	}
 	spin_unlock(&memcg->event_list_lock);
 
+	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
 
 	memcg_offline_kmem(memcg);
@@ -4329,6 +4330,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	page_counter_set_max(&memcg->memsw, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
+	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
 	memcg->high = PAGE_COUNTER_MAX;
 	memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5066,6 +5068,36 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
 	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
 }
 
+static int memory_min_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	unsigned long min = READ_ONCE(memcg->memory.min);
+
+	if (min == PAGE_COUNTER_MAX)
+		seq_puts(m, "max\n");
+	else
+		seq_printf(m, "%llu\n", (u64)min * PAGE_SIZE);
+
+	return 0;
+}
+
+static ssize_t memory_min_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long min;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "max", &min);
+	if (err)
+		return err;
+
+	page_counter_set_min(&memcg->memory, min);
+
+	return nbytes;
+}
+
 static int memory_low_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -5300,6 +5332,12 @@ static struct cftype memory_files[] = {
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.read_u64 = memory_current_read,
 	},
+	{
+		.name = "min",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_min_show,
+		.write = memory_min_write,
+	},
 	{
 		.name = "low",
 		.flags = CFTYPE_NOT_ON_ROOT,
@@ -5349,19 +5387,24 @@ struct cgroup_subsys memory_cgrp_subsys = {
 };
 
 /**
- * mem_cgroup_low - check if memory consumption is in the normal range
+ * mem_cgroup_protected - check if memory consumption is in the normal range
  * @root: the top ancestor of the sub-tree being checked
  * @memcg: the memory cgroup to check
  *
  * WARNING: This function is not stateless! It can only be used as part
  *          of a top-down tree iteration, not for isolated queries.
  *
- * Returns %true if memory consumption of @memcg is in the normal range.
+ * Returns one of the following:
+ *   MEMCG_PROT_NONE: cgroup memory is not protected
+ *   MEMCG_PROT_LOW: cgroup memory is protected as long as there is
+ *     an unprotected supply of reclaimable memory from other cgroups.
+ *   MEMCG_PROT_MIN: cgroup memory is protected
  *
- * @root is exclusive; it is never low when looked at directly
+ * @root is exclusive; it is never protected when looked at directly
 *
- * To provide a proper hierarchical behavior, effective memory.low value
- * is used.
+ * To provide a proper hierarchical behavior, effective memory.min/low values
+ * are used. Below is a description of how effective memory.low is calculated.
+ * The effective memory.min value is calculated in the same way.
  *
  * Effective memory.low is always equal or less than the original memory.low.
  * If there is no memory.low overcommittment (which is always true for
@@ -5406,51 +5449,78 @@ struct cgroup_subsys memory_cgrp_subsys = {
  * E/memory.current = 0
  *
  * These calculations require constant tracking of the actual low usages
- * (see propagate_low_usage()), as well as recursive calculation of
- * effective memory.low values. But as we do call mem_cgroup_low()
+ * (see propagate_protected_usage()), as well as recursive calculation of
+ * effective memory.low values. But as we do call mem_cgroup_protected()
  * path for each memory cgroup top-down from the reclaim,
  * it's possible to optimize this part, and save calculated elow
  * for next usage. This part is intentionally racy, but it's ok,
  * as memory.low is a best-effort mechanism.
  */
-bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
+enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
+						struct mem_cgroup *memcg)
 {
-	unsigned long usage, low_usage, siblings_low_usage;
-	unsigned long elow, parent_elow;
 	struct mem_cgroup *parent;
+	unsigned long emin, parent_emin;
+	unsigned long elow, parent_elow;
+	unsigned long usage;
 
 	if (mem_cgroup_disabled())
-		return false;
+		return MEMCG_PROT_NONE;
 
 	if (!root)
 		root = root_mem_cgroup;
 	if (memcg == root)
-		return false;
+		return MEMCG_PROT_NONE;
 
-	elow = memcg->memory.low;
 	usage = page_counter_read(&memcg->memory);
-	parent = parent_mem_cgroup(memcg);
+	if (!usage)
+		return MEMCG_PROT_NONE;
+
+	emin = memcg->memory.min;
+	elow = memcg->memory.low;
 
+	parent = parent_mem_cgroup(memcg);
 	if (parent == root)
 		goto exit;
 
+	parent_emin = READ_ONCE(parent->memory.emin);
+	emin = min(emin, parent_emin);
+	if (emin && parent_emin) {
+		unsigned long min_usage, siblings_min_usage;
+
+		min_usage = min(usage, memcg->memory.min);
+		siblings_min_usage = atomic_long_read(
+			&parent->memory.children_min_usage);
+
+		if (min_usage && siblings_min_usage)
+			emin = min(emin, parent_emin * min_usage /
+				   siblings_min_usage);
+	}
+
 	parent_elow = READ_ONCE(parent->memory.elow);
 	elow = min(elow, parent_elow);
+	if (elow && parent_elow) {
+		unsigned long low_usage, siblings_low_usage;
 
-	if (!elow || !parent_elow)
-		goto exit;
+		low_usage = min(usage, memcg->memory.low);
+		siblings_low_usage = atomic_long_read(
+			&parent->memory.children_low_usage);
 
-	low_usage = min(usage, memcg->memory.low);
-	siblings_low_usage = atomic_long_read(
-		&parent->memory.children_low_usage);
-
-	if (!low_usage || !siblings_low_usage)
-		goto exit;
+		if (low_usage && siblings_low_usage)
+			elow = min(elow, parent_elow * low_usage /
+				   siblings_low_usage);
+	}
 
-	elow = min(elow, parent_elow * low_usage / siblings_low_usage);
 exit:
+	memcg->memory.emin = emin;
 	memcg->memory.elow = elow;
-	return usage && usage <= elow;
+
+	if (usage <= emin)
+		return MEMCG_PROT_MIN;
+	else if (usage <= elow)
+		return MEMCG_PROT_LOW;
+	else
+		return MEMCG_PROT_NONE;
 }
 
 /**
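
For illustration only, outside the patch above: a minimal standalone sketch of the proportional emin scaling performed in mem_cgroup_protected() when siblings overcommit the parent's protection. The helper name and the page counts are invented for this example; in the kernel the computation runs top-down during reclaim, so parent_emin is the parent's already-computed effective value.

#include <stdio.h>

/*
 * Hypothetical example of the scaling step
 *   emin = min(emin, parent_emin * min_usage / siblings_min_usage)
 * with all values expressed in pages.
 */
static unsigned long effective_min(unsigned long min, unsigned long usage,
				   unsigned long parent_emin,
				   unsigned long siblings_min_usage)
{
	unsigned long emin = min < parent_emin ? min : parent_emin;
	unsigned long min_usage = usage < min ? usage : min;

	if (min_usage && siblings_min_usage) {
		unsigned long scaled = parent_emin * min_usage / siblings_min_usage;

		if (scaled < emin)
			emin = scaled;
	}
	return emin;
}

int main(void)
{
	/*
	 * Parent protects 200 pages; two children each set memory.min = 200
	 * and each use 150 pages, so children_min_usage = 300 and each child
	 * ends up with 200 * 150 / 300 = 100 effectively protected pages.
	 */
	printf("emin = %lu\n", effective_min(200, 150, 200, 300));
	return 0;
}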