|
@@ -54,6 +54,7 @@
|
|
#include <linux/proc_ns.h>
|
|
#include <linux/proc_ns.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/file.h>
|
|
#include <linux/file.h>
|
|
|
|
+#include <linux/sched/cputime.h>
|
|
#include <net/sock.h>
|
|
#include <net/sock.h>
|
|
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
#define CREATE_TRACE_POINTS
|
|
@@ -61,6 +62,8 @@
|
|
|
|
|
|
#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
|
|
#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
|
|
MAX_CFTYPE_NAME + 2)
|
|
MAX_CFTYPE_NAME + 2)
|
|
|
|
+/* let's not notify more than 100 times per second */
|
|
|
|
+#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
|
|
|
|
|
|
/*
|
|
/*
|
|
* cgroup_mutex is the master lock. Any modification to cgroup or its
|
|
* cgroup_mutex is the master lock. Any modification to cgroup or its
|
|
@@ -142,14 +145,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
|
|
};
|
|
};
|
|
#undef SUBSYS
|
|
#undef SUBSYS
|
|
|
|
|
|
-static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
|
|
|
|
|
|
+static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
|
|
|
|
|
|
/*
|
|
/*
|
|
* The default hierarchy, reserved for the subsystems that are otherwise
|
|
* The default hierarchy, reserved for the subsystems that are otherwise
|
|
* unattached - it never has more than a single cgroup, and all tasks are
|
|
* unattached - it never has more than a single cgroup, and all tasks are
|
|
* part of that cgroup.
|
|
* part of that cgroup.
|
|
*/
|
|
*/
|
|
-struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
|
|
|
|
|
|
+struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
|
|
EXPORT_SYMBOL_GPL(cgrp_dfl_root);
|
|
EXPORT_SYMBOL_GPL(cgrp_dfl_root);
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -1554,6 +1557,8 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
|
|
spin_lock_irq(&cgroup_file_kn_lock);
|
|
spin_lock_irq(&cgroup_file_kn_lock);
|
|
cfile->kn = NULL;
|
|
cfile->kn = NULL;
|
|
spin_unlock_irq(&cgroup_file_kn_lock);
|
|
spin_unlock_irq(&cgroup_file_kn_lock);
|
|
|
|
+
|
|
|
|
+ del_timer_sync(&cfile->notify_timer);
|
|
}
|
|
}
|
|
|
|
|
|
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
|
|
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
|
|
@@ -1573,8 +1578,17 @@ static void css_clear_dir(struct cgroup_subsys_state *css)
|
|
|
|
|
|
css->flags &= ~CSS_VISIBLE;
|
|
css->flags &= ~CSS_VISIBLE;
|
|
|
|
|
|
- list_for_each_entry(cfts, &css->ss->cfts, node)
|
|
|
|
|
|
+ if (!css->ss) {
|
|
|
|
+ if (cgroup_on_dfl(cgrp))
|
|
|
|
+ cfts = cgroup_base_files;
|
|
|
|
+ else
|
|
|
|
+ cfts = cgroup1_base_files;
|
|
|
|
+
|
|
cgroup_addrm_files(css, cgrp, cfts, false);
|
|
cgroup_addrm_files(css, cgrp, cfts, false);
|
|
|
|
+ } else {
|
|
|
|
+ list_for_each_entry(cfts, &css->ss->cfts, node)
|
|
|
|
+ cgroup_addrm_files(css, cgrp, cfts, false);
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -1598,14 +1612,16 @@ static int css_populate_dir(struct cgroup_subsys_state *css)
|
|
else
|
|
else
|
|
cfts = cgroup1_base_files;
|
|
cfts = cgroup1_base_files;
|
|
|
|
|
|
- return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- list_for_each_entry(cfts, &css->ss->cfts, node) {
|
|
|
|
- ret = cgroup_addrm_files(css, cgrp, cfts, true);
|
|
|
|
- if (ret < 0) {
|
|
|
|
- failed_cfts = cfts;
|
|
|
|
- goto err;
|
|
|
|
|
|
+ ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
|
|
|
|
+ if (ret < 0)
|
|
|
|
+ return ret;
|
|
|
|
+ } else {
|
|
|
|
+ list_for_each_entry(cfts, &css->ss->cfts, node) {
|
|
|
|
+ ret = cgroup_addrm_files(css, cgrp, cfts, true);
|
|
|
|
+ if (ret < 0) {
|
|
|
|
+ failed_cfts = cfts;
|
|
|
|
+ goto err;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1782,13 +1798,6 @@ static void cgroup_enable_task_cg_lists(void)
|
|
{
|
|
{
|
|
struct task_struct *p, *g;
|
|
struct task_struct *p, *g;
|
|
|
|
|
|
- spin_lock_irq(&css_set_lock);
|
|
|
|
-
|
|
|
|
- if (use_task_css_set_links)
|
|
|
|
- goto out_unlock;
|
|
|
|
-
|
|
|
|
- use_task_css_set_links = true;
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
* We need tasklist_lock because RCU is not safe against
|
|
* We need tasklist_lock because RCU is not safe against
|
|
* while_each_thread(). Besides, a forking task that has passed
|
|
* while_each_thread(). Besides, a forking task that has passed
|
|
@@ -1797,6 +1806,13 @@ static void cgroup_enable_task_cg_lists(void)
|
|
* tasklist if we walk through it with RCU.
|
|
* tasklist if we walk through it with RCU.
|
|
*/
|
|
*/
|
|
read_lock(&tasklist_lock);
|
|
read_lock(&tasklist_lock);
|
|
|
|
+ spin_lock_irq(&css_set_lock);
|
|
|
|
+
|
|
|
|
+ if (use_task_css_set_links)
|
|
|
|
+ goto out_unlock;
|
|
|
|
+
|
|
|
|
+ use_task_css_set_links = true;
|
|
|
|
+
|
|
do_each_thread(g, p) {
|
|
do_each_thread(g, p) {
|
|
WARN_ON_ONCE(!list_empty(&p->cg_list) ||
|
|
WARN_ON_ONCE(!list_empty(&p->cg_list) ||
|
|
task_css_set(p) != &init_css_set);
|
|
task_css_set(p) != &init_css_set);
|
|
@@ -1824,9 +1840,9 @@ static void cgroup_enable_task_cg_lists(void)
|
|
}
|
|
}
|
|
spin_unlock(&p->sighand->siglock);
|
|
spin_unlock(&p->sighand->siglock);
|
|
} while_each_thread(g, p);
|
|
} while_each_thread(g, p);
|
|
- read_unlock(&tasklist_lock);
|
|
|
|
out_unlock:
|
|
out_unlock:
|
|
spin_unlock_irq(&css_set_lock);
|
|
spin_unlock_irq(&css_set_lock);
|
|
|
|
+ read_unlock(&tasklist_lock);
|
|
}
|
|
}
|
|
|
|
|
|
static void init_cgroup_housekeeping(struct cgroup *cgrp)
|
|
static void init_cgroup_housekeeping(struct cgroup *cgrp)
|
|
@@ -1844,6 +1860,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
|
|
cgrp->dom_cgrp = cgrp;
|
|
cgrp->dom_cgrp = cgrp;
|
|
cgrp->max_descendants = INT_MAX;
|
|
cgrp->max_descendants = INT_MAX;
|
|
cgrp->max_depth = INT_MAX;
|
|
cgrp->max_depth = INT_MAX;
|
|
|
|
+ INIT_LIST_HEAD(&cgrp->rstat_css_list);
|
|
|
|
+ prev_cputime_init(&cgrp->prev_cputime);
|
|
|
|
|
|
for_each_subsys(ss, ssid)
|
|
for_each_subsys(ss, ssid)
|
|
INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
|
|
INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
|
|
@@ -3381,7 +3399,7 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
|
|
struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
|
|
struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
|
|
int ret = 0;
|
|
int ret = 0;
|
|
|
|
|
|
- cgroup_stat_show_cputime(seq);
|
|
|
|
|
|
+ cgroup_base_stat_cputime_show(seq);
|
|
#ifdef CONFIG_CGROUP_SCHED
|
|
#ifdef CONFIG_CGROUP_SCHED
|
|
ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
|
|
ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
|
|
#endif
|
|
#endif
|
|
@@ -3521,6 +3539,12 @@ static int cgroup_kn_set_ugid(struct kernfs_node *kn)
|
|
return kernfs_setattr(kn, &iattr);
|
|
return kernfs_setattr(kn, &iattr);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static void cgroup_file_notify_timer(struct timer_list *timer)
|
|
|
|
+{ /* timer callback for cgroup_file.notify_timer: re-invokes cgroup_file_notify() */
|
|
|
|
+ cgroup_file_notify(container_of(timer, struct cgroup_file,
|
|
|
|
+ notify_timer)); /* container_of() maps @timer back to the cgroup_file embedding it */
|
|
|
|
+}
|
|
|
|
+
|
|
static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
|
|
static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
|
|
struct cftype *cft)
|
|
struct cftype *cft)
|
|
{
|
|
{
|
|
@@ -3547,6 +3571,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
|
|
if (cft->file_offset) {
|
|
if (cft->file_offset) {
|
|
struct cgroup_file *cfile = (void *)css + cft->file_offset;
|
|
struct cgroup_file *cfile = (void *)css + cft->file_offset;
|
|
|
|
|
|
|
|
+ timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
|
|
|
|
+
|
|
spin_lock_irq(&cgroup_file_kn_lock);
|
|
spin_lock_irq(&cgroup_file_kn_lock);
|
|
cfile->kn = kn;
|
|
cfile->kn = kn;
|
|
spin_unlock_irq(&cgroup_file_kn_lock);
|
|
spin_unlock_irq(&cgroup_file_kn_lock);
|
|
@@ -3796,8 +3822,17 @@ void cgroup_file_notify(struct cgroup_file *cfile)
|
|
unsigned long flags;
|
|
unsigned long flags;
|
|
|
|
|
|
spin_lock_irqsave(&cgroup_file_kn_lock, flags);
|
|
spin_lock_irqsave(&cgroup_file_kn_lock, flags);
|
|
- if (cfile->kn)
|
|
|
|
- kernfs_notify(cfile->kn);
|
|
|
|
|
|
+ if (cfile->kn) {
|
|
|
|
+ unsigned long last = cfile->notified_at;
|
|
|
|
+ unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
|
|
|
|
+
|
|
|
|
+ if (time_in_range(jiffies, last, next)) {
|
|
|
|
+ timer_reduce(&cfile->notify_timer, next);
|
|
|
|
+ } else {
|
|
|
|
+ kernfs_notify(cfile->kn);
|
|
|
|
+ cfile->notified_at = jiffies;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
|
|
spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -4560,7 +4595,7 @@ static void css_free_rwork_fn(struct work_struct *work)
|
|
cgroup_put(cgroup_parent(cgrp));
|
|
cgroup_put(cgroup_parent(cgrp));
|
|
kernfs_put(cgrp->kn);
|
|
kernfs_put(cgrp->kn);
|
|
if (cgroup_on_dfl(cgrp))
|
|
if (cgroup_on_dfl(cgrp))
|
|
- cgroup_stat_exit(cgrp);
|
|
|
|
|
|
+ cgroup_rstat_exit(cgrp);
|
|
kfree(cgrp);
|
|
kfree(cgrp);
|
|
} else {
|
|
} else {
|
|
/*
|
|
/*
|
|
@@ -4587,6 +4622,11 @@ static void css_release_work_fn(struct work_struct *work)
|
|
|
|
|
|
if (ss) {
|
|
if (ss) {
|
|
/* css release path */
|
|
/* css release path */
|
|
|
|
+ if (!list_empty(&css->rstat_css_node)) {
|
|
|
|
+ cgroup_rstat_flush(cgrp);
|
|
|
|
+ list_del_rcu(&css->rstat_css_node);
|
|
|
|
+ }
|
|
|
|
+
|
|
cgroup_idr_replace(&ss->css_idr, NULL, css->id);
|
|
cgroup_idr_replace(&ss->css_idr, NULL, css->id);
|
|
if (ss->css_released)
|
|
if (ss->css_released)
|
|
ss->css_released(css);
|
|
ss->css_released(css);
|
|
@@ -4597,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
|
|
trace_cgroup_release(cgrp);
|
|
trace_cgroup_release(cgrp);
|
|
|
|
|
|
if (cgroup_on_dfl(cgrp))
|
|
if (cgroup_on_dfl(cgrp))
|
|
- cgroup_stat_flush(cgrp);
|
|
|
|
|
|
+ cgroup_rstat_flush(cgrp);
|
|
|
|
|
|
for (tcgrp = cgroup_parent(cgrp); tcgrp;
|
|
for (tcgrp = cgroup_parent(cgrp); tcgrp;
|
|
tcgrp = cgroup_parent(tcgrp))
|
|
tcgrp = cgroup_parent(tcgrp))
|
|
@@ -4648,6 +4688,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
|
|
css->id = -1;
|
|
css->id = -1;
|
|
INIT_LIST_HEAD(&css->sibling);
|
|
INIT_LIST_HEAD(&css->sibling);
|
|
INIT_LIST_HEAD(&css->children);
|
|
INIT_LIST_HEAD(&css->children);
|
|
|
|
+ INIT_LIST_HEAD(&css->rstat_css_node);
|
|
css->serial_nr = css_serial_nr_next++;
|
|
css->serial_nr = css_serial_nr_next++;
|
|
atomic_set(&css->online_cnt, 0);
|
|
atomic_set(&css->online_cnt, 0);
|
|
|
|
|
|
@@ -4656,6 +4697,9 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
|
|
css_get(css->parent);
|
|
css_get(css->parent);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
|
|
|
|
+ list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
|
|
|
|
+
|
|
BUG_ON(cgroup_css(cgrp, ss));
|
|
BUG_ON(cgroup_css(cgrp, ss));
|
|
}
|
|
}
|
|
|
|
|
|
@@ -4757,6 +4801,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
|
|
err_list_del:
|
|
err_list_del:
|
|
list_del_rcu(&css->sibling);
|
|
list_del_rcu(&css->sibling);
|
|
err_free_css:
|
|
err_free_css:
|
|
|
|
+ list_del_rcu(&css->rstat_css_node);
|
|
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
|
|
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
|
|
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
|
|
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
|
|
return ERR_PTR(err);
|
|
return ERR_PTR(err);
|
|
@@ -4785,7 +4830,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
|
|
goto out_free_cgrp;
|
|
goto out_free_cgrp;
|
|
|
|
|
|
if (cgroup_on_dfl(parent)) {
|
|
if (cgroup_on_dfl(parent)) {
|
|
- ret = cgroup_stat_init(cgrp);
|
|
|
|
|
|
+ ret = cgroup_rstat_init(cgrp);
|
|
if (ret)
|
|
if (ret)
|
|
goto out_cancel_ref;
|
|
goto out_cancel_ref;
|
|
}
|
|
}
|
|
@@ -4850,7 +4895,7 @@ out_idr_free:
|
|
cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
|
|
cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
|
|
out_stat_exit:
|
|
out_stat_exit:
|
|
if (cgroup_on_dfl(parent))
|
|
if (cgroup_on_dfl(parent))
|
|
- cgroup_stat_exit(cgrp);
|
|
|
|
|
|
+ cgroup_rstat_exit(cgrp);
|
|
out_cancel_ref:
|
|
out_cancel_ref:
|
|
percpu_ref_exit(&cgrp->self.refcnt);
|
|
percpu_ref_exit(&cgrp->self.refcnt);
|
|
out_free_cgrp:
|
|
out_free_cgrp:
|
|
@@ -5090,10 +5135,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
|
for_each_css(css, ssid, cgrp)
|
|
for_each_css(css, ssid, cgrp)
|
|
kill_css(css);
|
|
kill_css(css);
|
|
|
|
|
|
- /*
|
|
|
|
- * Remove @cgrp directory along with the base files. @cgrp has an
|
|
|
|
- * extra ref on its kn.
|
|
|
|
- */
|
|
|
|
|
|
+ /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */
|
|
|
|
+ css_clear_dir(&cgrp->self);
|
|
kernfs_remove(cgrp->kn);
|
|
kernfs_remove(cgrp->kn);
|
|
|
|
|
|
if (parent && cgroup_is_threaded(cgrp))
|
|
if (parent && cgroup_is_threaded(cgrp))
|
|
@@ -5245,7 +5288,7 @@ int __init cgroup_init(void)
|
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
|
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
|
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
|
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
|
|
|
|
|
|
- cgroup_stat_boot();
|
|
|
|
|
|
+ cgroup_rstat_boot();
|
|
|
|
|
|
/*
|
|
/*
|
|
* The latency of the synchronize_sched() is too high for cgroups,
|
|
* The latency of the synchronize_sched() is too high for cgroups,
|