7 years ago · 9f25a8da42
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -105,6 +105,8 @@ enum {
 
				 struct cgroup_file {
			
 
				 	/* do not access any fields from outside cgroup core */
			
 
				 	struct kernfs_node *kn;
			
 
				+	unsigned long notified_at;
			
 
				+	struct timer_list notify_timer;
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -128,6 +130,9 @@ struct cgroup_subsys_state {
 
				 	struct list_head sibling;
			
 
				 	struct list_head children;
			
 
				 
			
 
				+	/* flush target list anchored at cgrp->rstat_css_list */
			
 
				+	struct list_head rstat_css_node;
			
 
				+
			
 
				 	/*
			
 
				 	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
			
 
				 	 * matching css can be looked up using css_from_id().
			
@@ -256,12 +261,16 @@ struct css_set {
 
				 	struct rcu_head rcu_head;
			
 
				 };
			
 
				 
			
 
				+struct cgroup_base_stat {
			
 
				+	struct task_cputime cputime;
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				- * cgroup basic resource usage statistics.  Accounting is done per-cpu in
			
 
				- * cgroup_cpu_stat which is then lazily propagated up the hierarchy on
			
 
				- * reads.
			
 
				+ * rstat - cgroup scalable recursive statistics.  Accounting is done
			
 
				+ * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
			
 
				+ * hierarchy on reads.
			
 
				  *
			
 
				- * When a stat gets updated, the cgroup_cpu_stat and its ancestors are
			
 
				+ * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
			
 
				  * linked into the updated tree.  On the following read, propagation only
			
 
				  * considers and consumes the updated tree.  This makes reading O(the
			
 
				  * number of descendants which have been active since last read) instead of
			
@@ -271,20 +280,24 @@ struct css_set {
 
				  * aren't active and stat may be read frequently.  The combination can
			
 
				  * become very expensive.  By propagating selectively, increasing reading
			
 
				  * frequency decreases the cost of each read.
			
 
				+ *
			
 
				+ * This struct hosts both the fields which implement the above -
			
 
				+ * updated_children and updated_next - and the fields which track basic
			
 
				+ * resource statistics on top of it - bsync, bstat and last_bstat.
			
 
				  */
			
 
				-struct cgroup_cpu_stat {
			
 
				+struct cgroup_rstat_cpu {
			
 
				 	/*
			
 
				-	 * ->sync protects all the current counters.  These are the only
			
 
				-	 * fields which get updated in the hot path.
			
 
				+	 * ->bsync protects ->bstat.  These are the only fields which get
			
 
				+	 * updated in the hot path.
			
 
				 	 */
			
 
				-	struct u64_stats_sync sync;
			
 
				-	struct task_cputime cputime;
			
 
				+	struct u64_stats_sync bsync;
			
 
				+	struct cgroup_base_stat bstat;
			
 
				 
			
 
				 	/*
			
 
				 	 * Snapshots at the last reading.  These are used to calculate the
			
 
				 	 * deltas to propagate to the global counters.
			
 
				 	 */
			
 
				-	struct task_cputime last_cputime;
			
 
				+	struct cgroup_base_stat last_bstat;
			
 
				 
			
 
				 	/*
			
 
				 	 * Child cgroups with stat updates on this cpu since the last read
			
@@ -295,18 +308,12 @@ struct cgroup_cpu_stat {
 
				 	 * to the cgroup makes it unnecessary for each per-cpu struct to
			
 
				 	 * point back to the associated cgroup.
			
 
				 	 *
			
 
				-	 * Protected by per-cpu cgroup_cpu_stat_lock.
			
 
				+	 * Protected by per-cpu cgroup_rstat_cpu_lock.
			
 
				 	 */
			
 
				 	struct cgroup *updated_children;	/* terminated by self cgroup */
			
 
				 	struct cgroup *updated_next;		/* NULL iff not on the list */
			
 
				 };
			
 
				 
			
 
				-struct cgroup_stat {
			
 
				-	/* per-cpu statistics are collected into the folowing global counters */
			
 
				-	struct task_cputime cputime;
			
 
				-	struct prev_cputime prev_cputime;
			
 
				-};
			
 
				-
			
 
				 struct cgroup {
			
 
				 	/* self css with NULL ->ss, points back to this cgroup */
			
 
				 	struct cgroup_subsys_state self;
			
@@ -406,10 +413,14 @@ struct cgroup {
 
				 	 */
			
 
				 	struct cgroup *dom_cgrp;
			
 
				 
			
 
				+	/* per-cpu recursive resource statistics */
			
 
				+	struct cgroup_rstat_cpu __percpu *rstat_cpu;
			
 
				+	struct list_head rstat_css_list;
			
 
				+
			
 
				 	/* cgroup basic resource statistics */
			
 
				-	struct cgroup_cpu_stat __percpu *cpu_stat;
			
 
				-	struct cgroup_stat pending_stat;	/* pending from children */
			
 
				-	struct cgroup_stat stat;
			
 
				+	struct cgroup_base_stat pending_bstat;	/* pending from children */
			
 
				+	struct cgroup_base_stat bstat;
			
 
				+	struct prev_cputime prev_cputime;	/* for printing out cputime */
			
 
				 
			
 
				 	/*
			
 
				 	 * list of pidlists, up to two for each namespace (one for procs, one
			
@@ -570,6 +581,7 @@ struct cgroup_subsys {
 
				 	void (*css_released)(struct cgroup_subsys_state *css);
			
 
				 	void (*css_free)(struct cgroup_subsys_state *css);
			
 
				 	void (*css_reset)(struct cgroup_subsys_state *css);
			
 
				+	void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
			
 
				 	int (*css_extra_stat_show)(struct seq_file *seq,
			
 
				 				   struct cgroup_subsys_state *css);
			
 
				 
			
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -690,11 +690,19 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
 
				 	char *buf, size_t buflen) {}
			
 
				 #endif /* !CONFIG_CGROUPS */
			
 
				 
			
 
				+#ifdef CONFIG_CGROUPS
			
 
				 /*
			
 
				- * Basic resource stats.
			
 
				+ * cgroup scalable recursive statistics.
			
 
				  */
			
 
				-#ifdef CONFIG_CGROUPS
			
 
				+void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
			
 
				+void cgroup_rstat_flush(struct cgroup *cgrp);
			
 
				+void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
			
 
				+void cgroup_rstat_flush_hold(struct cgroup *cgrp);
			
 
				+void cgroup_rstat_flush_release(void);
			
 
				 
			
 
				+/*
			
 
				+ * Basic resource stats.
			
 
				+ */
			
 
				 #ifdef CONFIG_CGROUP_CPUACCT
			
 
				 void cpuacct_charge(struct task_struct *tsk, u64 cputime);
			
 
				 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
			
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
 
				 # SPDX-License-Identifier: GPL-2.0
			
 
				-obj-y := cgroup.o stat.o namespace.o cgroup-v1.o
			
 
				+obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
			
 
				 
			
 
				 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
			
 
				 obj-$(CONFIG_CGROUP_PIDS) += pids.o
			
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -201,13 +201,12 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 
				 int cgroup_task_count(const struct cgroup *cgrp);
			
 
				 
			
 
				 /*
			
 
				- * stat.c
			
 
				+ * rstat.c
			
 
				  */
			
 
				-void cgroup_stat_flush(struct cgroup *cgrp);
			
 
				-int cgroup_stat_init(struct cgroup *cgrp);
			
 
				-void cgroup_stat_exit(struct cgroup *cgrp);
			
 
				-void cgroup_stat_show_cputime(struct seq_file *seq);
			
 
				-void cgroup_stat_boot(void);
			
 
				+int cgroup_rstat_init(struct cgroup *cgrp);
			
 
				+void cgroup_rstat_exit(struct cgroup *cgrp);
			
 
				+void cgroup_rstat_boot(void);
			
 
				+void cgroup_base_stat_cputime_show(struct seq_file *seq);
			
 
				 
			
 
				 /*
			
 
				  * namespace.c
			
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -54,6 +54,7 @@
 
				 #include <linux/proc_ns.h>
			
 
				 #include <linux/nsproxy.h>
			
 
				 #include <linux/file.h>
			
 
				+#include <linux/sched/cputime.h>
			
 
				 #include <net/sock.h>
			
 
				 
			
 
				 #define CREATE_TRACE_POINTS
			
@@ -61,6 +62,8 @@
 
				 
			
 
				 #define CGROUP_FILE_NAME_MAX		(MAX_CGROUP_TYPE_NAMELEN +	\
			
 
				 					 MAX_CFTYPE_NAME + 2)
			
 
				+/* let's not notify more than 100 times per second */
			
 
				+#define CGROUP_FILE_NOTIFY_MIN_INTV	DIV_ROUND_UP(HZ, 100)
			
 
				 
			
 
				 /*
			
 
				  * cgroup_mutex is the master lock.  Any modification to cgroup or its
			
@@ -142,14 +145,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
 
				 };
			
 
				 #undef SUBSYS
			
 
				 
			
 
				-static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
			
 
				+static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
			
 
				 
			
 
				 /*
			
 
				  * The default hierarchy, reserved for the subsystems that are otherwise
			
 
				  * unattached - it never has more than a single cgroup, and all tasks are
			
 
				  * part of that cgroup.
			
 
				  */
			
 
				-struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
			
 
				+struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
			
 
				 EXPORT_SYMBOL_GPL(cgrp_dfl_root);
			
 
				 
			
 
				 /*
			
@@ -1554,6 +1557,8 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 
				 		spin_lock_irq(&cgroup_file_kn_lock);
			
 
				 		cfile->kn = NULL;
			
 
				 		spin_unlock_irq(&cgroup_file_kn_lock);
			
 
				+
			
 
				+		del_timer_sync(&cfile->notify_timer);
			
 
				 	}
			
 
				 
			
 
				 	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
			
@@ -1573,8 +1578,17 @@ static void css_clear_dir(struct cgroup_subsys_state *css)
 
				 
			
 
				 	css->flags &= ~CSS_VISIBLE;
			
 
				 
			
 
				-	list_for_each_entry(cfts, &css->ss->cfts, node)
			
 
				+	if (!css->ss) {
			
 
				+		if (cgroup_on_dfl(cgrp))
			
 
				+			cfts = cgroup_base_files;
			
 
				+		else
			
 
				+			cfts = cgroup1_base_files;
			
 
				+
			
 
				 		cgroup_addrm_files(css, cgrp, cfts, false);
			
 
				+	} else {
			
 
				+		list_for_each_entry(cfts, &css->ss->cfts, node)
			
 
				+			cgroup_addrm_files(css, cgrp, cfts, false);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -1598,14 +1612,16 @@ static int css_populate_dir(struct cgroup_subsys_state *css)
 
				 		else
			
 
				 			cfts = cgroup1_base_files;
			
 
				 
			
 
				-		return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
			
 
				-	}
			
 
				-
			
 
				-	list_for_each_entry(cfts, &css->ss->cfts, node) {
			
 
				-		ret = cgroup_addrm_files(css, cgrp, cfts, true);
			
 
				-		if (ret < 0) {
			
 
				-			failed_cfts = cfts;
			
 
				-			goto err;
			
 
				+		ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
			
 
				+		if (ret < 0)
			
 
				+			return ret;
			
 
				+	} else {
			
 
				+		list_for_each_entry(cfts, &css->ss->cfts, node) {
			
 
				+			ret = cgroup_addrm_files(css, cgrp, cfts, true);
			
 
				+			if (ret < 0) {
			
 
				+				failed_cfts = cfts;
			
 
				+				goto err;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1782,13 +1798,6 @@ static void cgroup_enable_task_cg_lists(void)
 
				 {
			
 
				 	struct task_struct *p, *g;
			
 
				 
			
 
				-	spin_lock_irq(&css_set_lock);
			
 
				-
			
 
				-	if (use_task_css_set_links)
			
 
				-		goto out_unlock;
			
 
				-
			
 
				-	use_task_css_set_links = true;
			
 
				-
			
 
				 	/*
			
 
				 	 * We need tasklist_lock because RCU is not safe against
			
 
				 	 * while_each_thread(). Besides, a forking task that has passed
			
@@ -1797,6 +1806,13 @@ static void cgroup_enable_task_cg_lists(void)
 
				 	 * tasklist if we walk through it with RCU.
			
 
				 	 */
			
 
				 	read_lock(&tasklist_lock);
			
 
				+	spin_lock_irq(&css_set_lock);
			
 
				+
			
 
				+	if (use_task_css_set_links)
			
 
				+		goto out_unlock;
			
 
				+
			
 
				+	use_task_css_set_links = true;
			
 
				+
			
 
				 	do_each_thread(g, p) {
			
 
				 		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
			
 
				 			     task_css_set(p) != &init_css_set);
			
@@ -1824,9 +1840,9 @@ static void cgroup_enable_task_cg_lists(void)
 
				 		}
			
 
				 		spin_unlock(&p->sighand->siglock);
			
 
				 	} while_each_thread(g, p);
			
 
				-	read_unlock(&tasklist_lock);
			
 
				 out_unlock:
			
 
				 	spin_unlock_irq(&css_set_lock);
			
 
				+	read_unlock(&tasklist_lock);
			
 
				 }
			
 
				 
			
 
				 static void init_cgroup_housekeeping(struct cgroup *cgrp)
			
@@ -1844,6 +1860,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 
				 	cgrp->dom_cgrp = cgrp;
			
 
				 	cgrp->max_descendants = INT_MAX;
			
 
				 	cgrp->max_depth = INT_MAX;
			
 
				+	INIT_LIST_HEAD(&cgrp->rstat_css_list);
			
 
				+	prev_cputime_init(&cgrp->prev_cputime);
			
 
				 
			
 
				 	for_each_subsys(ss, ssid)
			
 
				 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
			
@@ -3381,7 +3399,7 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
 
				 	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
			
 
				 	int ret = 0;
			
 
				 
			
 
				-	cgroup_stat_show_cputime(seq);
			
 
				+	cgroup_base_stat_cputime_show(seq);
			
 
				 #ifdef CONFIG_CGROUP_SCHED
			
 
				 	ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
			
 
				 #endif
			
@@ -3521,6 +3539,12 @@ static int cgroup_kn_set_ugid(struct kernfs_node *kn)
 
				 	return kernfs_setattr(kn, &iattr);
			
 
				 }
			
 
				 
			
 
				+static void cgroup_file_notify_timer(struct timer_list *timer)
			
 
				+{
			
 
				+	cgroup_file_notify(container_of(timer, struct cgroup_file,
			
 
				+					notify_timer));
			
 
				+}
			
 
				+
			
 
				 static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
			
 
				 			   struct cftype *cft)
			
 
				 {
			
@@ -3547,6 +3571,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 
				 	if (cft->file_offset) {
			
 
				 		struct cgroup_file *cfile = (void *)css + cft->file_offset;
			
 
				 
			
 
				+		timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
			
 
				+
			
 
				 		spin_lock_irq(&cgroup_file_kn_lock);
			
 
				 		cfile->kn = kn;
			
 
				 		spin_unlock_irq(&cgroup_file_kn_lock);
			
@@ -3796,8 +3822,17 @@ void cgroup_file_notify(struct cgroup_file *cfile)
 
				 	unsigned long flags;
			
 
				 
			
 
				 	spin_lock_irqsave(&cgroup_file_kn_lock, flags);
			
 
				-	if (cfile->kn)
			
 
				-		kernfs_notify(cfile->kn);
			
 
				+	if (cfile->kn) {
			
 
				+		unsigned long last = cfile->notified_at;
			
 
				+		unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
			
 
				+
			
 
				+		if (time_in_range(jiffies, last, next)) {
			
 
				+			timer_reduce(&cfile->notify_timer, next);
			
 
				+		} else {
			
 
				+			kernfs_notify(cfile->kn);
			
 
				+			cfile->notified_at = jiffies;
			
 
				+		}
			
 
				+	}
			
 
				 	spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
			
 
				 }
			
 
				 
			
@@ -4560,7 +4595,7 @@ static void css_free_rwork_fn(struct work_struct *work)
 
				 			cgroup_put(cgroup_parent(cgrp));
			
 
				 			kernfs_put(cgrp->kn);
			
 
				 			if (cgroup_on_dfl(cgrp))
			
 
				-				cgroup_stat_exit(cgrp);
			
 
				+				cgroup_rstat_exit(cgrp);
			
 
				 			kfree(cgrp);
			
 
				 		} else {
			
 
				 			/*
			
@@ -4587,6 +4622,11 @@ static void css_release_work_fn(struct work_struct *work)
 
				 
			
 
				 	if (ss) {
			
 
				 		/* css release path */
			
 
				+		if (!list_empty(&css->rstat_css_node)) {
			
 
				+			cgroup_rstat_flush(cgrp);
			
 
				+			list_del_rcu(&css->rstat_css_node);
			
 
				+		}
			
 
				+
			
 
				 		cgroup_idr_replace(&ss->css_idr, NULL, css->id);
			
 
				 		if (ss->css_released)
			
 
				 			ss->css_released(css);
			
@@ -4597,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
 
				 		trace_cgroup_release(cgrp);
			
 
				 
			
 
				 		if (cgroup_on_dfl(cgrp))
			
 
				-			cgroup_stat_flush(cgrp);
			
 
				+			cgroup_rstat_flush(cgrp);
			
 
				 
			
 
				 		for (tcgrp = cgroup_parent(cgrp); tcgrp;
			
 
				 		     tcgrp = cgroup_parent(tcgrp))
			
@@ -4648,6 +4688,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 
				 	css->id = -1;
			
 
				 	INIT_LIST_HEAD(&css->sibling);
			
 
				 	INIT_LIST_HEAD(&css->children);
			
 
				+	INIT_LIST_HEAD(&css->rstat_css_node);
			
 
				 	css->serial_nr = css_serial_nr_next++;
			
 
				 	atomic_set(&css->online_cnt, 0);
			
 
				 
			
@@ -4656,6 +4697,9 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 
				 		css_get(css->parent);
			
 
				 	}
			
 
				 
			
 
				+	if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
			
 
				+		list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
			
 
				+
			
 
				 	BUG_ON(cgroup_css(cgrp, ss));
			
 
				 }
			
 
				 
			
@@ -4757,6 +4801,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
				 err_list_del:
			
 
				 	list_del_rcu(&css->sibling);
			
 
				 err_free_css:
			
 
				+	list_del_rcu(&css->rstat_css_node);
			
 
				 	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
			
 
				 	queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
			
 
				 	return ERR_PTR(err);
			
@@ -4785,7 +4830,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 
				 		goto out_free_cgrp;
			
 
				 
			
 
				 	if (cgroup_on_dfl(parent)) {
			
 
				-		ret = cgroup_stat_init(cgrp);
			
 
				+		ret = cgroup_rstat_init(cgrp);
			
 
				 		if (ret)
			
 
				 			goto out_cancel_ref;
			
 
				 	}
			
@@ -4850,7 +4895,7 @@ out_idr_free:
 
				 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
			
 
				 out_stat_exit:
			
 
				 	if (cgroup_on_dfl(parent))
			
 
				-		cgroup_stat_exit(cgrp);
			
 
				+		cgroup_rstat_exit(cgrp);
			
 
				 out_cancel_ref:
			
 
				 	percpu_ref_exit(&cgrp->self.refcnt);
			
 
				 out_free_cgrp:
			
@@ -5090,10 +5135,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 
				 	for_each_css(css, ssid, cgrp)
			
 
				 		kill_css(css);
			
 
				 
			
 
				-	/*
			
 
				-	 * Remove @cgrp directory along with the base files.  @cgrp has an
			
 
				-	 * extra ref on its kn.
			
 
				-	 */
			
 
				+	/* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */
			
 
				+	css_clear_dir(&cgrp->self);
			
 
				 	kernfs_remove(cgrp->kn);
			
 
				 
			
 
				 	if (parent && cgroup_is_threaded(cgrp))
			
@@ -5245,7 +5288,7 @@ int __init cgroup_init(void)
 
				 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
			
 
				 	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
			
 
				 
			
 
				-	cgroup_stat_boot();
			
 
				+	cgroup_rstat_boot();
			
 
				 
			
 
				 	/*
			
 
				 	 * The latency of the synchronize_sched() is too high for cgroups,
			
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -362,35 +362,32 @@ EXPORT_SYMBOL(rdmacg_unregister_device);
 
				 static int parse_resource(char *c, int *intval)
			
 
				 {
			
 
				 	substring_t argstr;
			
 
				-	const char **table = &rdmacg_resource_names[0];
			
 
				 	char *name, *value = c;
			
 
				 	size_t len;
			
 
				-	int ret, i = 0;
			
 
				+	int ret, i;
			
 
				 
			
 
				 	name = strsep(&value, "=");
			
 
				 	if (!name || !value)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	len = strlen(value);
			
 
				+	i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
			
 
				+	if (i < 0)
			
 
				+		return i;
			
 
				 
			
 
				-	for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
			
 
				-		if (strcmp(table[i], name))
			
 
				-			continue;
			
 
				+	len = strlen(value);
			
 
				 
			
 
				-		argstr.from = value;
			
 
				-		argstr.to = value + len;
			
 
				+	argstr.from = value;
			
 
				+	argstr.to = value + len;
			
 
				 
			
 
				-		ret = match_int(&argstr, intval);
			
 
				-		if (ret >= 0) {
			
 
				-			if (*intval < 0)
			
 
				-				break;
			
 
				-			return i;
			
 
				-		}
			
 
				-		if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
			
 
				-			*intval = S32_MAX;
			
 
				-			return i;
			
 
				-		}
			
 
				-		break;
			
 
				+	ret = match_int(&argstr, intval);
			
 
				+	if (ret >= 0) {
			
 
				+		if (*intval < 0)
			
 
				+			return -EINVAL;
			
 
				+		return i;
			
 
				+	}
			
 
				+	if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
			
 
				+		*intval = S32_MAX;
			
 
				+		return i;
			
 
				 	}
			
 
				 	return -EINVAL;
			
 
				 }
			
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -0,0 +1,416 @@
 
				+#include "cgroup-internal.h"
			
 
				+
			
 
				+#include <linux/sched/cputime.h>
			
 
				+
			
 
				+static DEFINE_SPINLOCK(cgroup_rstat_lock);
			
 
				+static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
			
 
				+
			
 
				+static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
			
 
				+
			
 
				+static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
			
 
				+{
			
 
				+	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * cgroup_rstat_updated - keep track of updated rstat_cpu
			
 
				+ * @cgrp: target cgroup
			
 
				+ * @cpu: cpu on which rstat_cpu was updated
			
 
				+ *
			
 
				+ * @cgrp's rstat_cpu on @cpu was updated.  Put it on the parent's matching
			
 
				+ * rstat_cpu->updated_children list.  See the comment on top of
			
 
				+ * cgroup_rstat_cpu definition for details.
			
 
				+ */
			
 
				+void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
			
 
				+{
			
 
				+	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
			
 
				+	struct cgroup *parent;
			
 
				+	unsigned long flags;
			
 
				+
			
 
				+	/* nothing to do for root */
			
 
				+	if (!cgroup_parent(cgrp))
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * Paired with the one in cgroup_rstat_cpu_pop_updated().  Either we
			
 
				+	 * see NULL updated_next or they see our updated stat.
			
 
				+	 */
			
 
				+	smp_mb();
			
 
				+
			
 
				+	/*
			
 
				+	 * Because @parent's updated_children is terminated with @parent
			
 
				+	 * instead of NULL, we can tell whether @cgrp is on the list by
			
 
				+	 * testing the next pointer for NULL.
			
 
				+	 */
			
 
				+	if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
			
 
				+		return;
			
 
				+
			
 
				+	raw_spin_lock_irqsave(cpu_lock, flags);
			
 
				+
			
 
				+	/* put @cgrp and all ancestors on the corresponding updated lists */
			
 
				+	for (parent = cgroup_parent(cgrp); parent;
			
 
				+	     cgrp = parent, parent = cgroup_parent(cgrp)) {
			
 
				+		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
			
 
				+		struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
			
 
				+
			
 
				+		/*
			
 
				+		 * Both additions and removals are bottom-up.  If a cgroup
			
 
				+		 * is already in the tree, all ancestors are.
			
 
				+		 */
			
 
				+		if (rstatc->updated_next)
			
 
				+			break;
			
 
				+
			
 
				+		rstatc->updated_next = prstatc->updated_children;
			
 
				+		prstatc->updated_children = cgrp;
			
 
				+	}
			
 
				+
			
 
				+	raw_spin_unlock_irqrestore(cpu_lock, flags);
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(cgroup_rstat_updated);
			
 
				+
			
 
				+/**
			
 
				+ * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
			
 
				+ * @pos: current position
			
 
				+ * @root: root of the tree to traverse
			
 
				+ * @cpu: target cpu
			
 
				+ *
			
 
				+ * Walks the updated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
			
 
				+ * the traversal and %NULL return indicates the end.  During traversal,
			
 
				+ * each returned cgroup is unlinked from the tree.  Must be called with the
			
 
				+ * matching cgroup_rstat_cpu_lock held.
			
 
				+ *
			
 
				+ * The only ordering guarantee is that, for a parent and a child pair
			
 
				+ * covered by a given traversal, if a child is visited, its parent is
			
 
				+ * guaranteed to be visited afterwards.
			
 
				+ */
			
 
				+static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
			
 
				+						   struct cgroup *root, int cpu)
			
 
				+{
			
 
				+	struct cgroup_rstat_cpu *rstatc;
			
 
				+	struct cgroup *parent;
			
 
				+
			
 
				+	if (pos == root)
			
 
				+		return NULL;
			
 
				+
			
 
				+	/*
			
 
				+	 * We're gonna walk down to the first leaf and visit/remove it.  We
			
 
				+	 * can pick whatever unvisited node as the starting point.
			
 
				+	 */
			
 
				+	if (!pos)
			
 
				+		pos = root;
			
 
				+	else
			
 
				+		pos = cgroup_parent(pos);
			
 
				+
			
 
				+	/* walk down to the first leaf */
			
 
				+	while (true) {
			
 
				+		rstatc = cgroup_rstat_cpu(pos, cpu);
			
 
				+		if (rstatc->updated_children == pos)
			
 
				+			break;
			
 
				+		pos = rstatc->updated_children;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Unlink @pos from the tree.  As the updated_children list is
			
 
				+	 * singly linked, we have to walk it to find the removal point.
			
 
				+	 * However, due to the way we traverse, @pos will be the first
			
 
				+	 * child in most cases. The only exception is @root.
			
 
				+	 */
			
 
				+	parent = cgroup_parent(pos);
			
 
				+	if (parent && rstatc->updated_next) {
			
 
				+		struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
			
 
				+		struct cgroup_rstat_cpu *nrstatc;
			
 
				+		struct cgroup **nextp;
			
 
				+
			
 
				+		nextp = &prstatc->updated_children;
			
 
				+		while (true) {
			
 
				+			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
			
 
				+			if (*nextp == pos)
			
 
				+				break;
			
 
				+
			
 
				+			WARN_ON_ONCE(*nextp == parent);
			
 
				+			nextp = &nrstatc->updated_next;
			
 
				+		}
			
 
				+
			
 
				+		*nextp = rstatc->updated_next;
			
 
				+		rstatc->updated_next = NULL;
			
 
				+
			
 
				+		/*
			
 
				+		 * Paired with the one in cgroup_rstat_updated().
			
 
				+		 * Either they see NULL updated_next or we see their
			
 
				+		 * updated stat.
			
 
				+		 */
			
 
				+		smp_mb();
			
 
				+	}
			
 
				+
			
 
				+	return pos;
			
 
				+}
			
 
				+
			
 
				+/* see cgroup_rstat_flush() */
			
 
				+static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
			
 
				+	__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
			
 
				+{
			
 
				+	int cpu;
			
 
				+
			
 
				+	lockdep_assert_held(&cgroup_rstat_lock);
			
 
				+
			
 
				+	for_each_possible_cpu(cpu) {
			
 
				+		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
			
 
				+						       cpu);
			
 
				+		struct cgroup *pos = NULL;
			
 
				+
			
 
				+		raw_spin_lock(cpu_lock);
			
 
				+		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
			
 
				+			struct cgroup_subsys_state *css;
			
 
				+
			
 
				+			cgroup_base_stat_flush(pos, cpu);
			
 
				+
			
 
				+			rcu_read_lock();
			
 
				+			list_for_each_entry_rcu(css, &pos->rstat_css_list,
			
 
				+						rstat_css_node)
			
 
				+				css->ss->css_rstat_flush(css, cpu);
			
 
				+			rcu_read_unlock();
			
 
				+		}
			
 
				+		raw_spin_unlock(cpu_lock);
			
 
				+
			
 
				+		/* if @may_sleep, play nice and yield if necessary */
			
 
				+		if (may_sleep && (need_resched() ||
			
 
				+				  spin_needbreak(&cgroup_rstat_lock))) {
			
 
				+			spin_unlock_irq(&cgroup_rstat_lock);
			
 
				+			if (!cond_resched())
			
 
				+				cpu_relax();
			
 
				+			spin_lock_irq(&cgroup_rstat_lock);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * cgroup_rstat_flush - flush stats in @cgrp's subtree
			
 
				+ * @cgrp: target cgroup
			
 
				+ *
			
 
				+ * Collect all per-cpu stats in @cgrp's subtree into the global counters
			
 
				+ * and propagate them upwards.  After this function returns, all cgroups in
			
 
				+ * the subtree have up-to-date ->stat.
			
 
				+ *
			
 
				+ * This also gets all cgroups in the subtree including @cgrp off the
			
 
				+ * ->updated_children lists.
			
 
				+ *
			
 
				+ * This function may block.
			
 
				+ */
			
 
				+void cgroup_rstat_flush(struct cgroup *cgrp)
			
 
				+{
			
 
				+	might_sleep();
			
 
				+
			
 
				+	spin_lock_irq(&cgroup_rstat_lock);
			
 
				+	cgroup_rstat_flush_locked(cgrp, true);
			
 
				+	spin_unlock_irq(&cgroup_rstat_lock);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
			
 
				+ * @cgrp: target cgroup
			
 
				+ *
			
 
				+ * This function can be called from any context.
			
 
				+ */
			
 
				+void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
			
 
				+{
			
 
				+	unsigned long flags;
			
 
				+
			
 
				+	spin_lock_irqsave(&cgroup_rstat_lock, flags);
			
 
				+	cgroup_rstat_flush_locked(cgrp, false);
			
 
				+	spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
			
 
				+ * @cgrp: target cgroup
			
 
				+ *
			
 
				+ * Flush stats in @cgrp's subtree and prevent further flushes.  Must be
			
 
				+ * paired with cgroup_rstat_flush_release().
			
 
				+ *
			
 
				+ * This function may block.
			
 
				+ */
			
 
				+void cgroup_rstat_flush_hold(struct cgroup *cgrp)
			
 
				+	__acquires(&cgroup_rstat_lock)
			
 
				+{
			
 
				+	might_sleep();
			
 
				+	spin_lock_irq(&cgroup_rstat_lock);
			
 
				+	cgroup_rstat_flush_locked(cgrp, true);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
			
 
				+ */
			
 
				+void cgroup_rstat_flush_release(void)
			
 
				+	__releases(&cgroup_rstat_lock)
			
 
				+{
			
 
				+	spin_unlock_irq(&cgroup_rstat_lock);
			
 
				+}
			
 
				+
			
 
				+int cgroup_rstat_init(struct cgroup *cgrp)
			
 
				+{
			
 
				+	int cpu;
			
 
				+
			
 
				+	/* the root cgrp has rstat_cpu preallocated */
			
 
				+	if (!cgrp->rstat_cpu) {
			
 
				+		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
			
 
				+		if (!cgrp->rstat_cpu)
			
 
				+			return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	/* ->updated_children list is self terminated */
			
 
				+	for_each_possible_cpu(cpu) {
			
 
				+		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
			
 
				+
			
 
				+		rstatc->updated_children = cgrp;
			
 
				+		u64_stats_init(&rstatc->bsync);
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+void cgroup_rstat_exit(struct cgroup *cgrp)
			
 
				+{
			
 
				+	int cpu;
			
 
				+
			
 
				+	cgroup_rstat_flush(cgrp);
			
 
				+
			
 
				+	/* sanity check */
			
 
				+	for_each_possible_cpu(cpu) {
			
 
				+		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
			
 
				+
			
 
				+		if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
			
 
				+		    WARN_ON_ONCE(rstatc->updated_next))
			
 
				+			return;
			
 
				+	}
			
 
				+
			
 
				+	free_percpu(cgrp->rstat_cpu);
			
 
				+	cgrp->rstat_cpu = NULL;
			
 
				+}
			
 
				+
			
 
				+void __init cgroup_rstat_boot(void)
			
 
				+{
			
 
				+	int cpu;
			
 
				+
			
 
				+	for_each_possible_cpu(cpu)
			
 
				+		raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
			
 
				+
			
 
				+	BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp));
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Functions for cgroup basic resource statistics implemented on top of
			
 
				+ * rstat.
			
 
				+ */
			
 
				+static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat,
			
 
				+					struct cgroup_base_stat *src_bstat)
			
 
				+{
			
 
				+	dst_bstat->cputime.utime += src_bstat->cputime.utime;
			
 
				+	dst_bstat->cputime.stime += src_bstat->cputime.stime;
			
 
				+	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
			
 
				+}
			
 
				+
			
 
				+static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
			
 
				+{
			
 
				+	struct cgroup *parent = cgroup_parent(cgrp);
			
 
				+	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
			
 
				+	struct task_cputime *last_cputime = &rstatc->last_bstat.cputime;
			
 
				+	struct task_cputime cputime;
			
 
				+	struct cgroup_base_stat delta;
			
 
				+	unsigned seq;
			
 
				+
			
 
				+	/* fetch the current per-cpu values */
			
 
				+	do {
			
 
				+		seq = __u64_stats_fetch_begin(&rstatc->bsync);
			
 
				+		cputime = rstatc->bstat.cputime;
			
 
				+	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));
			
 
				+
			
 
				+	/* calculate the delta to propagate */
			
 
				+	delta.cputime.utime = cputime.utime - last_cputime->utime;
			
 
				+	delta.cputime.stime = cputime.stime - last_cputime->stime;
			
 
				+	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
			
 
				+					 last_cputime->sum_exec_runtime;
			
 
				+	*last_cputime = cputime;
			
 
				+
			
 
				+	/* transfer the pending stat into delta */
			
 
				+	cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat);
			
 
				+	memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat));
			
 
				+
			
 
				+	/* propagate delta into the global stat and the parent's pending */
			
 
				+	cgroup_base_stat_accumulate(&cgrp->bstat, &delta);
			
 
				+	if (parent)
			
 
				+		cgroup_base_stat_accumulate(&parent->pending_bstat, &delta);
			
 
				+}
			
 
				+
			
 
				+static struct cgroup_rstat_cpu *
			
 
				+cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
			
 
				+{
			
 
				+	struct cgroup_rstat_cpu *rstatc;
			
 
				+
			
 
				+	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
			
 
				+	u64_stats_update_begin(&rstatc->bsync);
			
 
				+	return rstatc;
			
 
				+}
			
 
				+
			
 
				+static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
			
 
				+						 struct cgroup_rstat_cpu *rstatc)
			
 
				+{
			
 
				+	u64_stats_update_end(&rstatc->bsync);
			
 
				+	cgroup_rstat_updated(cgrp, smp_processor_id());
			
 
				+	put_cpu_ptr(rstatc);
			
 
				+}
			
 
				+
			
 
				+void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
			
 
				+{
			
 
				+	struct cgroup_rstat_cpu *rstatc;
			
 
				+
			
 
				+	rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
			
 
				+	rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
			
 
				+	cgroup_base_stat_cputime_account_end(cgrp, rstatc);
			
 
				+}
			
 
				+
			
 
				+void __cgroup_account_cputime_field(struct cgroup *cgrp,
			
 
				+				    enum cpu_usage_stat index, u64 delta_exec)
			
 
				+{
			
 
				+	struct cgroup_rstat_cpu *rstatc;
			
 
				+
			
 
				+	rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
			
 
				+
			
 
				+	switch (index) {
			
 
				+	case CPUTIME_USER:
			
 
				+	case CPUTIME_NICE:
			
 
				+		rstatc->bstat.cputime.utime += delta_exec;
			
 
				+		break;
			
 
				+	case CPUTIME_SYSTEM:
			
 
				+	case CPUTIME_IRQ:
			
 
				+	case CPUTIME_SOFTIRQ:
			
 
				+		rstatc->bstat.cputime.stime += delta_exec;
			
 
				+		break;
			
 
				+	default:
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	cgroup_base_stat_cputime_account_end(cgrp, rstatc);
			
 
				+}
			
 
				+
			
 
				+void cgroup_base_stat_cputime_show(struct seq_file *seq)
			
 
				+{
			
 
				+	struct cgroup *cgrp = seq_css(seq)->cgroup;
			
 
				+	u64 usage, utime, stime;
			
 
				+
			
 
				+	if (!cgroup_parent(cgrp))
			
 
				+		return;
			
 
				+
			
 
				+	cgroup_rstat_flush_hold(cgrp);
			
 
				+	usage = cgrp->bstat.cputime.sum_exec_runtime;
			
 
				+	cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
			
 
				+	cgroup_rstat_flush_release();
			
 
				+
			
 
				+	do_div(usage, NSEC_PER_USEC);
			
 
				+	do_div(utime, NSEC_PER_USEC);
			
 
				+	do_div(stime, NSEC_PER_USEC);
			
 
				+
			
 
				+	seq_printf(seq, "usage_usec %llu\n"
			
 
				+		   "user_usec %llu\n"
			
 
				+		   "system_usec %llu\n",
			
 
				+		   usage, utime, stime);
			
 
				+}
			
--- a/kernel/cgroup/stat.c
+++ b/kernel/cgroup/stat.c
@@ -1,338 +0,0 @@
 
				-#include "cgroup-internal.h"
			
 
				-
			
 
				-#include <linux/sched/cputime.h>
			
 
				-
			
 
				-static DEFINE_MUTEX(cgroup_stat_mutex);
			
 
				-static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);
			
 
				-
			
 
				-static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
			
 
				-{
			
 
				-	return per_cpu_ptr(cgrp->cpu_stat, cpu);
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * cgroup_cpu_stat_updated - keep track of updated cpu_stat
			
 
				- * @cgrp: target cgroup
			
 
				- * @cpu: cpu on which cpu_stat was updated
			
 
				- *
			
 
				- * @cgrp's cpu_stat on @cpu was updated.  Put it on the parent's matching
			
 
				- * cpu_stat->updated_children list.  See the comment on top of
			
 
				- * cgroup_cpu_stat definition for details.
			
 
				- */
			
 
				-static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
			
 
				-{
			
 
				-	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
			
 
				-	struct cgroup *parent;
			
 
				-	unsigned long flags;
			
 
				-
			
 
				-	/*
			
 
				-	 * Speculative already-on-list test.  This may race leading to
			
 
				-	 * temporary inaccuracies, which is fine.
			
 
				-	 *
			
 
				-	 * Because @parent's updated_children is terminated with @parent
			
 
				-	 * instead of NULL, we can tell whether @cgrp is on the list by
			
 
				-	 * testing the next pointer for NULL.
			
 
				-	 */
			
 
				-	if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
			
 
				-		return;
			
 
				-
			
 
				-	raw_spin_lock_irqsave(cpu_lock, flags);
			
 
				-
			
 
				-	/* put @cgrp and all ancestors on the corresponding updated lists */
			
 
				-	for (parent = cgroup_parent(cgrp); parent;
			
 
				-	     cgrp = parent, parent = cgroup_parent(cgrp)) {
			
 
				-		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
			
 
				-		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
			
 
				-
			
 
				-		/*
			
 
				-		 * Both additions and removals are bottom-up.  If a cgroup
			
 
				-		 * is already in the tree, all ancestors are.
			
 
				-		 */
			
 
				-		if (cstat->updated_next)
			
 
				-			break;
			
 
				-
			
 
				-		cstat->updated_next = pcstat->updated_children;
			
 
				-		pcstat->updated_children = cgrp;
			
 
				-	}
			
 
				-
			
 
				-	raw_spin_unlock_irqrestore(cpu_lock, flags);
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
			
 
				- * @pos: current position
			
 
				- * @root: root of the tree to traversal
			
 
				- * @cpu: target cpu
			
 
				- *
			
 
				- * Walks the udpated cpu_stat tree on @cpu from @root.  %NULL @pos starts
			
 
				- * the traversal and %NULL return indicates the end.  During traversal,
			
 
				- * each returned cgroup is unlinked from the tree.  Must be called with the
			
 
				- * matching cgroup_cpu_stat_lock held.
			
 
				- *
			
 
				- * The only ordering guarantee is that, for a parent and a child pair
			
 
				- * covered by a given traversal, if a child is visited, its parent is
			
 
				- * guaranteed to be visited afterwards.
			
 
				- */
			
 
				-static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
			
 
				-						  struct cgroup *root, int cpu)
			
 
				-{
			
 
				-	struct cgroup_cpu_stat *cstat;
			
 
				-	struct cgroup *parent;
			
 
				-
			
 
				-	if (pos == root)
			
 
				-		return NULL;
			
 
				-
			
 
				-	/*
			
 
				-	 * We're gonna walk down to the first leaf and visit/remove it.  We
			
 
				-	 * can pick whatever unvisited node as the starting point.
			
 
				-	 */
			
 
				-	if (!pos)
			
 
				-		pos = root;
			
 
				-	else
			
 
				-		pos = cgroup_parent(pos);
			
 
				-
			
 
				-	/* walk down to the first leaf */
			
 
				-	while (true) {
			
 
				-		cstat = cgroup_cpu_stat(pos, cpu);
			
 
				-		if (cstat->updated_children == pos)
			
 
				-			break;
			
 
				-		pos = cstat->updated_children;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Unlink @pos from the tree.  As the updated_children list is
			
 
				-	 * singly linked, we have to walk it to find the removal point.
			
 
				-	 * However, due to the way we traverse, @pos will be the first
			
 
				-	 * child in most cases. The only exception is @root.
			
 
				-	 */
			
 
				-	parent = cgroup_parent(pos);
			
 
				-	if (parent && cstat->updated_next) {
			
 
				-		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
			
 
				-		struct cgroup_cpu_stat *ncstat;
			
 
				-		struct cgroup **nextp;
			
 
				-
			
 
				-		nextp = &pcstat->updated_children;
			
 
				-		while (true) {
			
 
				-			ncstat = cgroup_cpu_stat(*nextp, cpu);
			
 
				-			if (*nextp == pos)
			
 
				-				break;
			
 
				-
			
 
				-			WARN_ON_ONCE(*nextp == parent);
			
 
				-			nextp = &ncstat->updated_next;
			
 
				-		}
			
 
				-
			
 
				-		*nextp = cstat->updated_next;
			
 
				-		cstat->updated_next = NULL;
			
 
				-	}
			
 
				-
			
 
				-	return pos;
			
 
				-}
			
 
				-
			
 
				-static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
			
 
				-				   struct cgroup_stat *src_stat)
			
 
				-{
			
 
				-	dst_stat->cputime.utime += src_stat->cputime.utime;
			
 
				-	dst_stat->cputime.stime += src_stat->cputime.stime;
			
 
				-	dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime;
			
 
				-}
			
 
				-
			
 
				-static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
			
 
				-{
			
 
				-	struct cgroup *parent = cgroup_parent(cgrp);
			
 
				-	struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
			
 
				-	struct task_cputime *last_cputime = &cstat->last_cputime;
			
 
				-	struct task_cputime cputime;
			
 
				-	struct cgroup_stat delta;
			
 
				-	unsigned seq;
			
 
				-
			
 
				-	lockdep_assert_held(&cgroup_stat_mutex);
			
 
				-
			
 
				-	/* fetch the current per-cpu values */
			
 
				-	do {
			
 
				-		seq = __u64_stats_fetch_begin(&cstat->sync);
			
 
				-		cputime = cstat->cputime;
			
 
				-	} while (__u64_stats_fetch_retry(&cstat->sync, seq));
			
 
				-
			
 
				-	/* accumulate the deltas to propgate */
			
 
				-	delta.cputime.utime = cputime.utime - last_cputime->utime;
			
 
				-	delta.cputime.stime = cputime.stime - last_cputime->stime;
			
 
				-	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
			
 
				-					 last_cputime->sum_exec_runtime;
			
 
				-	*last_cputime = cputime;
			
 
				-
			
 
				-	/* transfer the pending stat into delta */
			
 
				-	cgroup_stat_accumulate(&delta, &cgrp->pending_stat);
			
 
				-	memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat));
			
 
				-
			
 
				-	/* propagate delta into the global stat and the parent's pending */
			
 
				-	cgroup_stat_accumulate(&cgrp->stat, &delta);
			
 
				-	if (parent)
			
 
				-		cgroup_stat_accumulate(&parent->pending_stat, &delta);
			
 
				-}
			
 
				-
			
 
				-/* see cgroup_stat_flush() */
			
 
				-static void cgroup_stat_flush_locked(struct cgroup *cgrp)
			
 
				-{
			
 
				-	int cpu;
			
 
				-
			
 
				-	lockdep_assert_held(&cgroup_stat_mutex);
			
 
				-
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
			
 
				-		struct cgroup *pos = NULL;
			
 
				-
			
 
				-		raw_spin_lock_irq(cpu_lock);
			
 
				-		while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
			
 
				-			cgroup_cpu_stat_flush_one(pos, cpu);
			
 
				-		raw_spin_unlock_irq(cpu_lock);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * cgroup_stat_flush - flush stats in @cgrp's subtree
			
 
				- * @cgrp: target cgroup
			
 
				- *
			
 
				- * Collect all per-cpu stats in @cgrp's subtree into the global counters
			
 
				- * and propagate them upwards.  After this function returns, all cgroups in
			
 
				- * the subtree have up-to-date ->stat.
			
 
				- *
			
 
				- * This also gets all cgroups in the subtree including @cgrp off the
			
 
				- * ->updated_children lists.
			
 
				- */
			
 
				-void cgroup_stat_flush(struct cgroup *cgrp)
			
 
				-{
			
 
				-	mutex_lock(&cgroup_stat_mutex);
			
 
				-	cgroup_stat_flush_locked(cgrp);
			
 
				-	mutex_unlock(&cgroup_stat_mutex);
			
 
				-}
			
 
				-
			
 
				-static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
			
 
				-{
			
 
				-	struct cgroup_cpu_stat *cstat;
			
 
				-
			
 
				-	cstat = get_cpu_ptr(cgrp->cpu_stat);
			
 
				-	u64_stats_update_begin(&cstat->sync);
			
 
				-	return cstat;
			
 
				-}
			
 
				-
			
 
				-static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
			
 
				-					struct cgroup_cpu_stat *cstat)
			
 
				-{
			
 
				-	u64_stats_update_end(&cstat->sync);
			
 
				-	cgroup_cpu_stat_updated(cgrp, smp_processor_id());
			
 
				-	put_cpu_ptr(cstat);
			
 
				-}
			
 
				-
			
 
				-void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
			
 
				-{
			
 
				-	struct cgroup_cpu_stat *cstat;
			
 
				-
			
 
				-	cstat = cgroup_cpu_stat_account_begin(cgrp);
			
 
				-	cstat->cputime.sum_exec_runtime += delta_exec;
			
 
				-	cgroup_cpu_stat_account_end(cgrp, cstat);
			
 
				-}
			
 
				-
			
 
				-void __cgroup_account_cputime_field(struct cgroup *cgrp,
			
 
				-				    enum cpu_usage_stat index, u64 delta_exec)
			
 
				-{
			
 
				-	struct cgroup_cpu_stat *cstat;
			
 
				-
			
 
				-	cstat = cgroup_cpu_stat_account_begin(cgrp);
			
 
				-
			
 
				-	switch (index) {
			
 
				-	case CPUTIME_USER:
			
 
				-	case CPUTIME_NICE:
			
 
				-		cstat->cputime.utime += delta_exec;
			
 
				-		break;
			
 
				-	case CPUTIME_SYSTEM:
			
 
				-	case CPUTIME_IRQ:
			
 
				-	case CPUTIME_SOFTIRQ:
			
 
				-		cstat->cputime.stime += delta_exec;
			
 
				-		break;
			
 
				-	default:
			
 
				-		break;
			
 
				-	}
			
 
				-
			
 
				-	cgroup_cpu_stat_account_end(cgrp, cstat);
			
 
				-}
			
 
				-
			
 
				-void cgroup_stat_show_cputime(struct seq_file *seq)
			
 
				-{
			
 
				-	struct cgroup *cgrp = seq_css(seq)->cgroup;
			
 
				-	u64 usage, utime, stime;
			
 
				-
			
 
				-	if (!cgroup_parent(cgrp))
			
 
				-		return;
			
 
				-
			
 
				-	mutex_lock(&cgroup_stat_mutex);
			
 
				-
			
 
				-	cgroup_stat_flush_locked(cgrp);
			
 
				-
			
 
				-	usage = cgrp->stat.cputime.sum_exec_runtime;
			
 
				-	cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
			
 
				-		       &utime, &stime);
			
 
				-
			
 
				-	mutex_unlock(&cgroup_stat_mutex);
			
 
				-
			
 
				-	do_div(usage, NSEC_PER_USEC);
			
 
				-	do_div(utime, NSEC_PER_USEC);
			
 
				-	do_div(stime, NSEC_PER_USEC);
			
 
				-
			
 
				-	seq_printf(seq, "usage_usec %llu\n"
			
 
				-		   "user_usec %llu\n"
			
 
				-		   "system_usec %llu\n",
			
 
				-		   usage, utime, stime);
			
 
				-}
			
 
				-
			
 
				-int cgroup_stat_init(struct cgroup *cgrp)
			
 
				-{
			
 
				-	int cpu;
			
 
				-
			
 
				-	/* the root cgrp has cpu_stat preallocated */
			
 
				-	if (!cgrp->cpu_stat) {
			
 
				-		cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
			
 
				-		if (!cgrp->cpu_stat)
			
 
				-			return -ENOMEM;
			
 
				-	}
			
 
				-
			
 
				-	/* ->updated_children list is self terminated */
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
			
 
				-
			
 
				-		cstat->updated_children = cgrp;
			
 
				-		u64_stats_init(&cstat->sync);
			
 
				-	}
			
 
				-
			
 
				-	prev_cputime_init(&cgrp->stat.prev_cputime);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-void cgroup_stat_exit(struct cgroup *cgrp)
			
 
				-{
			
 
				-	int cpu;
			
 
				-
			
 
				-	cgroup_stat_flush(cgrp);
			
 
				-
			
 
				-	/* sanity check */
			
 
				-	for_each_possible_cpu(cpu) {
			
 
				-		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
			
 
				-
			
 
				-		if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
			
 
				-		    WARN_ON_ONCE(cstat->updated_next))
			
 
				-			return;
			
 
				-	}
			
 
				-
			
 
				-	free_percpu(cgrp->cpu_stat);
			
 
				-	cgrp->cpu_stat = NULL;
			
 
				-}
			
 
				-
			
 
				-void __init cgroup_stat_boot(void)
			
 
				-{
			
 
				-	int cpu;
			
 
				-
			
 
				-	for_each_possible_cpu(cpu)
			
 
				-		raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));
			
 
				-
			
 
				-	BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
			
 
				-}