@@ -162,6 +162,9 @@ static u16 cgrp_dfl_inhibit_ss_mask;
/* some controllers are implicitly enabled on the default hierarchy */
static u16 cgrp_dfl_implicit_ss_mask;

+/* some controllers can be threaded on the default hierarchy */
+static u16 cgrp_dfl_threaded_ss_mask;
+
/* The list of hierarchy roots */
LIST_HEAD(cgroup_roots);
static int cgroup_root_count;
@@ -316,13 +319,87 @@ static void cgroup_idr_remove(struct idr *idr, int id)
spin_unlock_bh(&cgroup_idr_lock);
}

-static struct cgroup *cgroup_parent(struct cgroup *cgrp)
+static bool cgroup_has_tasks(struct cgroup *cgrp)
{
- struct cgroup_subsys_state *parent_css = cgrp->self.parent;
+ return cgrp->nr_populated_csets;
+}

- if (parent_css)
- return container_of(parent_css, struct cgroup, self);
- return NULL;
+bool cgroup_is_threaded(struct cgroup *cgrp)
+{
+ return cgrp->dom_cgrp != cgrp;
+}
+
+/* can @cgrp host both domain and threaded children? */
+static bool cgroup_is_mixable(struct cgroup *cgrp)
+{
+ /*
+ * Root isn't under domain level resource control exempting it from
+ * the no-internal-process constraint, so it can serve as a thread
+ * root and a parent of resource domains at the same time.
+ */
+ return !cgroup_parent(cgrp);
+}
+
+/* can @cgrp become a thread root? should always be true for a thread root */
+static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
+{
+ /* mixables don't care */
+ if (cgroup_is_mixable(cgrp))
+ return true;
+
+ /* domain roots can't be nested under threaded */
+ if (cgroup_is_threaded(cgrp))
+ return false;
+
+ /* can only have either domain or threaded children */
+ if (cgrp->nr_populated_domain_children)
+ return false;
+
+ /* and no domain controllers can be enabled */
+ if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
+ return false;
+
+ return true;
+}
+
+/* is @cgrp root of a threaded subtree? */
+bool cgroup_is_thread_root(struct cgroup *cgrp)
+{
+ /* thread root should be a domain */
+ if (cgroup_is_threaded(cgrp))
+ return false;
+
+ /* a domain w/ threaded children is a thread root */
+ if (cgrp->nr_threaded_children)
+ return true;
+
+ /*
+ * A domain which has tasks and explicit threaded controllers
+ * enabled is a thread root.
+ */
+ if (cgroup_has_tasks(cgrp) &&
+ (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
+ return true;
+
+ return false;
+}
+
+/* a domain which isn't connected to the root w/o breakage can't be used */
+static bool cgroup_is_valid_domain(struct cgroup *cgrp)
+{
+ /* the cgroup itself can be a thread root */
+ if (cgroup_is_threaded(cgrp))
+ return false;
+
+ /* but the ancestors can't be unless mixable */
+ while ((cgrp = cgroup_parent(cgrp))) {
+ if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
+ return false;
+ if (cgroup_is_threaded(cgrp))
+ return false;
+ }
+
+ return true;
}

/* subsystems visibly enabled on a cgroup */
@@ -331,8 +408,14 @@ static u16 cgroup_control(struct cgroup *cgrp)
struct cgroup *parent = cgroup_parent(cgrp);
u16 root_ss_mask = cgrp->root->subsys_mask;

- if (parent)
- return parent->subtree_control;
+ if (parent) {
+ u16 ss_mask = parent->subtree_control;
+
+ /* threaded cgroups can only have threaded controllers */
+ if (cgroup_is_threaded(cgrp))
+ ss_mask &= cgrp_dfl_threaded_ss_mask;
+ return ss_mask;
+ }

if (cgroup_on_dfl(cgrp))
root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
@@ -345,8 +428,14 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp)
{
struct cgroup *parent = cgroup_parent(cgrp);

- if (parent)
- return parent->subtree_ss_mask;
+ if (parent) {
+ u16 ss_mask = parent->subtree_ss_mask;
+
+ /* threaded cgroups can only have threaded controllers */
+ if (cgroup_is_threaded(cgrp))
+ ss_mask &= cgrp_dfl_threaded_ss_mask;
+ return ss_mask;
+ }

return cgrp->root->subsys_mask;
}
@@ -436,22 +525,12 @@ out_unlock:
return css;
}

-static void __maybe_unused cgroup_get(struct cgroup *cgrp)
-{
- css_get(&cgrp->self);
-}
-
static void cgroup_get_live(struct cgroup *cgrp)
{
WARN_ON_ONCE(cgroup_is_dead(cgrp));
css_get(&cgrp->self);
}

-static bool cgroup_tryget(struct cgroup *cgrp)
-{
- return css_tryget(&cgrp->self);
-}
-
struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
struct cgroup *cgrp = of->kn->parent->priv;
@@ -560,9 +639,11 @@ EXPORT_SYMBOL_GPL(of_css);
*/
struct css_set init_css_set = {
.refcount = REFCOUNT_INIT(1),
+ .dom_cset = &init_css_set,
.tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
+ .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
.mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
.mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
@@ -570,6 +651,11 @@ struct css_set init_css_set = {

static int css_set_count = 1; /* 1 for init_css_set */

+static bool css_set_threaded(struct css_set *cset)
+{
+ return cset->dom_cset != cset;
+}
+
/**
* css_set_populated - does a css_set contain any tasks?
* @cset: target css_set
@@ -587,39 +673,48 @@ static bool css_set_populated(struct css_set *cset)
}

/**
- * cgroup_update_populated - updated populated count of a cgroup
+ * cgroup_update_populated - update the populated count of a cgroup
* @cgrp: the target cgroup
* @populated: inc or dec populated count
*
* One of the css_sets associated with @cgrp is either getting its first
- * task or losing the last. Update @cgrp->populated_cnt accordingly. The
- * count is propagated towards root so that a given cgroup's populated_cnt
- * is zero iff the cgroup and all its descendants don't contain any tasks.
+ * task or losing the last. Update @cgrp->nr_populated_* accordingly. The
+ * count is propagated towards root so that a given cgroup's
+ * nr_populated_children is zero iff none of its descendants contain any
+ * tasks.
*
- * @cgrp's interface file "cgroup.populated" is zero if
- * @cgrp->populated_cnt is zero and 1 otherwise. When @cgrp->populated_cnt
- * changes from or to zero, userland is notified that the content of the
- * interface file has changed. This can be used to detect when @cgrp and
- * its descendants become populated or empty.
+ * @cgrp's interface file "cgroup.populated" is zero if both
+ * @cgrp->nr_populated_csets and @cgrp->nr_populated_children are zero and
+ * 1 otherwise. When the sum changes from or to zero, userland is notified
+ * that the content of the interface file has changed. This can be used to
+ * detect when @cgrp and its descendants become populated or empty.
*/
static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
{
+ struct cgroup *child = NULL;
+ int adj = populated ? 1 : -1;
+
lockdep_assert_held(&css_set_lock);

do {
- bool trigger;
+ bool was_populated = cgroup_is_populated(cgrp);

- if (populated)
- trigger = !cgrp->populated_cnt++;
- else
- trigger = !--cgrp->populated_cnt;
+ if (!child) {
+ cgrp->nr_populated_csets += adj;
+ } else {
+ if (cgroup_is_threaded(child))
+ cgrp->nr_populated_threaded_children += adj;
+ else
+ cgrp->nr_populated_domain_children += adj;
+ }

- if (!trigger)
+ if (was_populated == cgroup_is_populated(cgrp))
break;

cgroup1_check_for_release(cgrp);
cgroup_file_notify(&cgrp->events_file);

+ child = cgrp;
cgrp = cgroup_parent(cgrp);
} while (cgrp);
}
@@ -630,7 +725,7 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
* @populated: whether @cset is populated or depopulated
*
* @cset is either getting the first task or losing the last. Update the
- * ->populated_cnt of all associated cgroups accordingly.
+ * populated counters of all associated cgroups accordingly.
*/
static void css_set_update_populated(struct css_set *cset, bool populated)
{
@@ -653,7 +748,7 @@ static void css_set_update_populated(struct css_set *cset, bool populated)
* css_set, @from_cset can be NULL. If @task is being disassociated
* instead of moved, @to_cset can be NULL.
*
- * This function automatically handles populated_cnt updates and
+ * This function automatically handles populated counter updates and
* css_task_iter adjustments but the caller is responsible for managing
* @from_cset and @to_cset's reference counts.
*/
@@ -737,6 +832,8 @@ void put_css_set_locked(struct css_set *cset)
if (!refcount_dec_and_test(&cset->refcount))
return;

+ WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
+
/* This css_set is dead. unlink it and release cgroup and css refs */
for_each_subsys(ss, ssid) {
list_del(&cset->e_cset_node[ssid]);
@@ -753,6 +850,11 @@ void put_css_set_locked(struct css_set *cset)
kfree(link);
}

+ if (css_set_threaded(cset)) {
+ list_del(&cset->threaded_csets_node);
+ put_css_set_locked(cset->dom_cset);
+ }
+
kfree_rcu(cset, rcu_head);
}
@@ -771,6 +873,7 @@ static bool compare_css_sets(struct css_set *cset,
struct cgroup *new_cgrp,
struct cgroup_subsys_state *template[])
{
+ struct cgroup *new_dfl_cgrp;
struct list_head *l1, *l2;

/*
@@ -781,6 +884,16 @@ static bool compare_css_sets(struct css_set *cset,
if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
return false;

+
+ /* @cset's domain should match the default cgroup's */
+ if (cgroup_on_dfl(new_cgrp))
+ new_dfl_cgrp = new_cgrp;
+ else
+ new_dfl_cgrp = old_cset->dfl_cgrp;
+
+ if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
+ return false;
+
/*
* Compare cgroup pointers in order to distinguish between
* different cgroups in hierarchies. As different cgroups may
@@ -988,9 +1101,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
}

refcount_set(&cset->refcount, 1);
+ cset->dom_cset = cset;
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks);
INIT_LIST_HEAD(&cset->task_iters);
+ INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
INIT_LIST_HEAD(&cset->cgrp_links);
INIT_LIST_HEAD(&cset->mg_preload_node);
@@ -1028,6 +1143,28 @@ static struct css_set *find_css_set(struct css_set *old_cset,

spin_unlock_irq(&css_set_lock);

+ /*
+ * If @cset should be threaded, look up the matching dom_cset and
+ * link them up. We first fully initialize @cset then look for the
+ * dom_cset. It's simpler this way and safe as @cset is guaranteed
+ * to stay empty until we return.
+ */
+ if (cgroup_is_threaded(cset->dfl_cgrp)) {
+ struct css_set *dcset;
+
+ dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
+ if (!dcset) {
+ put_css_set(cset);
+ return NULL;
+ }
+
+ spin_lock_irq(&css_set_lock);
+ cset->dom_cset = dcset;
+ list_add_tail(&cset->threaded_csets_node,
+ &dcset->threaded_csets);
+ spin_unlock_irq(&css_set_lock);
+ }
+
return cset;
}
@@ -1155,6 +1292,8 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset,

if (cset == &init_css_set) {
res = &root->cgrp;
+ } else if (root == &cgrp_dfl_root) {
+ res = cset->dfl_cgrp;
} else {
struct cgrp_cset_link *link;
@@ -1670,6 +1809,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
mutex_init(&cgrp->pidlist_mutex);
cgrp->self.cgroup = cgrp;
cgrp->self.flags |= CSS_ONLINE;
+ cgrp->dom_cgrp = cgrp;
+ cgrp->max_descendants = INT_MAX;
+ cgrp->max_depth = INT_MAX;

for_each_subsys(ss, ssid)
INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -2172,17 +2314,40 @@ out_release_tset:
}

/**
- * cgroup_may_migrate_to - verify whether a cgroup can be migration destination
+ * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
* @dst_cgrp: destination cgroup to test
*
- * On the default hierarchy, except for the root, subtree_control must be
- * zero for migration destination cgroups with tasks so that child cgroups
- * don't compete against tasks.
+ * On the default hierarchy, except for the mixable, (possible) thread root
+ * and threaded cgroups, subtree_control must be zero for migration
+ * destination cgroups with tasks so that child cgroups don't compete
+ * against tasks.
*/
-bool cgroup_may_migrate_to(struct cgroup *dst_cgrp)
+int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
{
- return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) ||
- !dst_cgrp->subtree_control;
+ /* v1 doesn't have any restriction */
+ if (!cgroup_on_dfl(dst_cgrp))
+ return 0;
+
+ /* verify @dst_cgrp can host resources */
+ if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
+ return -EOPNOTSUPP;
+
+ /* mixables don't care */
+ if (cgroup_is_mixable(dst_cgrp))
+ return 0;
+
+ /*
+ * If @dst_cgrp is already or can become a thread root or is
+ * threaded, it doesn't matter.
+ */
+ if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
+ return 0;
+
+ /* apply no-internal-process constraint */
+ if (dst_cgrp->subtree_control)
+ return -EBUSY;
+
+ return 0;
}

/**
@@ -2387,8 +2552,9 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
|
|
|
struct task_struct *task;
|
|
|
int ret;
|
|
|
|
|
|
- if (!cgroup_may_migrate_to(dst_cgrp))
|
|
|
- return -EBUSY;
|
|
|
+ ret = cgroup_migrate_vet_dst(dst_cgrp);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
|
|
|
/* look up all src csets */
|
|
|
spin_lock_irq(&css_set_lock);
|
|
@@ -2415,96 +2581,23 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static int cgroup_procs_write_permission(struct task_struct *task,
|
|
|
- struct cgroup *dst_cgrp,
|
|
|
- struct kernfs_open_file *of)
|
|
|
-{
|
|
|
- struct super_block *sb = of->file->f_path.dentry->d_sb;
|
|
|
- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
|
|
|
- struct cgroup *root_cgrp = ns->root_cset->dfl_cgrp;
|
|
|
- struct cgroup *src_cgrp, *com_cgrp;
|
|
|
- struct inode *inode;
|
|
|
- int ret;
|
|
|
-
|
|
|
- if (!cgroup_on_dfl(dst_cgrp)) {
|
|
|
- const struct cred *cred = current_cred();
|
|
|
- const struct cred *tcred = get_task_cred(task);
|
|
|
-
|
|
|
- /*
|
|
|
- * even if we're attaching all tasks in the thread group,
|
|
|
- * we only need to check permissions on one of them.
|
|
|
- */
|
|
|
- if (uid_eq(cred->euid, GLOBAL_ROOT_UID) ||
|
|
|
- uid_eq(cred->euid, tcred->uid) ||
|
|
|
- uid_eq(cred->euid, tcred->suid))
|
|
|
- ret = 0;
|
|
|
- else
|
|
|
- ret = -EACCES;
|
|
|
-
|
|
|
- put_cred(tcred);
|
|
|
- return ret;
|
|
|
- }
|
|
|
-
|
|
|
- /* find the source cgroup */
|
|
|
- spin_lock_irq(&css_set_lock);
|
|
|
- src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
|
|
|
- spin_unlock_irq(&css_set_lock);
|
|
|
-
|
|
|
- /* and the common ancestor */
|
|
|
- com_cgrp = src_cgrp;
|
|
|
- while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
|
|
|
- com_cgrp = cgroup_parent(com_cgrp);
|
|
|
-
|
|
|
- /* %current should be authorized to migrate to the common ancestor */
|
|
|
- inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
|
|
|
- if (!inode)
|
|
|
- return -ENOMEM;
|
|
|
-
|
|
|
- ret = inode_permission(inode, MAY_WRITE);
|
|
|
- iput(inode);
|
|
|
- if (ret)
|
|
|
- return ret;
|
|
|
-
|
|
|
- /*
|
|
|
- * If namespaces are delegation boundaries, %current must be able
|
|
|
- * to see both source and destination cgroups from its namespace.
|
|
|
- */
|
|
|
- if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
|
|
|
- (!cgroup_is_descendant(src_cgrp, root_cgrp) ||
|
|
|
- !cgroup_is_descendant(dst_cgrp, root_cgrp)))
|
|
|
- return -ENOENT;
|
|
|
-
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
-/*
|
|
|
- * Find the task_struct of the task to attach by vpid and pass it along to the
|
|
|
- * function to attach either it or all tasks in its threadgroup. Will lock
|
|
|
- * cgroup_mutex and threadgroup.
|
|
|
- */
|
|
|
-ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
|
|
|
- size_t nbytes, loff_t off, bool threadgroup)
|
|
|
+struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
|
|
|
+ __acquires(&cgroup_threadgroup_rwsem)
|
|
|
{
|
|
|
struct task_struct *tsk;
|
|
|
- struct cgroup_subsys *ss;
|
|
|
- struct cgroup *cgrp;
|
|
|
pid_t pid;
|
|
|
- int ssid, ret;
|
|
|
|
|
|
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
|
|
|
- return -EINVAL;
|
|
|
-
|
|
|
- cgrp = cgroup_kn_lock_live(of->kn, false);
|
|
|
- if (!cgrp)
|
|
|
- return -ENODEV;
|
|
|
+ return ERR_PTR(-EINVAL);
|
|
|
|
|
|
percpu_down_write(&cgroup_threadgroup_rwsem);
|
|
|
+
|
|
|
rcu_read_lock();
|
|
|
if (pid) {
|
|
|
tsk = find_task_by_vpid(pid);
|
|
|
if (!tsk) {
|
|
|
- ret = -ESRCH;
|
|
|
- goto out_unlock_rcu;
|
|
|
+ tsk = ERR_PTR(-ESRCH);
|
|
|
+ goto out_unlock_threadgroup;
|
|
|
}
|
|
|
} else {
|
|
|
tsk = current;
|
|
@@ -2520,35 +2613,33 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
|
|
|
* cgroup with no rt_runtime allocated. Just say no.
|
|
|
*/
|
|
|
if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
|
|
|
- ret = -EINVAL;
|
|
|
- goto out_unlock_rcu;
|
|
|
+ tsk = ERR_PTR(-EINVAL);
|
|
|
+ goto out_unlock_threadgroup;
|
|
|
}
|
|
|
|
|
|
get_task_struct(tsk);
|
|
|
+ goto out_unlock_rcu;
|
|
|
+
|
|
|
+out_unlock_threadgroup:
|
|
|
+ percpu_up_write(&cgroup_threadgroup_rwsem);
|
|
|
+out_unlock_rcu:
|
|
|
rcu_read_unlock();
|
|
|
+ return tsk;
|
|
|
+}
|
|
|
|
|
|
- ret = cgroup_procs_write_permission(tsk, cgrp, of);
|
|
|
- if (!ret)
|
|
|
- ret = cgroup_attach_task(cgrp, tsk, threadgroup);
|
|
|
+void cgroup_procs_write_finish(struct task_struct *task)
|
|
|
+ __releases(&cgroup_threadgroup_rwsem)
|
|
|
+{
|
|
|
+ struct cgroup_subsys *ss;
|
|
|
+ int ssid;
|
|
|
|
|
|
- put_task_struct(tsk);
|
|
|
- goto out_unlock_threadgroup;
|
|
|
+ /* release reference from cgroup_procs_write_start() */
|
|
|
+ put_task_struct(task);
|
|
|
|
|
|
-out_unlock_rcu:
|
|
|
- rcu_read_unlock();
|
|
|
-out_unlock_threadgroup:
|
|
|
percpu_up_write(&cgroup_threadgroup_rwsem);
|
|
|
for_each_subsys(ss, ssid)
|
|
|
if (ss->post_attach)
|
|
|
ss->post_attach();
|
|
|
- cgroup_kn_unlock(of->kn);
|
|
|
- return ret ?: nbytes;
|
|
|
-}
|
|
|
-
|
|
|
-ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
|
|
|
- loff_t off)
|
|
|
-{
|
|
|
- return __cgroup_procs_write(of, buf, nbytes, off, true);
|
|
|
}
|
|
|
|
|
|
static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
|
|
@@ -2891,6 +2982,46 @@ static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
cgroup_apply_control_disable(cgrp);
}

+static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
+{
+ u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
+
+ /* if nothing is getting enabled, nothing to worry about */
+ if (!enable)
+ return 0;
+
+ /* can @cgrp host any resources? */
+ if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
+ return -EOPNOTSUPP;
+
+ /* mixables don't care */
+ if (cgroup_is_mixable(cgrp))
+ return 0;
+
+ if (domain_enable) {
+ /* can't enable domain controllers inside a thread subtree */
+ if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
+ return -EOPNOTSUPP;
+ } else {
+ /*
+ * Threaded controllers can handle internal competitions
+ * and are always allowed inside a (prospective) thread
+ * subtree.
+ */
+ if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
+ return 0;
+ }
+
+ /*
+ * Controllers can't be enabled for a cgroup with tasks to avoid
+ * child cgroups competing against tasks.
+ */
+ if (cgroup_has_tasks(cgrp))
+ return -EBUSY;
+
+ return 0;
+}
+
/* change the enabled child controllers for a cgroup in the default hierarchy */
static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
char *buf, size_t nbytes,
@@ -2966,33 +3097,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
|
|
|
goto out_unlock;
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
- * Except for the root, subtree_control must be zero for a cgroup
|
|
|
- * with tasks so that child cgroups don't compete against tasks.
|
|
|
- */
|
|
|
- if (enable && cgroup_parent(cgrp)) {
|
|
|
- struct cgrp_cset_link *link;
|
|
|
-
|
|
|
- /*
|
|
|
- * Because namespaces pin csets too, @cgrp->cset_links
|
|
|
- * might not be empty even when @cgrp is empty. Walk and
|
|
|
- * verify each cset.
|
|
|
- */
|
|
|
- spin_lock_irq(&css_set_lock);
|
|
|
-
|
|
|
- ret = 0;
|
|
|
- list_for_each_entry(link, &cgrp->cset_links, cset_link) {
|
|
|
- if (css_set_populated(link->cset)) {
|
|
|
- ret = -EBUSY;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- spin_unlock_irq(&css_set_lock);
|
|
|
-
|
|
|
- if (ret)
|
|
|
- goto out_unlock;
|
|
|
- }
|
|
|
+ ret = cgroup_vet_subtree_control_enable(cgrp, enable);
|
|
|
+ if (ret)
|
|
|
+ goto out_unlock;
|
|
|
|
|
|
/* save and update control masks and prepare csses */
|
|
|
cgroup_save_control(cgrp);
|
|
@@ -3011,6 +3118,172 @@ out_unlock:
return ret ?: nbytes;
}

+/**
+ * cgroup_enable_threaded - make @cgrp threaded
+ * @cgrp: the target cgroup
+ *
+ * Called when "threaded" is written to the cgroup.type interface file and
+ * tries to make @cgrp threaded and join the parent's resource domain.
+ * This function is never called on the root cgroup as cgroup.type doesn't
+ * exist on it.
+ */
+static int cgroup_enable_threaded(struct cgroup *cgrp)
+{
+ struct cgroup *parent = cgroup_parent(cgrp);
+ struct cgroup *dom_cgrp = parent->dom_cgrp;
+ int ret;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+ /* noop if already threaded */
+ if (cgroup_is_threaded(cgrp))
+ return 0;
+
+ /* we're joining the parent's domain, ensure its validity */
+ if (!cgroup_is_valid_domain(dom_cgrp) ||
+ !cgroup_can_be_thread_root(dom_cgrp))
+ return -EOPNOTSUPP;
+
+ /*
+ * The following shouldn't cause actual migrations and should
+ * always succeed.
+ */
+ cgroup_save_control(cgrp);
+
+ cgrp->dom_cgrp = dom_cgrp;
+ ret = cgroup_apply_control(cgrp);
+ if (!ret)
+ parent->nr_threaded_children++;
+ else
+ cgrp->dom_cgrp = cgrp;
+
+ cgroup_finalize_control(cgrp, ret);
+ return ret;
+}
+
+static int cgroup_type_show(struct seq_file *seq, void *v)
+{
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+ if (cgroup_is_threaded(cgrp))
+ seq_puts(seq, "threaded\n");
+ else if (!cgroup_is_valid_domain(cgrp))
+ seq_puts(seq, "domain invalid\n");
+ else if (cgroup_is_thread_root(cgrp))
+ seq_puts(seq, "domain threaded\n");
+ else
+ seq_puts(seq, "domain\n");
+
+ return 0;
+}
+
+static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct cgroup *cgrp;
+ int ret;
+
+ /* only switching to threaded mode is supported */
+ if (strcmp(strstrip(buf), "threaded"))
+ return -EINVAL;
+
+ cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!cgrp)
+ return -ENOENT;
+
+ /* threaded can only be enabled */
+ ret = cgroup_enable_threaded(cgrp);
+
+ cgroup_kn_unlock(of->kn);
+ return ret ?: nbytes;
+}
|
|
|
+
|
|
|
+static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
|
|
|
+{
|
|
|
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
|
|
|
+ int descendants = READ_ONCE(cgrp->max_descendants);
|
|
|
+
|
|
|
+ if (descendants == INT_MAX)
|
|
|
+ seq_puts(seq, "max\n");
|
|
|
+ else
|
|
|
+ seq_printf(seq, "%d\n", descendants);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
|
|
|
+ char *buf, size_t nbytes, loff_t off)
|
|
|
+{
|
|
|
+ struct cgroup *cgrp;
|
|
|
+ int descendants;
|
|
|
+ ssize_t ret;
|
|
|
+
|
|
|
+ buf = strstrip(buf);
|
|
|
+ if (!strcmp(buf, "max")) {
|
|
|
+ descendants = INT_MAX;
|
|
|
+ } else {
|
|
|
+ ret = kstrtoint(buf, 0, &descendants);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (descendants < 0)
|
|
|
+ return -ERANGE;
|
|
|
+
|
|
|
+ cgrp = cgroup_kn_lock_live(of->kn, false);
|
|
|
+ if (!cgrp)
|
|
|
+ return -ENOENT;
|
|
|
+
|
|
|
+ cgrp->max_descendants = descendants;
|
|
|
+
|
|
|
+ cgroup_kn_unlock(of->kn);
|
|
|
+
|
|
|
+ return nbytes;
|
|
|
+}
|
|
|
+
|
|
|
+static int cgroup_max_depth_show(struct seq_file *seq, void *v)
|
|
|
+{
|
|
|
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
|
|
|
+ int depth = READ_ONCE(cgrp->max_depth);
|
|
|
+
|
|
|
+ if (depth == INT_MAX)
|
|
|
+ seq_puts(seq, "max\n");
|
|
|
+ else
|
|
|
+ seq_printf(seq, "%d\n", depth);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
|
|
|
+ char *buf, size_t nbytes, loff_t off)
|
|
|
+{
|
|
|
+ struct cgroup *cgrp;
|
|
|
+ ssize_t ret;
|
|
|
+ int depth;
|
|
|
+
|
|
|
+ buf = strstrip(buf);
|
|
|
+ if (!strcmp(buf, "max")) {
|
|
|
+ depth = INT_MAX;
|
|
|
+ } else {
|
|
|
+ ret = kstrtoint(buf, 0, &depth);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (depth < 0)
|
|
|
+ return -ERANGE;
|
|
|
+
|
|
|
+ cgrp = cgroup_kn_lock_live(of->kn, false);
|
|
|
+ if (!cgrp)
|
|
|
+ return -ENOENT;
|
|
|
+
|
|
|
+ cgrp->max_depth = depth;
|
|
|
+
|
|
|
+ cgroup_kn_unlock(of->kn);
|
|
|
+
|
|
|
+ return nbytes;
|
|
|
+}
|
|
|
+
|
|
|
static int cgroup_events_show(struct seq_file *seq, void *v)
|
|
|
{
|
|
|
seq_printf(seq, "populated %d\n",
|
|
@@ -3018,6 +3291,18 @@ static int cgroup_events_show(struct seq_file *seq, void *v)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static int cgroup_stat_show(struct seq_file *seq, void *v)
|
|
|
+{
|
|
|
+ struct cgroup *cgroup = seq_css(seq)->cgroup;
|
|
|
+
|
|
|
+ seq_printf(seq, "nr_descendants %d\n",
|
|
|
+ cgroup->nr_descendants);
|
|
|
+ seq_printf(seq, "nr_dying_descendants %d\n",
|
|
|
+ cgroup->nr_dying_descendants);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
static int cgroup_file_open(struct kernfs_open_file *of)
|
|
|
{
|
|
|
struct cftype *cft = of->kn->priv;
|
|
@@ -3234,7 +3519,6 @@ restart:
|
|
|
|
|
|
static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
|
|
|
{
|
|
|
- LIST_HEAD(pending);
|
|
|
struct cgroup_subsys *ss = cfts[0].ss;
|
|
|
struct cgroup *root = &ss->root->cgrp;
|
|
|
struct cgroup_subsys_state *css;
|
|
@@ -3659,6 +3943,58 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
|
|
|
+{
|
|
|
+ struct list_head *l;
|
|
|
+ struct cgrp_cset_link *link;
|
|
|
+ struct css_set *cset;
|
|
|
+
|
|
|
+ lockdep_assert_held(&css_set_lock);
|
|
|
+
|
|
|
+ /* find the next threaded cset */
|
|
|
+ if (it->tcset_pos) {
|
|
|
+ l = it->tcset_pos->next;
|
|
|
+
|
|
|
+ if (l != it->tcset_head) {
|
|
|
+ it->tcset_pos = l;
|
|
|
+ return container_of(l, struct css_set,
|
|
|
+ threaded_csets_node);
|
|
|
+ }
|
|
|
+
|
|
|
+ it->tcset_pos = NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* find the next cset */
|
|
|
+ l = it->cset_pos;
|
|
|
+ l = l->next;
|
|
|
+ if (l == it->cset_head) {
|
|
|
+ it->cset_pos = NULL;
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (it->ss) {
|
|
|
+ cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
|
|
|
+ } else {
|
|
|
+ link = list_entry(l, struct cgrp_cset_link, cset_link);
|
|
|
+ cset = link->cset;
|
|
|
+ }
|
|
|
+
|
|
|
+ it->cset_pos = l;
|
|
|
+
|
|
|
+ /* initialize threaded css_set walking */
|
|
|
+ if (it->flags & CSS_TASK_ITER_THREADED) {
|
|
|
+ if (it->cur_dcset)
|
|
|
+ put_css_set_locked(it->cur_dcset);
|
|
|
+ it->cur_dcset = cset;
|
|
|
+ get_css_set(cset);
|
|
|
+
|
|
|
+ it->tcset_head = &cset->threaded_csets;
|
|
|
+ it->tcset_pos = &cset->threaded_csets;
|
|
|
+ }
|
|
|
+
|
|
|
+ return cset;
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* css_task_iter_advance_css_set - advance a task itererator to the next css_set
|
|
|
* @it: the iterator to advance
|
|
@@ -3667,32 +4003,19 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
|
|
|
*/
|
|
|
static void css_task_iter_advance_css_set(struct css_task_iter *it)
|
|
|
{
|
|
|
- struct list_head *l = it->cset_pos;
|
|
|
- struct cgrp_cset_link *link;
|
|
|
struct css_set *cset;
|
|
|
|
|
|
lockdep_assert_held(&css_set_lock);
|
|
|
|
|
|
/* Advance to the next non-empty css_set */
|
|
|
do {
|
|
|
- l = l->next;
|
|
|
- if (l == it->cset_head) {
|
|
|
- it->cset_pos = NULL;
|
|
|
+ cset = css_task_iter_next_css_set(it);
|
|
|
+ if (!cset) {
|
|
|
it->task_pos = NULL;
|
|
|
return;
|
|
|
}
|
|
|
-
|
|
|
- if (it->ss) {
|
|
|
- cset = container_of(l, struct css_set,
|
|
|
- e_cset_node[it->ss->id]);
|
|
|
- } else {
|
|
|
- link = list_entry(l, struct cgrp_cset_link, cset_link);
|
|
|
- cset = link->cset;
|
|
|
- }
|
|
|
} while (!css_set_populated(cset));
|
|
|
|
|
|
- it->cset_pos = l;
|
|
|
-
|
|
|
if (!list_empty(&cset->tasks))
|
|
|
it->task_pos = cset->tasks.next;
|
|
|
else
|
|
@@ -3732,6 +4055,7 @@ static void css_task_iter_advance(struct css_task_iter *it)
|
|
|
lockdep_assert_held(&css_set_lock);
|
|
|
WARN_ON_ONCE(!l);
|
|
|
|
|
|
+repeat:
|
|
|
/*
|
|
|
* Advance iterator to find next entry. cset->tasks is consumed
|
|
|
* first and then ->mg_tasks. After ->mg_tasks, we move onto the
|
|
@@ -3746,11 +4070,18 @@ static void css_task_iter_advance(struct css_task_iter *it)
|
|
|
css_task_iter_advance_css_set(it);
|
|
|
else
|
|
|
it->task_pos = l;
|
|
|
+
|
|
|
+ /* if PROCS, skip over tasks which aren't group leaders */
|
|
|
+ if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
|
|
|
+ !thread_group_leader(list_entry(it->task_pos, struct task_struct,
|
|
|
+ cg_list)))
|
|
|
+ goto repeat;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* css_task_iter_start - initiate task iteration
|
|
|
* @css: the css to walk tasks of
|
|
|
+ * @flags: CSS_TASK_ITER_* flags
|
|
|
* @it: the task iterator to use
|
|
|
*
|
|
|
* Initiate iteration through the tasks of @css. The caller can call
|
|
@@ -3758,7 +4089,7 @@ static void css_task_iter_advance(struct css_task_iter *it)
|
|
|
* returns NULL. On completion of iteration, css_task_iter_end() must be
|
|
|
* called.
|
|
|
*/
|
|
|
-void css_task_iter_start(struct cgroup_subsys_state *css,
|
|
|
+void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
|
|
|
struct css_task_iter *it)
|
|
|
{
|
|
|
/* no one should try to iterate before mounting cgroups */
|
|
@@ -3769,6 +4100,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
|
|
|
spin_lock_irq(&css_set_lock);
|
|
|
|
|
|
it->ss = css->ss;
|
|
|
+ it->flags = flags;
|
|
|
|
|
|
if (it->ss)
|
|
|
it->cset_pos = &css->cgroup->e_csets[css->ss->id];
|
|
@@ -3826,6 +4158,9 @@ void css_task_iter_end(struct css_task_iter *it)
|
|
|
spin_unlock_irq(&css_set_lock);
|
|
|
}
|
|
|
|
|
|
+ if (it->cur_dcset)
|
|
|
+ put_css_set(it->cur_dcset);
|
|
|
+
|
|
|
if (it->cur_task)
|
|
|
put_task_struct(it->cur_task);
|
|
|
}
|
|
@@ -3842,16 +4177,12 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
|
|
|
{
|
|
|
struct kernfs_open_file *of = s->private;
|
|
|
struct css_task_iter *it = of->priv;
|
|
|
- struct task_struct *task;
|
|
|
|
|
|
- do {
|
|
|
- task = css_task_iter_next(it);
|
|
|
- } while (task && !thread_group_leader(task));
|
|
|
-
|
|
|
- return task;
|
|
|
+ return css_task_iter_next(it);
|
|
|
}
|
|
|
|
|
|
-static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
|
|
|
+static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
|
|
|
+ unsigned int iter_flags)
|
|
|
{
|
|
|
struct kernfs_open_file *of = s->private;
|
|
|
struct cgroup *cgrp = seq_css(s)->cgroup;
|
|
@@ -3869,23 +4200,168 @@ static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
|
|
|
if (!it)
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
of->priv = it;
|
|
|
- css_task_iter_start(&cgrp->self, it);
|
|
|
+ css_task_iter_start(&cgrp->self, iter_flags, it);
|
|
|
} else if (!(*pos)++) {
|
|
|
css_task_iter_end(it);
|
|
|
- css_task_iter_start(&cgrp->self, it);
|
|
|
+ css_task_iter_start(&cgrp->self, iter_flags, it);
|
|
|
}
|
|
|
|
|
|
return cgroup_procs_next(s, NULL, NULL);
|
|
|
}
|
|
|
|
|
|
+static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
|
|
|
+{
|
|
|
+ struct cgroup *cgrp = seq_css(s)->cgroup;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * All processes of a threaded subtree belong to the domain cgroup
|
|
|
+ * of the subtree. Only threads can be distributed across the
|
|
|
+ * subtree. Reject reads on cgroup.procs in the subtree proper.
|
|
|
+ * They're always empty anyway.
|
|
|
+ */
|
|
|
+ if (cgroup_is_threaded(cgrp))
|
|
|
+ return ERR_PTR(-EOPNOTSUPP);
|
|
|
+
|
|
|
+ return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
|
|
|
+ CSS_TASK_ITER_THREADED);
|
|
|
+}
|
|
|
+
|
|
|
static int cgroup_procs_show(struct seq_file *s, void *v)
|
|
|
{
|
|
|
- seq_printf(s, "%d\n", task_tgid_vnr(v));
|
|
|
+ seq_printf(s, "%d\n", task_pid_vnr(v));
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
|
|
|
+ struct cgroup *dst_cgrp,
|
|
|
+ struct super_block *sb)
|
|
|
+{
|
|
|
+ struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
|
|
|
+ struct cgroup *com_cgrp = src_cgrp;
|
|
|
+ struct inode *inode;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ lockdep_assert_held(&cgroup_mutex);
|
|
|
+
|
|
|
+ /* find the common ancestor */
|
|
|
+ while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
|
|
|
+ com_cgrp = cgroup_parent(com_cgrp);
|
|
|
+
|
|
|
+ /* %current should be authorized to migrate to the common ancestor */
|
|
|
+ inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
|
|
|
+ if (!inode)
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
+ ret = inode_permission(inode, MAY_WRITE);
|
|
|
+ iput(inode);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If namespaces are delegation boundaries, %current must be able
|
|
|
+ * to see both source and destination cgroups from its namespace.
|
|
|
+ */
|
|
|
+ if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
|
|
|
+ (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
|
|
|
+ !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
|
|
|
+ return -ENOENT;
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
|
|
|
+ char *buf, size_t nbytes, loff_t off)
|
|
|
+{
|
|
|
+ struct cgroup *src_cgrp, *dst_cgrp;
|
|
|
+ struct task_struct *task;
|
|
|
+ ssize_t ret;
|
|
|
+
|
|
|
+ dst_cgrp = cgroup_kn_lock_live(of->kn, false);
|
|
|
+ if (!dst_cgrp)
|
|
|
+ return -ENODEV;
|
|
|
+
|
|
|
+ task = cgroup_procs_write_start(buf, true);
|
|
|
+ ret = PTR_ERR_OR_ZERO(task);
|
|
|
+ if (ret)
|
|
|
+ goto out_unlock;
|
|
|
+
|
|
|
+ /* find the source cgroup */
|
|
|
+ spin_lock_irq(&css_set_lock);
|
|
|
+ src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
|
|
|
+ spin_unlock_irq(&css_set_lock);
|
|
|
+
|
|
|
+ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
|
|
|
+ of->file->f_path.dentry->d_sb);
|
|
|
+ if (ret)
|
|
|
+ goto out_finish;
|
|
|
+
|
|
|
+ ret = cgroup_attach_task(dst_cgrp, task, true);
|
|
|
+
|
|
|
+out_finish:
|
|
|
+ cgroup_procs_write_finish(task);
|
|
|
+out_unlock:
|
|
|
+ cgroup_kn_unlock(of->kn);
|
|
|
+
|
|
|
+ return ret ?: nbytes;
|
|
|
+}
|
|
|
+
|
|
|
+static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
|
|
|
+{
|
|
|
+ return __cgroup_procs_start(s, pos, 0);
|
|
|
+}
|
|
|
+
|
|
|
+static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
|
|
|
+ char *buf, size_t nbytes, loff_t off)
|
|
|
+{
|
|
|
+ struct cgroup *src_cgrp, *dst_cgrp;
|
|
|
+ struct task_struct *task;
|
|
|
+ ssize_t ret;
|
|
|
+
|
|
|
+ buf = strstrip(buf);
|
|
|
+
|
|
|
+ dst_cgrp = cgroup_kn_lock_live(of->kn, false);
|
|
|
+ if (!dst_cgrp)
|
|
|
+ return -ENODEV;
|
|
|
+
|
|
|
+ task = cgroup_procs_write_start(buf, false);
|
|
|
+ ret = PTR_ERR_OR_ZERO(task);
|
|
|
+ if (ret)
|
|
|
+ goto out_unlock;
|
|
|
+
|
|
|
+ /* find the source cgroup */
|
|
|
+ spin_lock_irq(&css_set_lock);
|
|
|
+ src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
|
|
|
+ spin_unlock_irq(&css_set_lock);
|
|
|
+
|
|
|
+ /* thread migrations follow the cgroup.procs delegation rule */
|
|
|
+ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
|
|
|
+ of->file->f_path.dentry->d_sb);
|
|
|
+ if (ret)
|
|
|
+ goto out_finish;
|
|
|
+
|
|
|
+ /* and must be contained in the same domain */
|
|
|
+ ret = -EOPNOTSUPP;
|
|
|
+ if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
|
|
|
+ goto out_finish;
|
|
|
+
|
|
|
+ ret = cgroup_attach_task(dst_cgrp, task, false);
|
|
|
+
|
|
|
+out_finish:
|
|
|
+ cgroup_procs_write_finish(task);
|
|
|
+out_unlock:
|
|
|
+ cgroup_kn_unlock(of->kn);
|
|
|
+
|
|
|
+ return ret ?: nbytes;
|
|
|
+}
|
|
|
+
|
|
|
/* cgroup core interface files for the default hierarchy */
|
|
|
static struct cftype cgroup_base_files[] = {
|
|
|
+ {
|
|
|
+ .name = "cgroup.type",
|
|
|
+ .flags = CFTYPE_NOT_ON_ROOT,
|
|
|
+ .seq_show = cgroup_type_show,
|
|
|
+ .write = cgroup_type_write,
|
|
|
+ },
|
|
|
{
|
|
|
.name = "cgroup.procs",
|
|
|
.flags = CFTYPE_NS_DELEGATABLE,
|
|
@@ -3896,6 +4372,14 @@ static struct cftype cgroup_base_files[] = {
|
|
|
.seq_show = cgroup_procs_show,
|
|
|
.write = cgroup_procs_write,
|
|
|
},
|
|
|
+ {
|
|
|
+ .name = "cgroup.threads",
|
|
|
+ .release = cgroup_procs_release,
|
|
|
+ .seq_start = cgroup_threads_start,
|
|
|
+ .seq_next = cgroup_procs_next,
|
|
|
+ .seq_show = cgroup_procs_show,
|
|
|
+ .write = cgroup_threads_write,
|
|
|
+ },
|
|
|
{
|
|
|
.name = "cgroup.controllers",
|
|
|
.seq_show = cgroup_controllers_show,
|
|
@@ -3912,6 +4396,20 @@ static struct cftype cgroup_base_files[] = {
|
|
|
.file_offset = offsetof(struct cgroup, events_file),
|
|
|
.seq_show = cgroup_events_show,
|
|
|
},
|
|
|
+ {
|
|
|
+ .name = "cgroup.max.descendants",
|
|
|
+ .seq_show = cgroup_max_descendants_show,
|
|
|
+ .write = cgroup_max_descendants_write,
|
|
|
+ },
|
|
|
+ {
|
|
|
+ .name = "cgroup.max.depth",
|
|
|
+ .seq_show = cgroup_max_depth_show,
|
|
|
+ .write = cgroup_max_depth_write,
|
|
|
+ },
|
|
|
+ {
|
|
|
+ .name = "cgroup.stat",
|
|
|
+ .seq_show = cgroup_stat_show,
|
|
|
+ },
|
|
|
{ } /* terminate */
|
|
|
};
|
|
|
|
|
@@ -4011,9 +4509,15 @@ static void css_release_work_fn(struct work_struct *work)
|
|
|
if (ss->css_released)
|
|
|
ss->css_released(css);
|
|
|
} else {
|
|
|
+ struct cgroup *tcgrp;
|
|
|
+
|
|
|
/* cgroup release path */
|
|
|
trace_cgroup_release(cgrp);
|
|
|
|
|
|
+ for (tcgrp = cgroup_parent(cgrp); tcgrp;
|
|
|
+ tcgrp = cgroup_parent(tcgrp))
|
|
|
+ tcgrp->nr_dying_descendants--;
|
|
|
+
|
|
|
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
|
|
|
cgrp->id = -1;
|
|
|
|
|
@@ -4209,9 +4713,13 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
|
|
|
cgrp->root = root;
|
|
|
cgrp->level = level;
|
|
|
|
|
|
- for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
|
|
|
+ for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
|
|
|
cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
|
|
|
|
|
|
+ if (tcgrp != cgrp)
|
|
|
+ tcgrp->nr_descendants++;
|
|
|
+ }
|
|
|
+
|
|
|
if (notify_on_release(parent))
|
|
|
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
|
|
|
|
|
@@ -4252,6 +4760,29 @@ out_free_cgrp:
|
|
|
return ERR_PTR(ret);
|
|
|
}
|
|
|
|
|
|
+static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
|
|
|
+{
|
|
|
+ struct cgroup *cgroup;
|
|
|
+ int ret = false;
|
|
|
+ int level = 1;
|
|
|
+
|
|
|
+ lockdep_assert_held(&cgroup_mutex);
|
|
|
+
|
|
|
+ for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
|
|
|
+ if (cgroup->nr_descendants >= cgroup->max_descendants)
|
|
|
+ goto fail;
|
|
|
+
|
|
|
+ if (level > cgroup->max_depth)
|
|
|
+ goto fail;
|
|
|
+
|
|
|
+ level++;
|
|
|
+ }
|
|
|
+
|
|
|
+ ret = true;
|
|
|
+fail:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
|
|
|
{
|
|
|
struct cgroup *parent, *cgrp;
|
|
@@ -4266,6 +4797,11 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
|
|
|
if (!parent)
|
|
|
return -ENODEV;
|
|
|
|
|
|
+ if (!cgroup_check_hierarchy_limits(parent)) {
|
|
|
+ ret = -EAGAIN;
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+
|
|
|
cgrp = cgroup_create(parent);
|
|
|
if (IS_ERR(cgrp)) {
|
|
|
ret = PTR_ERR(cgrp);
|
|
@@ -4417,6 +4953,7 @@ static void kill_css(struct cgroup_subsys_state *css)
static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
+ struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
struct cgroup_subsys_state *css;
struct cgrp_cset_link *link;
int ssid;
@@ -4461,7 +4998,15 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
*/
kernfs_remove(cgrp->kn);

- cgroup1_check_for_release(cgroup_parent(cgrp));
+ if (parent && cgroup_is_threaded(cgrp))
+ parent->nr_threaded_children--;
+
+ for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
+ tcgrp->nr_descendants--;
+ tcgrp->nr_dying_descendants++;
+ }
+
+ cgroup1_check_for_release(parent);

/* put the base reference */
percpu_ref_kill(&cgrp->self.refcnt);
@@ -4656,11 +5201,17 @@ int __init cgroup_init(void)

cgrp_dfl_root.subsys_mask |= 1 << ss->id;

+ /* implicit controllers must be threaded too */
+ WARN_ON(ss->implicit_on_dfl && !ss->threaded);
+
if (ss->implicit_on_dfl)
cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
else if (!ss->dfl_cftypes)
cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;

+ if (ss->threaded)
+ cgrp_dfl_threaded_ss_mask |= 1 << ss->id;
+
if (ss->dfl_cftypes == ss->legacy_cftypes) {
WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
} else {