@@ -30,7 +30,6 @@
 #include <linux/cred.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
-#include <linux/fs.h>
 #include <linux/init_task.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -59,7 +58,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/eventfd.h>
 #include <linux/poll.h>
-#include <linux/flex_array.h> /* used in cgroup_attach_proc */
+#include <linux/flex_array.h> /* used in cgroup_attach_task */
 #include <linux/kthread.h>

 #include <linux/atomic.h>
@@ -83,7 +82,13 @@
  * B happens only through cgroup_show_options() and using cgroup_root_mutex
  * breaks it.
  */
+#ifdef CONFIG_PROVE_RCU
+DEFINE_MUTEX(cgroup_mutex);
+EXPORT_SYMBOL_GPL(cgroup_mutex);	/* only for task_subsys_state_check() */
+#else
 static DEFINE_MUTEX(cgroup_mutex);
+#endif
+
 static DEFINE_MUTEX(cgroup_root_mutex);

 /*
@@ -98,56 +103,6 @@ static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
 #include <linux/cgroup_subsys.h>
 };

-#define MAX_CGROUP_ROOT_NAMELEN 64
-
-/*
- * A cgroupfs_root represents the root of a cgroup hierarchy,
- * and may be associated with a superblock to form an active
- * hierarchy
- */
-struct cgroupfs_root {
-	struct super_block *sb;
-
-	/*
-	 * The bitmask of subsystems intended to be attached to this
-	 * hierarchy
-	 */
-	unsigned long subsys_mask;
-
-	/* Unique id for this hierarchy. */
-	int hierarchy_id;
-
-	/* The bitmask of subsystems currently attached to this hierarchy */
-	unsigned long actual_subsys_mask;
-
-	/* A list running through the attached subsystems */
-	struct list_head subsys_list;
-
-	/* The root cgroup for this hierarchy */
-	struct cgroup top_cgroup;
-
-	/* Tracks how many cgroups are currently defined in hierarchy.*/
-	int number_of_cgroups;
-
-	/* A list running through the active hierarchies */
-	struct list_head root_list;
-
-	/* All cgroups on this root, cgroup_mutex protected */
-	struct list_head allcg_list;
-
-	/* Hierarchy-specific flags */
-	unsigned long flags;
-
-	/* IDs for cgroups in this hierarchy */
-	struct ida cgroup_ida;
-
-	/* The path to use for release notifications. */
-	char release_agent_path[PATH_MAX];
-
-	/* The name for this hierarchy - may be empty */
-	char name[MAX_CGROUP_ROOT_NAMELEN];
-};
-
 /*
  * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
  * subsystems that are otherwise unattached - it never has more than a
@@ -162,6 +117,9 @@ struct cfent {
 	struct list_head node;
 	struct dentry *dentry;
 	struct cftype *type;
+
+	/* file xattrs */
+	struct simple_xattrs xattrs;
 };

 /*
@@ -238,6 +196,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
 /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
 #define dummytop (&rootnode.top_cgroup)

+static struct cgroup_name root_cgroup_name = { .name = "/" };
+
 /* This flag indicates whether tasks in the fork and exit paths should
  * check for fork/exit handlers to call. This avoids us having to do
  * extra work in the fork/exit path if none of the subsystems need to
|
|
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
|
|
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
|
|
struct cftype cfts[], bool is_add);
|
|
struct cftype cfts[], bool is_add);
|
|
|
|
|
|
-#ifdef CONFIG_PROVE_LOCKING
|
|
|
|
-int cgroup_lock_is_held(void)
|
|
|
|
-{
|
|
|
|
- return lockdep_is_held(&cgroup_mutex);
|
|
|
|
-}
|
|
|
|
-#else /* #ifdef CONFIG_PROVE_LOCKING */
|
|
|
|
-int cgroup_lock_is_held(void)
|
|
|
|
-{
|
|
|
|
- return mutex_is_locked(&cgroup_mutex);
|
|
|
|
-}
|
|
|
|
-#endif /* #else #ifdef CONFIG_PROVE_LOCKING */
|
|
|
|
-
|
|
|
|
-EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
|
|
|
|
-
|
|
|
|
static int css_unbias_refcnt(int refcnt)
|
|
static int css_unbias_refcnt(int refcnt)
|
|
{
|
|
{
|
|
return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
|
|
return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
|
|
@@ -282,11 +228,25 @@ inline int cgroup_is_removed(const struct cgroup *cgrp)
	return test_bit(CGRP_REMOVED, &cgrp->flags);
 }

-/* bits in struct cgroupfs_root flags field */
-enum {
-	ROOT_NOPREFIX,	/* mounted subsystems have no named prefix */
-	ROOT_XATTR,	/* supports extended attributes */
-};
+/**
+ * cgroup_is_descendant - test ancestry
+ * @cgrp: the cgroup to be tested
+ * @ancestor: possible ancestor of @cgrp
+ *
+ * Test whether @cgrp is a descendant of @ancestor. It also returns %true
+ * if @cgrp == @ancestor. This function is safe to call as long as @cgrp
+ * and @ancestor are accessible.
+ */
+bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
+{
+	while (cgrp) {
+		if (cgrp == ancestor)
+			return true;
+		cgrp = cgrp->parent;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(cgroup_is_descendant);

 static int cgroup_is_releasable(const struct cgroup *cgrp)
 {
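
The new helper above treats a cgroup as its own descendant, so a caller tests subtree membership with a single call. A minimal usage sketch, assuming the caller already holds references that keep both cgroups accessible (the variable names and the allow_access() helper are illustrative, not from this patch):

	/* true if task_cgrp is subtree_root itself or lives below it */
	if (cgroup_is_descendant(task_cgrp, subtree_root))
		allow_access();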
@@ -327,6 +287,23 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
	return __d_cfe(dentry)->type;
 }

+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * On success, returns true; the mutex should be later unlocked. On
+ * failure returns false with no lock held.
+ */
+static bool cgroup_lock_live_group(struct cgroup *cgrp)
+{
+	mutex_lock(&cgroup_mutex);
+	if (cgroup_is_removed(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		return false;
+	}
+	return true;
+}
+
 /* the list of cgroups eligible for automatic release. Protected by
  * release_list_lock */
 static LIST_HEAD(release_list);
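
cgroup_lock_live_group() becomes file-local here; control file handlers use it as a combined lock-and-liveness guard. A sketch of the pattern (the handler itself is hypothetical, but the -ENODEV convention matches the release_agent handlers later in this patch):

	static int example_show(struct cgroup *cgrp)
	{
		if (!cgroup_lock_live_group(cgrp))
			return -ENODEV;	/* cgroup was removed under us */
		/* ... read state protected by cgroup_mutex ... */
		mutex_unlock(&cgroup_mutex);
		return 0;
	}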
@@ -800,27 +777,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * update of a tasks cgroup pointer by cgroup_attach_task()
  */

-/**
- * cgroup_lock - lock out any changes to cgroup structures
- *
- */
-void cgroup_lock(void)
-{
-	mutex_lock(&cgroup_mutex);
-}
-EXPORT_SYMBOL_GPL(cgroup_lock);
-
-/**
- * cgroup_unlock - release lock on cgroup changes
- *
- * Undo the lock taken in a previous cgroup_lock() call.
- */
-void cgroup_unlock(void)
-{
-	mutex_unlock(&cgroup_mutex);
-}
-EXPORT_SYMBOL_GPL(cgroup_unlock);
-
 /*
  * A couple of forward declarations required, due to cyclic reference loop:
  * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
@@ -859,6 +815,17 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
	return inode;
 }

+static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
+{
+	struct cgroup_name *name;
+
+	name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
+	if (!name)
+		return NULL;
+	strcpy(name->name, dentry->d_name.name);
+	return name;
+}
+
 static void cgroup_free_fn(struct work_struct *work)
 {
	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
@@ -874,9 +841,19 @@ static void cgroup_free_fn(struct work_struct *work)
	cgrp->root->number_of_cgroups--;
	mutex_unlock(&cgroup_mutex);

+	/*
+	 * We get a ref to the parent's dentry, and put the ref when
+	 * this cgroup is being freed, so it's guaranteed that the
+	 * parent won't be destroyed before its children.
+	 */
+	dput(cgrp->parent->dentry);
+
+	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+
	/*
	 * Drop the active superblock reference that we took when we
-	 * created the cgroup
+	 * created the cgroup. This will free cgrp->root, if we are
+	 * holding the last reference to @sb.
	 */
	deactivate_super(cgrp->root->sb);

@@ -888,7 +865,7 @@ static void cgroup_free_fn(struct work_struct *work)

	simple_xattrs_free(&cgrp->xattrs);

-	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+	kfree(rcu_dereference_raw(cgrp->name));
	kfree(cgrp);
 }

@@ -910,13 +887,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
	} else {
		struct cfent *cfe = __d_cfe(dentry);
		struct cgroup *cgrp = dentry->d_parent->d_fsdata;
-		struct cftype *cft = cfe->type;

		WARN_ONCE(!list_empty(&cfe->node) &&
			  cgrp != &cgrp->root->top_cgroup,
			  "cfe still linked for %s\n", cfe->type->name);
+		simple_xattrs_free(&cfe->xattrs);
		kfree(cfe);
-		simple_xattrs_free(&cft->xattrs);
	}
	iput(inode);
 }
@@ -1108,9 +1084,11 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
	mutex_lock(&cgroup_root_mutex);
	for_each_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
-	if (test_bit(ROOT_NOPREFIX, &root->flags))
+	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
+		seq_puts(seq, ",sane_behavior");
+	if (root->flags & CGRP_ROOT_NOPREFIX)
		seq_puts(seq, ",noprefix");
-	if (test_bit(ROOT_XATTR, &root->flags))
+	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
@@ -1172,8 +1150,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
			all_ss = true;
			continue;
		}
+		if (!strcmp(token, "__DEVEL__sane_behavior")) {
+			opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
+			continue;
+		}
		if (!strcmp(token, "noprefix")) {
-			set_bit(ROOT_NOPREFIX, &opts->flags);
+			opts->flags |= CGRP_ROOT_NOPREFIX;
			continue;
		}
		if (!strcmp(token, "clone_children")) {
@@ -1181,7 +1163,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
			continue;
		}
		if (!strcmp(token, "xattr")) {
-			set_bit(ROOT_XATTR, &opts->flags);
+			opts->flags |= CGRP_ROOT_XATTR;
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
@@ -1259,13 +1241,26 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)

	/* Consistency checks */

+	if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
+		pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
+
+		if (opts->flags & CGRP_ROOT_NOPREFIX) {
+			pr_err("cgroup: sane_behavior: noprefix is not allowed\n");
+			return -EINVAL;
+		}
+
+		if (opts->cpuset_clone_children) {
+			pr_err("cgroup: sane_behavior: clone_children is not allowed\n");
+			return -EINVAL;
+		}
+	}
+
	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
-	if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
-	    (opts->subsys_mask & mask))
+	if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
		return -EINVAL;

@@ -1336,6 +1331,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
	struct cgroup_sb_opts opts;
	unsigned long added_mask, removed_mask;

+	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
+		pr_err("cgroup: sane_behavior: remount is not allowed\n");
+		return -EINVAL;
+	}
+
	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);
@@ -1421,7 +1421,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
	INIT_LIST_HEAD(&root->allcg_list);
	root->number_of_cgroups = 1;
	cgrp->root = root;
-	cgrp->top_cgroup = cgrp;
+	cgrp->name = &root_cgroup_name;
	init_cgroup_housekeeping(cgrp);
	list_add_tail(&cgrp->allcg_node, &root->allcg_list);
 }
@@ -1685,6 +1685,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
+
+		if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) &&
+		    root->flags != opts.flags) {
+			pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
+			ret = -EINVAL;
+			goto drop_new_super;
+		}
+
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_mask);
	}
@@ -1769,49 +1777,48 @@ static struct kobject *cgroup_kobj;
  * @buf: the buffer to write the path into
  * @buflen: the length of the buffer
  *
- * Called with cgroup_mutex held or else with an RCU-protected cgroup
- * reference. Writes path of cgroup into buf. Returns 0 on success,
- * -errno on error.
+ * Writes path of cgroup into buf. Returns 0 on success, -errno on error.
+ *
+ * We can't generate cgroup path using dentry->d_name, as accessing
+ * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
+ * inode's i_mutex, while on the other hand cgroup_path() can be called
+ * with some irq-safe spinlocks held.
  */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 {
-	struct dentry *dentry = cgrp->dentry;
+	int ret = -ENAMETOOLONG;
	char *start;

-	rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
-			   "cgroup_path() called without proper locking");
-
-	if (cgrp == dummytop) {
-		/*
-		 * Inactive subsystems have no dentry for their root
-		 * cgroup
-		 */
-		strcpy(buf, "/");
+	if (!cgrp->parent) {
+		if (strlcpy(buf, "/", buflen) >= buflen)
+			return -ENAMETOOLONG;
		return 0;
	}

	start = buf + buflen - 1;
-
	*start = '\0';
-	for (;;) {
-		int len = dentry->d_name.len;

+	rcu_read_lock();
+	do {
+		const char *name = cgroup_name(cgrp);
+		int len;
+
+		len = strlen(name);
		if ((start -= len) < buf)
-			return -ENAMETOOLONG;
-		memcpy(start, dentry->d_name.name, len);
-		cgrp = cgrp->parent;
-		if (!cgrp)
-			break;
+			goto out;
+		memcpy(start, name, len);

-		dentry = cgrp->dentry;
-		if (!cgrp->parent)
-			continue;
		if (--start < buf)
-			return -ENAMETOOLONG;
+			goto out;
		*start = '/';
-	}
+
+		cgrp = cgrp->parent;
+	} while (cgrp->parent);
+	ret = 0;
	memmove(buf, start, buf + buflen - start);
-	return 0;
+out:
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
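
Since names now live in RCU-managed cgroup_name structs, cgroup_path() takes only an internal rcu_read_lock() and can be called from contexts holding irq-safe spinlocks. A hedged usage sketch (the buffer size and printout are illustrative):

	char buf[PATH_MAX];

	if (!cgroup_path(cgrp, buf, sizeof(buf)))	/* 0 on success */
		pr_info("cgroup path: %s\n", buf);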
@@ -1900,7 +1907,7 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
  *
  * Must be called with cgroup_mutex and threadgroup locked.
  */
-static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+static void cgroup_task_migrate(struct cgroup *oldcgrp,
				struct task_struct *tsk, struct css_set *newcg)
 {
	struct css_set *oldcg;
@@ -1933,121 +1940,22 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
 }

 /**
- * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
- * @cgrp: the cgroup the task is attaching to
- * @tsk: the task to be attached
- *
- * Call with cgroup_mutex and threadgroup locked. May take task_lock of
- * @tsk during call.
- */
-int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
-{
-	int retval = 0;
-	struct cgroup_subsys *ss, *failed_ss = NULL;
-	struct cgroup *oldcgrp;
-	struct cgroupfs_root *root = cgrp->root;
-	struct cgroup_taskset tset = { };
-	struct css_set *newcg;
-
-	/* @tsk either already exited or can't exit until the end */
-	if (tsk->flags & PF_EXITING)
-		return -ESRCH;
-
-	/* Nothing to do if the task is already in that cgroup */
-	oldcgrp = task_cgroup_from_root(tsk, root);
-	if (cgrp == oldcgrp)
-		return 0;
-
-	tset.single.task = tsk;
-	tset.single.cgrp = oldcgrp;
-
-	for_each_subsys(root, ss) {
-		if (ss->can_attach) {
-			retval = ss->can_attach(cgrp, &tset);
-			if (retval) {
-				/*
-				 * Remember on which subsystem the can_attach()
-				 * failed, so that we only call cancel_attach()
-				 * against the subsystems whose can_attach()
-				 * succeeded. (See below)
-				 */
-				failed_ss = ss;
-				goto out;
-			}
-		}
-	}
-
-	newcg = find_css_set(tsk->cgroups, cgrp);
-	if (!newcg) {
-		retval = -ENOMEM;
-		goto out;
-	}
-
-	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
-
-	for_each_subsys(root, ss) {
-		if (ss->attach)
-			ss->attach(cgrp, &tset);
-	}
-
-out:
-	if (retval) {
-		for_each_subsys(root, ss) {
-			if (ss == failed_ss)
-				/*
-				 * This subsystem was the one that failed the
-				 * can_attach() check earlier, so we don't need
-				 * to call cancel_attach() against it or any
-				 * remaining subsystems.
-				 */
-				break;
-			if (ss->cancel_attach)
-				ss->cancel_attach(cgrp, &tset);
-		}
-	}
-	return retval;
-}
-
-/**
- * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
- * @from: attach to all cgroups of a given task
- * @tsk: the task to be attached
- */
-int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
-{
-	struct cgroupfs_root *root;
-	int retval = 0;
-
-	cgroup_lock();
-	for_each_active_root(root) {
-		struct cgroup *from_cg = task_cgroup_from_root(from, root);
-
-		retval = cgroup_attach_task(from_cg, tsk);
-		if (retval)
-			break;
-	}
-	cgroup_unlock();
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
-
-/**
- * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
+ * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
  * @cgrp: the cgroup to attach to
- * @leader: the threadgroup leader task_struct of the group to be attached
+ * @tsk: the task or the leader of the threadgroup to be attached
+ * @threadgroup: attach the whole threadgroup?
  *
  * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
- * task_lock of each thread in leader's threadgroup individually in turn.
+ * task_lock of @tsk or each thread in the threadgroup individually in turn.
  */
-static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
+static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
+			      bool threadgroup)
 {
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;
-	/* guaranteed to be initialized later, but the compiler needs this */
	struct cgroupfs_root *root = cgrp->root;
	/* threadgroup list cursor and array */
-	struct task_struct *tsk;
+	struct task_struct *leader = tsk;
	struct task_and_cgroup *tc;
	struct flex_array *group;
	struct cgroup_taskset tset = { };
@@ -2059,17 +1967,19 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
	 * group - group_rwsem prevents new threads from appearing, and if
	 * threads exit, this will just be an over-estimate.
	 */
-	group_size = get_nr_threads(leader);
+	if (threadgroup)
+		group_size = get_nr_threads(tsk);
+	else
+		group_size = 1;
	/* flex_array supports very large thread-groups better than kmalloc. */
	group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* pre-allocate to guarantee space while iterating in rcu read-side. */
-	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
+	retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;

-	tsk = leader;
	i = 0;
	/*
	 * Prevent freeing of tasks while we take a snapshot. Tasks that are
@@ -2098,6 +2008,9 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
+
+		if (!threadgroup)
+			break;
	} while_each_thread(leader, tsk);
	rcu_read_unlock();
	/* remember the number of threads in the array for later. */
@@ -2143,7 +2056,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
-		cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
+		cgroup_task_migrate(tc->cgrp, tc->task, tc->cg);
	}
	/* nothing is sensitive to fork() after this point. */

@@ -2251,17 +2164,42 @@ retry_find_task:
			put_task_struct(tsk);
			goto retry_find_task;
		}
-		ret = cgroup_attach_proc(cgrp, tsk);
-	} else
-		ret = cgroup_attach_task(cgrp, tsk);
+	}
+
+	ret = cgroup_attach_task(cgrp, tsk, threadgroup);
+
	threadgroup_unlock(tsk);

	put_task_struct(tsk);
 out_unlock_cgroup:
-	cgroup_unlock();
+	mutex_unlock(&cgroup_mutex);
	return ret;
 }

+/**
+ * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
+ * @from: attach to all cgroups of a given task
+ * @tsk: the task to be attached
+ */
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+{
+	struct cgroupfs_root *root;
+	int retval = 0;
+
+	mutex_lock(&cgroup_mutex);
+	for_each_active_root(root) {
+		struct cgroup *from_cg = task_cgroup_from_root(from, root);
+
+		retval = cgroup_attach_task(from_cg, tsk, false);
+		if (retval)
+			break;
+	}
+	mutex_unlock(&cgroup_mutex);
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
+
 static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
 {
	return attach_task_by_pid(cgrp, pid, false);
@@ -2272,24 +2210,6 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
	return attach_task_by_pid(cgrp, tgid, true);
 }

-/**
- * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
- * @cgrp: the cgroup to be checked for liveness
- *
- * On success, returns true; the lock should be later released with
- * cgroup_unlock(). On failure returns false with no lock held.
- */
-bool cgroup_lock_live_group(struct cgroup *cgrp)
-{
-	mutex_lock(&cgroup_mutex);
-	if (cgroup_is_removed(cgrp)) {
-		mutex_unlock(&cgroup_mutex);
-		return false;
-	}
-	return true;
-}
-EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
-
 static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
 {
@@ -2301,7 +2221,7 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
	mutex_lock(&cgroup_root_mutex);
	strcpy(cgrp->root->release_agent_path, buffer);
	mutex_unlock(&cgroup_root_mutex);
-	cgroup_unlock();
+	mutex_unlock(&cgroup_mutex);
	return 0;
 }

@@ -2312,7 +2232,14 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
		return -ENODEV;
	seq_puts(seq, cgrp->root->release_agent_path);
	seq_putc(seq, '\n');
-	cgroup_unlock();
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
+static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft,
+				     struct seq_file *seq)
+{
+	seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
	return 0;
 }

@@ -2537,13 +2464,40 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
 static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
 {
+	int ret;
+	struct cgroup_name *name, *old_name;
+	struct cgroup *cgrp;
+
+	/*
+	 * It's convenient to use the parent dir's i_mutex to protect
+	 * cgrp->name.
+	 */
+	lockdep_assert_held(&old_dir->i_mutex);
+
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
-	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+
+	cgrp = __d_cgrp(old_dentry);
+
+	name = cgroup_alloc_name(new_dentry);
+	if (!name)
+		return -ENOMEM;
+
+	ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+	if (ret) {
+		kfree(name);
+		return ret;
+	}
+
+	old_name = cgrp->name;
+	rcu_assign_pointer(cgrp->name, name);
+
+	kfree_rcu(old_name, rcu_head);
+	return 0;
 }

 static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
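
After a rename, concurrent readers may still hold the old name; the rcu_assign_pointer()/kfree_rcu() pair above keeps it valid until they finish. A reader therefore samples the name under RCU, as a sketch (the printout is illustrative):

	rcu_read_lock();
	pr_info("now named %s\n", cgroup_name(cgrp));	/* dereferences cgrp->name */
	rcu_read_unlock();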
@@ -2551,13 +2505,13 @@
	if (S_ISDIR(dentry->d_inode->i_mode))
		return &__d_cgrp(dentry)->xattrs;
	else
-		return &__d_cft(dentry)->xattrs;
+		return &__d_cfe(dentry)->xattrs;
 }

 static inline int xattr_enabled(struct dentry *dentry)
 {
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
-	return test_bit(ROOT_XATTR, &root->flags);
+	return root->flags & CGRP_ROOT_XATTR;
 }

 static bool is_valid_xattr(const char *name)
@@ -2727,9 +2681,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
	umode_t mode;
	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };

-	simple_xattrs_init(&cft->xattrs);
-
-	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
+	if (subsys && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
@@ -2753,6 +2705,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
		cfe->type = (void *)cft;
		cfe->dentry = dentry;
		dentry->d_fsdata = cfe;
+		simple_xattrs_init(&cfe->xattrs);
		list_add_tail(&cfe->node, &parent->files);
		cfe = NULL;
	}
@@ -2770,6 +2723,8 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,

	for (cft = cfts; cft->name[0] != '\0'; cft++) {
		/* does cft->flags tell us to skip this file on @cgrp? */
+		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
+			continue;
		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
			continue;
		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
@@ -3300,6 +3255,34 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
	return 0;
 }

+static void cgroup_transfer_one_task(struct task_struct *task,
+				     struct cgroup_scanner *scan)
+{
+	struct cgroup *new_cgroup = scan->data;
+
+	mutex_lock(&cgroup_mutex);
+	cgroup_attach_task(new_cgroup, task, false);
+	mutex_unlock(&cgroup_mutex);
+}
+
+/**
+ * cgroup_transfer_tasks - move tasks from one cgroup to another
+ * @to: cgroup to which the tasks will be moved
+ * @from: cgroup in which the tasks currently reside
+ */
+int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
+{
+	struct cgroup_scanner scan;
+
+	scan.cg = from;
+	scan.test_task = NULL; /* select all tasks in cgroup */
+	scan.process_task = cgroup_transfer_one_task;
+	scan.heap = NULL;
+	scan.data = to;
+
+	return cgroup_scan_tasks(&scan);
+}
+
 /*
  * Stuff for reading the 'tasks'/'procs' files.
  *
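
cgroup_transfer_tasks() reuses the cgroup_scanner machinery: with test_task left NULL every task in @from is selected and handed to cgroup_attach_task() one at a time. A caller (hypothetical here) drains one cgroup into another with a single call:

	/* move every task in from_cgrp into to_cgrp; returns 0 or -errno */
	err = cgroup_transfer_tasks(to_cgrp, from_cgrp);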
@@ -3362,35 +3345,14 @@ static void pidlist_free(void *p)
	else
		kfree(p);
 }
-static void *pidlist_resize(void *p, int newcount)
-{
-	void *newlist;
-	/* note: if new alloc fails, old p will still be valid either way */
-	if (is_vmalloc_addr(p)) {
-		newlist = vmalloc(newcount * sizeof(pid_t));
-		if (!newlist)
-			return NULL;
-		memcpy(newlist, p, newcount * sizeof(pid_t));
-		vfree(p);
-	} else {
-		newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
-	}
-	return newlist;
-}

 /*
  * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
- * If the new stripped list is sufficiently smaller and there's enough memory
- * to allocate a new buffer, will let go of the unneeded memory. Returns the
- * number of unique elements.
+ * Returns the number of unique elements.
  */
-/* is the size difference enough that we should re-allocate the array? */
-#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
-static int pidlist_uniq(pid_t **p, int length)
+static int pidlist_uniq(pid_t *list, int length)
 {
	int src, dest = 1;
-	pid_t *list = *p;
-	pid_t *newlist;

	/*
	 * we presume the 0th element is unique, so i starts at 1. trivial
@@ -3411,16 +3373,6 @@ static int pidlist_uniq(pid_t **p, int length)
		dest++;
	}
 after:
-	/*
-	 * if the length difference is large enough, we want to allocate a
-	 * smaller buffer to save memory. if this fails due to out of memory,
-	 * we'll just stay with what we've got.
-	 */
-	if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
-		newlist = pidlist_resize(list, dest);
-		if (newlist)
-			*p = newlist;
-	}
	return dest;
 }

@@ -3516,7 +3468,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
	/* now sort & (if procs) strip out duplicates */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
-		length = pidlist_uniq(&array, length);
+		length = pidlist_uniq(array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
@@ -3930,11 +3882,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
	if (ret)
		goto fail;

-	if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
-		event->cft->unregister_event(cgrp, event->cft, event->eventfd);
-		ret = 0;
-		goto fail;
-	}
+	efile->f_op->poll(efile, &event->pt);

	/*
	 * Events should be removed after rmdir of cgroup directory, but before
@@ -4016,9 +3964,15 @@ static struct cftype files[] = {
	},
	{
		.name = "cgroup.clone_children",
+		.flags = CFTYPE_INSANE,
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
+	{
+		.name = "cgroup.sane_behavior",
+		.flags = CFTYPE_ONLY_ON_ROOT,
+		.read_seq_string = cgroup_sane_behavior_show,
+	},
	{
		.name = "release_agent",
		.flags = CFTYPE_ONLY_ON_ROOT,
@@ -4131,17 +4085,8 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
	if (!(css->flags & CSS_ONLINE))
		return;

-	/*
-	 * css_offline() should be called with cgroup_mutex unlocked. See
-	 * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for
-	 * details. This temporary unlocking should go away once
-	 * cgroup_mutex is unexported from controllers.
-	 */
-	if (ss->css_offline) {
-		mutex_unlock(&cgroup_mutex);
+	if (ss->css_offline)
		ss->css_offline(cgrp);
-		mutex_lock(&cgroup_mutex);
-	}

	cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
 }
@@ -4158,6 +4103,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			  umode_t mode)
 {
	struct cgroup *cgrp;
+	struct cgroup_name *name;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
@@ -4168,9 +4114,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
	if (!cgrp)
		return -ENOMEM;

+	name = cgroup_alloc_name(dentry);
+	if (!name)
+		goto err_free_cgrp;
+	rcu_assign_pointer(cgrp->name, name);
+
	cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
	if (cgrp->id < 0)
-		goto err_free_cgrp;
+		goto err_free_name;

	/*
	 * Only live parents can have children. Note that the liveliness
@@ -4198,7 +4149,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,

	cgrp->parent = parent;
	cgrp->root = parent->root;
-	cgrp->top_cgroup = parent->top_cgroup;

	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -4241,6 +4191,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
	for_each_subsys(root, ss)
		dget(dentry);

+	/* hold a ref to the parent's dentry */
+	dget(parent->dentry);
+
	/* creation succeeded, notify subsystems */
	for_each_subsys(root, ss) {
		err = online_css(ss, cgrp);
@@ -4276,6 +4229,8 @@ err_free_all:
	deactivate_super(sb);
 err_free_id:
	ida_simple_remove(&root->cgroup_ida, cgrp->id);
+err_free_name:
+	kfree(rcu_dereference_raw(cgrp->name));
 err_free_cgrp:
	kfree(cgrp);
	return err;
@@ -4295,56 +4250,13 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
 }

-/*
- * Check the reference count on each subsystem. Since we already
- * established that there are no tasks in the cgroup, if the css refcount
- * is also 1, then there should be no outstanding references, so the
- * subsystem is safe to destroy. We scan across all subsystems rather than
- * using the per-hierarchy linked list of mounted subsystems since we can
- * be called via check_for_release() with no synchronization other than
- * RCU, and the subsystem linked list isn't RCU-safe.
- */
-static int cgroup_has_css_refs(struct cgroup *cgrp)
-{
-	int i;
-
-	/*
-	 * We won't need to lock the subsys array, because the subsystems
-	 * we're concerned about aren't going anywhere since our cgroup root
-	 * has a reference on them.
-	 */
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		struct cgroup_subsys *ss = subsys[i];
-		struct cgroup_subsys_state *css;
-
-		/* Skip subsystems not present or not in this hierarchy */
-		if (ss == NULL || ss->root != cgrp->root)
-			continue;
-
-		css = cgrp->subsys[ss->subsys_id];
-		/*
-		 * When called from check_for_release() it's possible
-		 * that by this point the cgroup has been removed
-		 * and the css deleted. But a false-positive doesn't
-		 * matter, since it can only happen if the cgroup
-		 * has been deleted and hence no longer needs the
-		 * release agent to be called anyway.
-		 */
-		if (css && css_refcnt(css) > 1)
-			return 1;
-	}
-	return 0;
-}
-
 static int cgroup_destroy_locked(struct cgroup *cgrp)
	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
	struct dentry *d = cgrp->dentry;
	struct cgroup *parent = cgrp->parent;
-	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	struct cgroup_subsys *ss;
-	LIST_HEAD(tmp_list);

	lockdep_assert_held(&d->d_inode->i_mutex);
	lockdep_assert_held(&cgroup_mutex);
@@ -4935,17 +4847,17 @@ void cgroup_post_fork(struct task_struct *child)
	 * and addition to css_set.
	 */
	if (need_forkexit_callback) {
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * fork/exit callbacks are supported only for builtin
+		 * subsystems, and the builtin section of the subsys
+		 * array is immutable, so we don't need to lock the
+		 * subsys array here. On the other hand, modular section
+		 * of the array can be freed at module unload, so we
+		 * can't touch that.
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];

-			/*
-			 * fork/exit callbacks are supported only for
-			 * builtin subsystems and we don't need further
-			 * synchronization as they never go away.
-			 */
-			if (!ss || ss->module)
-				continue;
-
			if (ss->fork)
				ss->fork(child);
		}
@@ -5010,13 +4922,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
	tsk->cgroups = &init_css_set;

	if (run_callbacks && need_forkexit_callback) {
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * fork/exit callbacks are supported only for builtin
+		 * subsystems, see cgroup_post_fork() for details.
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];

-			/* modular subsystems can't use callbacks */
-			if (!ss || ss->module)
-				continue;
-
			if (ss->exit) {
				struct cgroup *old_cgrp =
					rcu_dereference_raw(cg->subsys[i])->cgroup;
@@ -5030,44 +4942,19 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
	put_css_set_taskexit(cg);
 }

-/**
- * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
- * @cgrp: the cgroup in question
- * @task: the task in question
- *
- * See if @cgrp is a descendant of @task's cgroup in the appropriate
- * hierarchy.
- *
- * If we are sending in dummytop, then presumably we are creating
- * the top cgroup in the subsystem.
- *
- * Called only by the ns (nsproxy) cgroup.
- */
-int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
-{
-	int ret;
-	struct cgroup *target;
-
-	if (cgrp == dummytop)
-		return 1;
-
-	target = task_cgroup_from_root(task, cgrp->root);
-	while (cgrp != target && cgrp!= cgrp->top_cgroup)
-		cgrp = cgrp->parent;
-	ret = (cgrp == target);
-	return ret;
-}
-
 static void check_for_release(struct cgroup *cgrp)
 {
	/* All of these checks rely on RCU to keep the cgroup
	 * structure alive */
-	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
-	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
-		/* Control Group is currently removeable. If it's not
+	if (cgroup_is_releasable(cgrp) &&
+	    !atomic_read(&cgrp->count) && list_empty(&cgrp->children)) {
+		/*
+		 * Control Group is currently removeable. If it's not
		 * already queued for a userspace notification, queue
-		 * it now */
+		 * it now
+		 */
		int need_schedule_work = 0;
+
		raw_spin_lock(&release_list_lock);
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
@@ -5100,24 +4987,11 @@ EXPORT_SYMBOL_GPL(__css_tryget);
 /* Caller must verify that the css is not for root cgroup */
 void __css_put(struct cgroup_subsys_state *css)
 {
-	struct cgroup *cgrp = css->cgroup;
	int v;

-	rcu_read_lock();
	v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
-
-	switch (v) {
-	case 1:
-		if (notify_on_release(cgrp)) {
-			set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
-		}
-		break;
-	case 0:
+	if (v == 0)
		schedule_work(&css->dput_work);
-		break;
-	}
-	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(__css_put);