
Merge branch 'for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Nothing major. Two notable fixes are Li's second stab at fixing the
  long-standing race condition in the mount path and suppression of
  spurious warning from cgroup_get(). All other changes are trivial"

* 'for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: mark cgroup_get() with __maybe_unused
  cgroup: avoid attaching a cgroup root to two different superblocks, take 2
  cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc()
  cgroup: move cgroup_subsys_state parent field for cache locality
  cpuset: Remove cpuset_update_active_cpus()'s parameter.
  cgroup: switch to BUG_ON()
  cgroup: drop duplicate header nsproxy.h
  kernel: convert css_set.refcount from atomic_t to refcount_t
  kernel: convert cgroup_namespace.count from atomic_t to refcount_t
Linus Torvalds, 8 years ago
parent commit 9410091dd5
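
Aside: the two refcount_t conversions at the bottom of the shortlog replace plain atomic_t counters with the hardened refcount_t type, which saturates instead of wrapping on overflow and WARNs on misuse. A minimal sketch of the API as these patches use it (kernel-style C; struct obj and its helpers are illustrative, not part of the patch):

	#include <linux/refcount.h>
	#include <linux/slab.h>

	/* A hypothetical refcounted object, mirroring css_set / cgroup_namespace. */
	struct obj {
		refcount_t count;
	};

	static struct obj *obj_alloc(void)
	{
		struct obj *o = kmalloc(sizeof(*o), GFP_KERNEL);

		if (o)
			refcount_set(&o->count, 1);	/* cf. alloc_cgroup_ns() */
		return o;
	}

	static void obj_get(struct obj *o)
	{
		refcount_inc(&o->count);	/* saturates and WARNs instead of wrapping */
	}

	static void obj_put(struct obj *o)
	{
		if (refcount_dec_and_test(&o->count))	/* true only for the final ref */
			kfree(o);
	}

Statically initialized instances use REFCOUNT_INIT(n), as the init_cgroup_ns and init_css_set hunks below show.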

+ 8 - 4
include/linux/cgroup-defs.h

@@ -13,6 +13,7 @@
 #include <linux/wait.h>
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
+#include <linux/refcount.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
@@ -106,9 +107,6 @@ struct cgroup_subsys_state {
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
-	/* PI: the parent css */
-	struct cgroup_subsys_state *parent;
-
 	/* siblings list anchored at the parent's ->children */
 	struct list_head sibling;
 	struct list_head children;
@@ -138,6 +136,12 @@ struct cgroup_subsys_state {
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
+
+	/*
+	 * PI: the parent css.	Placed here for cache proximity to following
+	 * fields of the containing structure.
+	 */
+	struct cgroup_subsys_state *parent;
 };
 
 /*
@@ -156,7 +160,7 @@ struct css_set {
 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 
 	/* reference count */
-	atomic_t refcount;
+	refcount_t refcount;
 
 	/* the default cgroup associated with this css_set */
 	struct cgroup *dfl_cgrp;
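
Note on the parent-field move: struct cgroup_subsys_state is normally embedded at the start of a larger object (struct cgroup embeds it as ->self), so a field placed at the tail of the css lands in the same cache lines as the first fields of the containing structure that follow it. A rough, illustrative-only layout sketch:

	/* Illustrative layout only; the real definitions live in cgroup-defs.h. */
	struct outer {
		struct inner {
			/* ... colder fields ... */
			struct inner *parent;	/* tail of the embedded struct */
		} self;
		unsigned long hot_field;	/* adjacent to self.parent in memory */
	};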

+ 4 - 4
include/linux/cgroup.h

@@ -17,11 +17,11 @@
 #include <linux/seq_file.h>
 #include <linux/kernfs.h>
 #include <linux/jump_label.h>
-#include <linux/nsproxy.h>
 #include <linux/types.h>
 #include <linux/ns_common.h>
 #include <linux/nsproxy.h>
 #include <linux/user_namespace.h>
+#include <linux/refcount.h>
 
 #include <linux/cgroup-defs.h>
 
@@ -661,7 +661,7 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
 #endif	/* CONFIG_CGROUP_DATA */
 
 struct cgroup_namespace {
-	atomic_t		count;
+	refcount_t		count;
 	struct ns_common	ns;
 	struct user_namespace	*user_ns;
 	struct ucounts		*ucounts;
@@ -696,12 +696,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
 static inline void get_cgroup_ns(struct cgroup_namespace *ns)
 {
 	if (ns)
-		atomic_inc(&ns->count);
+		refcount_inc(&ns->count);
 }
 
 static inline void put_cgroup_ns(struct cgroup_namespace *ns)
 {
-	if (ns && atomic_dec_and_test(&ns->count))
+	if (ns && refcount_dec_and_test(&ns->count))
 		free_cgroup_ns(ns);
 }
 

+ 2 - 2
include/linux/cpuset.h

@@ -42,7 +42,7 @@ static inline void cpuset_dec(void)
 
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
-extern void cpuset_update_active_cpus(bool cpu_online);
+extern void cpuset_update_active_cpus(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -155,7 +155,7 @@ static inline bool cpusets_enabled(void) { return false; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
-static inline void cpuset_update_active_cpus(bool cpu_online)
+static inline void cpuset_update_active_cpus(void)
 {
 	partition_sched_domains(1, NULL, NULL);
 }

+ 4 - 3
kernel/cgroup/cgroup-internal.h

@@ -5,6 +5,7 @@
 #include <linux/kernfs.h>
 #include <linux/workqueue.h>
 #include <linux/list.h>
+#include <linux/refcount.h>
 
 /*
  * A cgroup can be associated with multiple css_sets as different tasks may
@@ -134,7 +135,7 @@ static inline void put_css_set(struct css_set *cset)
 	 * can see it. Similar to atomic_dec_and_lock(), but for an
 	 * rwlock
 	 */
-	if (atomic_add_unless(&cset->refcount, -1, 1))
+	if (refcount_dec_not_one(&cset->refcount))
 		return;
 
 	spin_lock_irqsave(&css_set_lock, flags);
@@ -147,7 +148,7 @@ static inline void put_css_set(struct css_set *cset)
  */
 static inline void get_css_set(struct css_set *cset)
 {
-	atomic_inc(&cset->refcount);
+	refcount_inc(&cset->refcount);
 }
 
 bool cgroup_ssid_enabled(int ssid);
@@ -163,7 +164,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
 
 void cgroup_free_root(struct cgroup_root *root);
 void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
-int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
+int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags);
 int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
 struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
 			       struct cgroup_root *root, unsigned long magic,
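
The refcount_dec_not_one() above is a drop-in for the old atomic_add_unless(&cset->refcount, -1, 1): both decrement and return true unless the count is exactly 1, so put_css_set() keeps its lockless fast path and only a possible final put takes css_set_lock. The pattern, continuing the illustrative struct obj sketch from above (obj_lock is a hypothetical DEFINE_SPINLOCK() standing in for css_set_lock):

	static void obj_put_slow(struct obj *o)
	{
		unsigned long flags;

		/* Fast path: not the last holder, no lock needed. */
		if (refcount_dec_not_one(&o->count))
			return;

		/* Possible final put: drop the last ref under the lock so
		 * concurrent lookups cannot resurrect a dying object. */
		spin_lock_irqsave(&obj_lock, flags);
		if (refcount_dec_and_test(&o->count)) {
			/* unlink and free */
		}
		spin_unlock_irqrestore(&obj_lock, flags);
	}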

+ 17 - 3
kernel/cgroup/cgroup-v1.c

@@ -346,7 +346,7 @@ static int cgroup_task_count(const struct cgroup *cgrp)
 
 	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
-		count += atomic_read(&link->cset->refcount);
+		count += refcount_read(&link->cset->refcount);
 	spin_unlock_irq(&css_set_lock);
 	return count;
 }
@@ -1072,6 +1072,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
 	struct cgroup_subsys *ss;
 	struct dentry *dentry;
 	int i, ret;
+	bool new_root = false;
 
 	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
 
@@ -1181,10 +1182,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
 		ret = -ENOMEM;
 		goto out_unlock;
 	}
+	new_root = true;
 
 	init_cgroup_root(root, &opts);
 
-	ret = cgroup_setup_root(root, opts.subsys_mask);
+	ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
 	if (ret)
 		cgroup_free_root(root);
 
@@ -1200,6 +1202,18 @@ out_free:
 	dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
 				 CGROUP_SUPER_MAGIC, ns);
 
+	/*
+	 * There's a race window after we release cgroup_mutex and before
+	 * allocating a superblock. Make sure a concurrent process won't
+	 * be able to re-use the root during this window by delaying the
+	 * initialization of root refcnt.
+	 */
+	if (new_root) {
+		mutex_lock(&cgroup_mutex);
+		percpu_ref_reinit(&root->cgrp.self.refcnt);
+		mutex_unlock(&cgroup_mutex);
+	}
+
 	/*
 	 * If @pinned_sb, we're reusing an existing root and holding an
 	 * extra ref on its sb.  Mount is complete.  Put the extra ref.
@@ -1286,7 +1300,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
 	u64 count;
 
 	rcu_read_lock();
-	count = atomic_read(&task_css_set(current)->refcount);
+	count = refcount_read(&task_css_set(current)->refcount);
 	rcu_read_unlock();
 	return count;
 }
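
Why the take-2 fix works: a percpu_ref initialized with PERCPU_REF_INIT_DEAD fails percpu_ref_tryget_live() until percpu_ref_reinit() brings it alive, so while cgroup1_mount() has dropped cgroup_mutex to allocate the superblock, a concurrent mount cannot look up and reuse the half-constructed root. The shape of the pattern, with illustrative names:

	#include <linux/percpu-refcount.h>

	static void my_release(struct percpu_ref *ref)
	{
		/* free the containing object */
	}

	static int my_setup(struct percpu_ref *ref, struct mutex *lock)
	{
		/* Born dead: lookups via percpu_ref_tryget_live() fail. */
		int ret = percpu_ref_init(ref, my_release,
					  PERCPU_REF_INIT_DEAD, GFP_KERNEL);
		if (ret)
			return ret;

		/* ... drop @lock, do the blocking allocation, re-take @lock ... */

		mutex_lock(lock);
		percpu_ref_reinit(ref);	/* only now can others take live refs */
		mutex_unlock(lock);
		return 0;
	}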

+ 25 - 15
kernel/cgroup/cgroup.c

@@ -189,7 +189,7 @@ static u16 have_canfork_callback __read_mostly;
 
 /* cgroup namespace for init task */
 struct cgroup_namespace init_cgroup_ns = {
-	.count		= { .counter = 2, },
+	.count		= REFCOUNT_INIT(2),
 	.user_ns	= &init_user_ns,
 	.ns.ops		= &cgroupns_operations,
 	.ns.inum	= PROC_CGROUP_INIT_INO,
@@ -436,7 +436,12 @@ out_unlock:
 	return css;
 }
 
-static void cgroup_get(struct cgroup *cgrp)
+static void __maybe_unused cgroup_get(struct cgroup *cgrp)
+{
+	css_get(&cgrp->self);
+}
+
+static void cgroup_get_live(struct cgroup *cgrp)
 {
 	WARN_ON_ONCE(cgroup_is_dead(cgrp));
 	css_get(&cgrp->self);
@@ -554,7 +559,7 @@ EXPORT_SYMBOL_GPL(of_css);
  * haven't been created.
  */
 struct css_set init_css_set = {
-	.refcount		= ATOMIC_INIT(1),
+	.refcount		= REFCOUNT_INIT(1),
 	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
 	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
 	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
@@ -724,7 +729,7 @@ void put_css_set_locked(struct css_set *cset)
 
 	lockdep_assert_held(&css_set_lock);
 
-	if (!atomic_dec_and_test(&cset->refcount))
+	if (!refcount_dec_and_test(&cset->refcount))
 		return;
 
 	/* This css_set is dead. unlink it and release cgroup and css refs */
@@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 	list_add_tail(&link->cgrp_link, &cset->cgrp_links);
 
 	if (cgroup_parent(cgrp))
-		cgroup_get(cgrp);
+		cgroup_get_live(cgrp);
 }
 
 /**
@@ -977,7 +982,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 		return NULL;
 	}
 
-	atomic_set(&cset->refcount, 1);
+	refcount_set(&cset->refcount, 1);
 	INIT_LIST_HEAD(&cset->tasks);
 	INIT_LIST_HEAD(&cset->mg_tasks);
 	INIT_LIST_HEAD(&cset->task_iters);
@@ -1640,7 +1645,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
 
-int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
+int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
 {
 	LIST_HEAD(tmp_links);
 	struct cgroup *root_cgrp = &root->cgrp;
@@ -1656,8 +1661,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	root_cgrp->id = ret;
 	root_cgrp->ancestor_ids[0] = ret;
 
-	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
-			      GFP_KERNEL);
+	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
+			      ref_flags, GFP_KERNEL);
 	if (ret)
 		goto out;
 
@@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 			return ERR_PTR(-EINVAL);
 		}
 		cgrp_dfl_visible = true;
-		cgroup_get(&cgrp_dfl_root.cgrp);
+		cgroup_get_live(&cgrp_dfl_root.cgrp);
 
 		dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
 					 CGROUP2_SUPER_MAGIC, ns);
@@ -2576,7 +2581,7 @@ restart:
 			if (!css || !percpu_ref_is_dying(&css->refcnt))
 				continue;
 
-			cgroup_get(dsct);
+			cgroup_get_live(dsct);
 			prepare_to_wait(&dsct->offline_waitq, &wait,
 					TASK_UNINTERRUPTIBLE);
 
@@ -3947,7 +3952,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 {
 	lockdep_assert_held(&cgroup_mutex);
 
-	cgroup_get(cgrp);
+	cgroup_get_live(cgrp);
 
 	memset(css, 0, sizeof(*css));
 	css->cgroup = cgrp;
@@ -4123,7 +4128,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	/* allocation complete, commit to creation */
 	list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
 	atomic_inc(&root->nr_cgrps);
-	cgroup_get(parent);
+	cgroup_get_live(parent);
 
 	/*
 	 * @cgrp is now fully operational.  If something fails after this
@@ -4513,7 +4518,7 @@ int __init cgroup_init(void)
 	hash_add(css_set_table, &init_css_set.hlist,
 		 css_set_hash(init_css_set.subsys));
 
-	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
+	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
 
 	mutex_unlock(&cgroup_mutex);
 
@@ -4947,7 +4952,7 @@ struct cgroup *cgroup_get_from_path(const char *path)
 	if (kn) {
 		if (kernfs_type(kn) == KERNFS_DIR) {
 			cgrp = kn->priv;
-			cgroup_get(cgrp);
+			cgroup_get_live(cgrp);
 		} else {
 			cgrp = ERR_PTR(-ENOTDIR);
 		}
@@ -5027,6 +5032,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 
 	/* Socket clone path */
 	if (skcd->val) {
+		/*
+		 * We might be cloning a socket which is left in an empty
+		 * cgroup and the cgroup might have already been rmdir'd.
+		 * Don't use cgroup_get_live().
+		 */
 		cgroup_get(sock_cgroup_ptr(skcd));
 		return;
 	}
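
Rule of thumb after the cgroup_get()/cgroup_get_live() split: internal hierarchy code that can only legitimately see live cgroups uses cgroup_get_live(), which WARNs on a dead cgroup, while the socket-clone path above keeps plain cgroup_get(), since a cloned socket may sit in a cgroup that has already been rmdir'd. Hypothetical call sites illustrating the choice:

	/* Hierarchy management: the parent must still be alive. */
	static void attach_child(struct cgroup *parent)
	{
		cgroup_get_live(parent);	/* WARNs if @parent is already dead */
	}

	/* Socket clone: the source may live in an rmdir'd cgroup. */
	static void pin_sock_cgroup(struct cgroup *cgrp)
	{
		cgroup_get(cgrp);		/* silently pins even a dead cgroup */
	}

The __maybe_unused on cgroup_get() keeps the compiler quiet when its only remaining caller, the sock cgroup code, is compiled out.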

+ 4 - 7
kernel/cgroup/cpuset.c

@@ -2121,10 +2121,8 @@ int __init cpuset_init(void)
 {
 	int err = 0;
 
-	if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
-		BUG();
-	if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
-		BUG();
+	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
 
 	cpumask_setall(top_cpuset.cpus_allowed);
 	nodes_setall(top_cpuset.mems_allowed);
@@ -2139,8 +2137,7 @@ int __init cpuset_init(void)
 	if (err < 0)
 		return err;
 
-	if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
-		BUG();
+	BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
 
 	return 0;
 }
@@ -2354,7 +2351,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 		rebuild_sched_domains();
 }
 
-void cpuset_update_active_cpus(bool cpu_online)
+void cpuset_update_active_cpus(void)
 {
 	/*
 	 * We're inside cpu hotplug critical region which usually nests

+ 1 - 1
kernel/cgroup/namespace.c

@@ -31,7 +31,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 		kfree(new_ns);
 		return ERR_PTR(ret);
 	}
-	atomic_set(&new_ns->count, 1);
+	refcount_set(&new_ns->count, 1);
 	new_ns->ns.ops = &cgroupns_operations;
 	return new_ns;
 }

+ 2 - 2
kernel/sched/core.c

@@ -5732,7 +5732,7 @@ static void cpuset_cpu_active(void)
 		 * cpuset configurations.
 		 */
 	}
-	cpuset_update_active_cpus(true);
+	cpuset_update_active_cpus();
 }
 
 static int cpuset_cpu_inactive(unsigned int cpu)
@@ -5755,7 +5755,7 @@ static int cpuset_cpu_inactive(unsigned int cpu)
 
 		if (overflow)
 			return -EBUSY;
-		cpuset_update_active_cpus(false);
+		cpuset_update_active_cpus();
 	} else {
 		num_cpus_frozen++;
 		partition_sched_domains(1, NULL, NULL);