|
@@ -138,6 +138,9 @@ struct cgroupfs_root {
|
|
/* Hierarchy-specific flags */
|
|
/* Hierarchy-specific flags */
|
|
unsigned long flags;
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
+ /* IDs for cgroups in this hierarchy */
|
|
|
|
+ struct ida cgroup_ida;
|
|
|
|
+
|
|
/* The path to use for release notifications. */
|
|
/* The path to use for release notifications. */
|
|
char release_agent_path[PATH_MAX];
|
|
char release_agent_path[PATH_MAX];
|
|
|
|
|
|
@@ -171,8 +174,8 @@ struct css_id {
|
|
* The css to which this ID points. This pointer is set to valid value
|
|
* The css to which this ID points. This pointer is set to valid value
|
|
* after cgroup is populated. If cgroup is removed, this will be NULL.
|
|
* after cgroup is populated. If cgroup is removed, this will be NULL.
|
|
* This pointer is expected to be RCU-safe because destroy()
|
|
* This pointer is expected to be RCU-safe because destroy()
|
|
- * is called after synchronize_rcu(). But for safe use, css_is_removed()
|
|
|
|
- * css_tryget() should be used for avoiding race.
|
|
|
|
|
|
+ * is called after synchronize_rcu(). But for safe use, css_tryget()
|
|
|
|
+ * should be used for avoiding race.
|
|
*/
|
|
*/
|
|
struct cgroup_subsys_state __rcu *css;
|
|
struct cgroup_subsys_state __rcu *css;
|
|
/*
|
|
/*
|
|
@@ -242,6 +245,10 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
|
|
*/
|
|
*/
|
|
static int need_forkexit_callback __read_mostly;
|
|
static int need_forkexit_callback __read_mostly;
|
|
|
|
|
|
|
|
+static int cgroup_destroy_locked(struct cgroup *cgrp);
|
|
|
|
+static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
|
|
|
|
+ struct cftype cfts[], bool is_add);
|
|
|
|
+
|
|
#ifdef CONFIG_PROVE_LOCKING
|
|
#ifdef CONFIG_PROVE_LOCKING
|
|
int cgroup_lock_is_held(void)
|
|
int cgroup_lock_is_held(void)
|
|
{
|
|
{
|
|
@@ -294,11 +301,6 @@ static int notify_on_release(const struct cgroup *cgrp)
|
|
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
|
|
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
|
|
}
|
|
}
|
|
|
|
|
|
-static int clone_children(const struct cgroup *cgrp)
|
|
|
|
-{
|
|
|
|
- return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
* for_each_subsys() allows you to iterate on each subsystem attached to
|
|
* for_each_subsys() allows you to iterate on each subsystem attached to
|
|
* an active hierarchy
|
|
* an active hierarchy
|
|
@@ -782,12 +784,12 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
|
|
* The task_lock() exception
|
|
* The task_lock() exception
|
|
*
|
|
*
|
|
* The need for this exception arises from the action of
|
|
* The need for this exception arises from the action of
|
|
- * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
|
|
|
|
|
|
+ * cgroup_attach_task(), which overwrites one task's cgroup pointer with
|
|
* another. It does so using cgroup_mutex, however there are
|
|
* another. It does so using cgroup_mutex, however there are
|
|
* several performance critical places that need to reference
|
|
* several performance critical places that need to reference
|
|
* task->cgroup without the expense of grabbing a system global
|
|
* task->cgroup without the expense of grabbing a system global
|
|
* mutex. Therefore except as noted below, when dereferencing or, as
|
|
* mutex. Therefore except as noted below, when dereferencing or, as
|
|
- * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
|
|
|
|
|
|
+ * in cgroup_attach_task(), modifying a task's cgroup pointer we use
|
|
* task_lock(), which acts on a spinlock (task->alloc_lock) already in
|
|
* task_lock(), which acts on a spinlock (task->alloc_lock) already in
|
|
* the task_struct routinely used for such matters.
|
|
* the task_struct routinely used for such matters.
|
|
*
|
|
*
|
|
@@ -854,30 +856,6 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
|
|
return inode;
|
|
return inode;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * Call subsys's pre_destroy handler.
|
|
|
|
- * This is called before css refcnt check.
|
|
|
|
- */
|
|
|
|
-static int cgroup_call_pre_destroy(struct cgroup *cgrp)
|
|
|
|
-{
|
|
|
|
- struct cgroup_subsys *ss;
|
|
|
|
- int ret = 0;
|
|
|
|
-
|
|
|
|
- for_each_subsys(cgrp->root, ss) {
|
|
|
|
- if (!ss->pre_destroy)
|
|
|
|
- continue;
|
|
|
|
-
|
|
|
|
- ret = ss->pre_destroy(cgrp);
|
|
|
|
- if (ret) {
|
|
|
|
- /* ->pre_destroy() failure is being deprecated */
|
|
|
|
- WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs);
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
|
|
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
|
|
{
|
|
{
|
|
/* is dentry a directory ? if so, kfree() associated cgroup */
|
|
/* is dentry a directory ? if so, kfree() associated cgroup */
|
|
@@ -898,7 +876,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
|
|
* Release the subsystem state objects.
|
|
* Release the subsystem state objects.
|
|
*/
|
|
*/
|
|
for_each_subsys(cgrp->root, ss)
|
|
for_each_subsys(cgrp->root, ss)
|
|
- ss->destroy(cgrp);
|
|
|
|
|
|
+ ss->css_free(cgrp);
|
|
|
|
|
|
cgrp->root->number_of_cgroups--;
|
|
cgrp->root->number_of_cgroups--;
|
|
mutex_unlock(&cgroup_mutex);
|
|
mutex_unlock(&cgroup_mutex);
|
|
@@ -917,6 +895,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
|
|
|
|
|
|
simple_xattrs_free(&cgrp->xattrs);
|
|
simple_xattrs_free(&cgrp->xattrs);
|
|
|
|
|
|
|
|
+ ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
|
|
kfree_rcu(cgrp, rcu_head);
|
|
kfree_rcu(cgrp, rcu_head);
|
|
} else {
|
|
} else {
|
|
struct cfent *cfe = __d_cfe(dentry);
|
|
struct cfent *cfe = __d_cfe(dentry);
|
|
@@ -987,7 +966,7 @@ static void cgroup_clear_directory(struct dentry *dir, bool base_files,
|
|
if (!test_bit(ss->subsys_id, &subsys_mask))
|
|
if (!test_bit(ss->subsys_id, &subsys_mask))
|
|
continue;
|
|
continue;
|
|
list_for_each_entry(set, &ss->cftsets, node)
|
|
list_for_each_entry(set, &ss->cftsets, node)
|
|
- cgroup_rm_file(cgrp, set->cfts);
|
|
|
|
|
|
+ cgroup_addrm_files(cgrp, NULL, set->cfts, false);
|
|
}
|
|
}
|
|
if (base_files) {
|
|
if (base_files) {
|
|
while (!list_empty(&cgrp->files))
|
|
while (!list_empty(&cgrp->files))
|
|
@@ -1014,33 +993,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
|
|
remove_dir(dentry);
|
|
remove_dir(dentry);
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
|
|
|
|
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
|
|
|
|
- * reference to css->refcnt. In general, this refcnt is expected to goes down
|
|
|
|
- * to zero, soon.
|
|
|
|
- *
|
|
|
|
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
|
|
|
|
- */
|
|
|
|
-static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
|
|
|
|
-
|
|
|
|
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
|
|
|
|
-{
|
|
|
|
- if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
|
|
|
|
- wake_up_all(&cgroup_rmdir_waitq);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
|
|
|
|
-{
|
|
|
|
- css_get(css);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
|
|
|
|
-{
|
|
|
|
- cgroup_wakeup_rmdir_waiter(css->cgroup);
|
|
|
|
- css_put(css);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
* Call with cgroup_mutex held. Drops reference counts on modules, including
|
|
* Call with cgroup_mutex held. Drops reference counts on modules, including
|
|
* any duplicate ones that parse_cgroupfs_options took. If this function
|
|
* any duplicate ones that parse_cgroupfs_options took. If this function
|
|
@@ -1150,7 +1102,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
|
|
seq_puts(seq, ",xattr");
|
|
seq_puts(seq, ",xattr");
|
|
if (strlen(root->release_agent_path))
|
|
if (strlen(root->release_agent_path))
|
|
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
|
|
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
|
|
- if (clone_children(&root->top_cgroup))
|
|
|
|
|
|
+ if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
|
|
seq_puts(seq, ",clone_children");
|
|
seq_puts(seq, ",clone_children");
|
|
if (strlen(root->name))
|
|
if (strlen(root->name))
|
|
seq_printf(seq, ",name=%s", root->name);
|
|
seq_printf(seq, ",name=%s", root->name);
|
|
@@ -1162,7 +1114,7 @@ struct cgroup_sb_opts {
|
|
unsigned long subsys_mask;
|
|
unsigned long subsys_mask;
|
|
unsigned long flags;
|
|
unsigned long flags;
|
|
char *release_agent;
|
|
char *release_agent;
|
|
- bool clone_children;
|
|
|
|
|
|
+ bool cpuset_clone_children;
|
|
char *name;
|
|
char *name;
|
|
/* User explicitly requested empty subsystem */
|
|
/* User explicitly requested empty subsystem */
|
|
bool none;
|
|
bool none;
|
|
@@ -1213,7 +1165,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
|
|
continue;
|
|
continue;
|
|
}
|
|
}
|
|
if (!strcmp(token, "clone_children")) {
|
|
if (!strcmp(token, "clone_children")) {
|
|
- opts->clone_children = true;
|
|
|
|
|
|
+ opts->cpuset_clone_children = true;
|
|
continue;
|
|
continue;
|
|
}
|
|
}
|
|
if (!strcmp(token, "xattr")) {
|
|
if (!strcmp(token, "xattr")) {
|
|
@@ -1397,14 +1349,21 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
|
|
goto out_unlock;
|
|
goto out_unlock;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Clear out the files of subsystems that should be removed; do
|
|
|
|
+ * this before rebind_subsystems, since rebind_subsystems may
|
|
|
|
+ * change this hierarchy's subsys_list.
|
|
|
|
+ */
|
|
|
|
+ cgroup_clear_directory(cgrp->dentry, false, removed_mask);
|
|
|
|
+
|
|
ret = rebind_subsystems(root, opts.subsys_mask);
|
|
ret = rebind_subsystems(root, opts.subsys_mask);
|
|
if (ret) {
|
|
if (ret) {
|
|
|
|
+ /* rebind_subsystems failed, re-populate the removed files */
|
|
|
|
+ cgroup_populate_dir(cgrp, false, removed_mask);
|
|
drop_parsed_module_refcounts(opts.subsys_mask);
|
|
drop_parsed_module_refcounts(opts.subsys_mask);
|
|
goto out_unlock;
|
|
goto out_unlock;
|
|
}
|
|
}
|
|
|
|
|
|
- /* clear out any existing files and repopulate subsystem files */
|
|
|
|
- cgroup_clear_directory(cgrp->dentry, false, removed_mask);
|
|
|
|
/* re-populate subsystem files */
|
|
/* re-populate subsystem files */
|
|
cgroup_populate_dir(cgrp, false, added_mask);
|
|
cgroup_populate_dir(cgrp, false, added_mask);
|
|
|
|
|
|
@@ -1432,6 +1391,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
|
|
INIT_LIST_HEAD(&cgrp->children);
|
|
INIT_LIST_HEAD(&cgrp->children);
|
|
INIT_LIST_HEAD(&cgrp->files);
|
|
INIT_LIST_HEAD(&cgrp->files);
|
|
INIT_LIST_HEAD(&cgrp->css_sets);
|
|
INIT_LIST_HEAD(&cgrp->css_sets);
|
|
|
|
+ INIT_LIST_HEAD(&cgrp->allcg_node);
|
|
INIT_LIST_HEAD(&cgrp->release_list);
|
|
INIT_LIST_HEAD(&cgrp->release_list);
|
|
INIT_LIST_HEAD(&cgrp->pidlists);
|
|
INIT_LIST_HEAD(&cgrp->pidlists);
|
|
mutex_init(&cgrp->pidlist_mutex);
|
|
mutex_init(&cgrp->pidlist_mutex);
|
|
@@ -1450,8 +1410,8 @@ static void init_cgroup_root(struct cgroupfs_root *root)
|
|
root->number_of_cgroups = 1;
|
|
root->number_of_cgroups = 1;
|
|
cgrp->root = root;
|
|
cgrp->root = root;
|
|
cgrp->top_cgroup = cgrp;
|
|
cgrp->top_cgroup = cgrp;
|
|
- list_add_tail(&cgrp->allcg_node, &root->allcg_list);
|
|
|
|
init_cgroup_housekeeping(cgrp);
|
|
init_cgroup_housekeeping(cgrp);
|
|
|
|
+ list_add_tail(&cgrp->allcg_node, &root->allcg_list);
|
|
}
|
|
}
|
|
|
|
|
|
static bool init_root_id(struct cgroupfs_root *root)
|
|
static bool init_root_id(struct cgroupfs_root *root)
|
|
@@ -1518,12 +1478,13 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
|
|
|
|
|
|
root->subsys_mask = opts->subsys_mask;
|
|
root->subsys_mask = opts->subsys_mask;
|
|
root->flags = opts->flags;
|
|
root->flags = opts->flags;
|
|
|
|
+ ida_init(&root->cgroup_ida);
|
|
if (opts->release_agent)
|
|
if (opts->release_agent)
|
|
strcpy(root->release_agent_path, opts->release_agent);
|
|
strcpy(root->release_agent_path, opts->release_agent);
|
|
if (opts->name)
|
|
if (opts->name)
|
|
strcpy(root->name, opts->name);
|
|
strcpy(root->name, opts->name);
|
|
- if (opts->clone_children)
|
|
|
|
- set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
|
|
|
|
|
|
+ if (opts->cpuset_clone_children)
|
|
|
|
+ set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
|
|
return root;
|
|
return root;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1536,6 +1497,7 @@ static void cgroup_drop_root(struct cgroupfs_root *root)
|
|
spin_lock(&hierarchy_id_lock);
|
|
spin_lock(&hierarchy_id_lock);
|
|
ida_remove(&hierarchy_ida, root->hierarchy_id);
|
|
ida_remove(&hierarchy_ida, root->hierarchy_id);
|
|
spin_unlock(&hierarchy_id_lock);
|
|
spin_unlock(&hierarchy_id_lock);
|
|
|
|
+ ida_destroy(&root->cgroup_ida);
|
|
kfree(root);
|
|
kfree(root);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1701,7 +1663,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
|
|
|
|
|
free_cg_links(&tmp_cg_links);
|
|
free_cg_links(&tmp_cg_links);
|
|
|
|
|
|
- BUG_ON(!list_empty(&root_cgrp->sibling));
|
|
|
|
BUG_ON(!list_empty(&root_cgrp->children));
|
|
BUG_ON(!list_empty(&root_cgrp->children));
|
|
BUG_ON(root->number_of_cgroups != 1);
|
|
BUG_ON(root->number_of_cgroups != 1);
|
|
|
|
|
|
@@ -1750,7 +1711,6 @@ static void cgroup_kill_sb(struct super_block *sb) {
|
|
|
|
|
|
BUG_ON(root->number_of_cgroups != 1);
|
|
BUG_ON(root->number_of_cgroups != 1);
|
|
BUG_ON(!list_empty(&cgrp->children));
|
|
BUG_ON(!list_empty(&cgrp->children));
|
|
- BUG_ON(!list_empty(&cgrp->sibling));
|
|
|
|
|
|
|
|
mutex_lock(&cgroup_mutex);
|
|
mutex_lock(&cgroup_mutex);
|
|
mutex_lock(&cgroup_root_mutex);
|
|
mutex_lock(&cgroup_root_mutex);
|
|
@@ -1808,9 +1768,11 @@ static struct kobject *cgroup_kobj;
|
|
*/
|
|
*/
|
|
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
|
|
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
|
|
{
|
|
{
|
|
|
|
+ struct dentry *dentry = cgrp->dentry;
|
|
char *start;
|
|
char *start;
|
|
- struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
|
|
|
|
- cgroup_lock_is_held());
|
|
|
|
|
|
+
|
|
|
|
+ rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
|
|
|
|
+ "cgroup_path() called without proper locking");
|
|
|
|
|
|
if (!dentry || cgrp == dummytop) {
|
|
if (!dentry || cgrp == dummytop) {
|
|
/*
|
|
/*
|
|
@@ -1821,9 +1783,9 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
- start = buf + buflen;
|
|
|
|
|
|
+ start = buf + buflen - 1;
|
|
|
|
|
|
- *--start = '\0';
|
|
|
|
|
|
+ *start = '\0';
|
|
for (;;) {
|
|
for (;;) {
|
|
int len = dentry->d_name.len;
|
|
int len = dentry->d_name.len;
|
|
|
|
|
|
@@ -1834,8 +1796,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
|
|
if (!cgrp)
|
|
if (!cgrp)
|
|
break;
|
|
break;
|
|
|
|
|
|
- dentry = rcu_dereference_check(cgrp->dentry,
|
|
|
|
- cgroup_lock_is_held());
|
|
|
|
|
|
+ dentry = cgrp->dentry;
|
|
if (!cgrp->parent)
|
|
if (!cgrp->parent)
|
|
continue;
|
|
continue;
|
|
if (--start < buf)
|
|
if (--start < buf)
|
|
@@ -1930,9 +1891,7 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
|
|
/*
|
|
/*
|
|
* cgroup_task_migrate - move a task from one cgroup to another.
|
|
* cgroup_task_migrate - move a task from one cgroup to another.
|
|
*
|
|
*
|
|
- * 'guarantee' is set if the caller promises that a new css_set for the task
|
|
|
|
- * will already exist. If not set, this function might sleep, and can fail with
|
|
|
|
- * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
|
|
|
|
|
|
+ * Must be called with cgroup_mutex and threadgroup locked.
|
|
*/
|
|
*/
|
|
static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
|
|
static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
|
|
struct task_struct *tsk, struct css_set *newcg)
|
|
struct task_struct *tsk, struct css_set *newcg)
|
|
@@ -2025,12 +1984,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
|
|
}
|
|
}
|
|
|
|
|
|
synchronize_rcu();
|
|
synchronize_rcu();
|
|
-
|
|
|
|
- /*
|
|
|
|
- * wake up rmdir() waiter. the rmdir should fail since the cgroup
|
|
|
|
- * is no longer empty.
|
|
|
|
- */
|
|
|
|
- cgroup_wakeup_rmdir_waiter(cgrp);
|
|
|
|
out:
|
|
out:
|
|
if (retval) {
|
|
if (retval) {
|
|
for_each_subsys(root, ss) {
|
|
for_each_subsys(root, ss) {
|
|
@@ -2200,7 +2153,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
* step 5: success! and cleanup
|
|
* step 5: success! and cleanup
|
|
*/
|
|
*/
|
|
synchronize_rcu();
|
|
synchronize_rcu();
|
|
- cgroup_wakeup_rmdir_waiter(cgrp);
|
|
|
|
retval = 0;
|
|
retval = 0;
|
|
out_put_css_set_refs:
|
|
out_put_css_set_refs:
|
|
if (retval) {
|
|
if (retval) {
|
|
@@ -2711,10 +2663,17 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode,
|
|
|
|
|
|
/* start off with i_nlink == 2 (for "." entry) */
|
|
/* start off with i_nlink == 2 (for "." entry) */
|
|
inc_nlink(inode);
|
|
inc_nlink(inode);
|
|
|
|
+ inc_nlink(dentry->d_parent->d_inode);
|
|
|
|
|
|
- /* start with the directory inode held, so that we can
|
|
|
|
- * populate it without racing with another mkdir */
|
|
|
|
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Control reaches here with cgroup_mutex held.
|
|
|
|
+ * @inode->i_mutex should nest outside cgroup_mutex but we
|
|
|
|
+ * want to populate it immediately without releasing
|
|
|
|
+ * cgroup_mutex. As @inode isn't visible to anyone else
|
|
|
|
+ * yet, trylock will always succeed without affecting
|
|
|
|
+ * lockdep checks.
|
|
|
|
+ */
|
|
|
|
+ WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
|
|
} else if (S_ISREG(mode)) {
|
|
} else if (S_ISREG(mode)) {
|
|
inode->i_size = 0;
|
|
inode->i_size = 0;
|
|
inode->i_fop = &cgroup_file_operations;
|
|
inode->i_fop = &cgroup_file_operations;
|
|
@@ -2725,32 +2684,6 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode,
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * cgroup_create_dir - create a directory for an object.
|
|
|
|
- * @cgrp: the cgroup we create the directory for. It must have a valid
|
|
|
|
- * ->parent field. And we are going to fill its ->dentry field.
|
|
|
|
- * @dentry: dentry of the new cgroup
|
|
|
|
- * @mode: mode to set on new directory.
|
|
|
|
- */
|
|
|
|
-static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
|
|
|
|
- umode_t mode)
|
|
|
|
-{
|
|
|
|
- struct dentry *parent;
|
|
|
|
- int error = 0;
|
|
|
|
-
|
|
|
|
- parent = cgrp->parent->dentry;
|
|
|
|
- error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
|
|
|
|
- if (!error) {
|
|
|
|
- dentry->d_fsdata = cgrp;
|
|
|
|
- inc_nlink(parent->d_inode);
|
|
|
|
- rcu_assign_pointer(cgrp->dentry, dentry);
|
|
|
|
- dget(dentry);
|
|
|
|
- }
|
|
|
|
- dput(dentry);
|
|
|
|
-
|
|
|
|
- return error;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
/**
|
|
/**
|
|
* cgroup_file_mode - deduce file mode of a control file
|
|
* cgroup_file_mode - deduce file mode of a control file
|
|
* @cft: the control file in question
|
|
* @cft: the control file in question
|
|
@@ -2791,12 +2724,6 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
|
|
|
|
|
|
simple_xattrs_init(&cft->xattrs);
|
|
simple_xattrs_init(&cft->xattrs);
|
|
|
|
|
|
- /* does @cft->flags tell us to skip creation on @cgrp? */
|
|
|
|
- if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
|
|
|
|
- return 0;
|
|
|
|
- if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
|
|
|
|
- return 0;
|
|
|
|
-
|
|
|
|
if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
|
|
if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
|
|
strcpy(name, subsys->name);
|
|
strcpy(name, subsys->name);
|
|
strcat(name, ".");
|
|
strcat(name, ".");
|
|
@@ -2837,6 +2764,12 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
|
|
int err, ret = 0;
|
|
int err, ret = 0;
|
|
|
|
|
|
for (cft = cfts; cft->name[0] != '\0'; cft++) {
|
|
for (cft = cfts; cft->name[0] != '\0'; cft++) {
|
|
|
|
+ /* does cft->flags tell us to skip this file on @cgrp? */
|
|
|
|
+ if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
|
|
|
|
+ continue;
|
|
|
|
+ if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
if (is_add)
|
|
if (is_add)
|
|
err = cgroup_add_file(cgrp, subsys, cft);
|
|
err = cgroup_add_file(cgrp, subsys, cft);
|
|
else
|
|
else
|
|
@@ -3044,6 +2977,92 @@ static void cgroup_enable_task_cg_lists(void)
|
|
write_unlock(&css_set_lock);
|
|
write_unlock(&css_set_lock);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/**
|
|
|
|
+ * cgroup_next_descendant_pre - find the next descendant for pre-order walk
|
|
|
|
+ * @pos: the current position (%NULL to initiate traversal)
|
|
|
|
+ * @cgroup: cgroup whose descendants to walk
|
|
|
|
+ *
|
|
|
|
+ * To be used by cgroup_for_each_descendant_pre(). Find the next
|
|
|
|
+ * descendant to visit for pre-order traversal of @cgroup's descendants.
|
|
|
|
+ */
|
|
|
|
+struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
|
|
|
|
+ struct cgroup *cgroup)
|
|
|
|
+{
|
|
|
|
+ struct cgroup *next;
|
|
|
|
+
|
|
|
|
+ WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
+
|
|
|
|
+ /* if first iteration, pretend we just visited @cgroup */
|
|
|
|
+ if (!pos) {
|
|
|
|
+ if (list_empty(&cgroup->children))
|
|
|
|
+ return NULL;
|
|
|
|
+ pos = cgroup;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* visit the first child if exists */
|
|
|
|
+ next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
|
|
|
|
+ if (next)
|
|
|
|
+ return next;
|
|
|
|
+
|
|
|
|
+ /* no child, visit my or the closest ancestor's next sibling */
|
|
|
|
+ do {
|
|
|
|
+ next = list_entry_rcu(pos->sibling.next, struct cgroup,
|
|
|
|
+ sibling);
|
|
|
|
+ if (&next->sibling != &pos->parent->children)
|
|
|
|
+ return next;
|
|
|
|
+
|
|
|
|
+ pos = pos->parent;
|
|
|
|
+ } while (pos != cgroup);
|
|
|
|
+
|
|
|
|
+ return NULL;
|
|
|
|
+}
|
|
|
|
+EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
|
|
|
|
+
|
|
|
|
+static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
|
|
|
|
+{
|
|
|
|
+ struct cgroup *last;
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+ last = pos;
|
|
|
|
+ pos = list_first_or_null_rcu(&pos->children, struct cgroup,
|
|
|
|
+ sibling);
|
|
|
|
+ } while (pos);
|
|
|
|
+
|
|
|
|
+ return last;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * cgroup_next_descendant_post - find the next descendant for post-order walk
|
|
|
|
+ * @pos: the current position (%NULL to initiate traversal)
|
|
|
|
+ * @cgroup: cgroup whose descendants to walk
|
|
|
|
+ *
|
|
|
|
+ * To be used by cgroup_for_each_descendant_post(). Find the next
|
|
|
|
+ * descendant to visit for post-order traversal of @cgroup's descendants.
|
|
|
|
+ */
|
|
|
|
+struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
|
|
|
|
+ struct cgroup *cgroup)
|
|
|
|
+{
|
|
|
|
+ struct cgroup *next;
|
|
|
|
+
|
|
|
|
+ WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
+
|
|
|
|
+ /* if first iteration, visit the leftmost descendant */
|
|
|
|
+ if (!pos) {
|
|
|
|
+ next = cgroup_leftmost_descendant(cgroup);
|
|
|
|
+ return next != cgroup ? next : NULL;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* if there's an unvisited sibling, visit its leftmost descendant */
|
|
|
|
+ next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
|
|
|
|
+ if (&next->sibling != &pos->parent->children)
|
|
|
|
+ return cgroup_leftmost_descendant(next);
|
|
|
|
+
|
|
|
|
+ /* no sibling left, visit parent */
|
|
|
|
+ next = pos->parent;
|
|
|
|
+ return next != cgroup ? next : NULL;
|
|
|
|
+}
|
|
|
|
+EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
|
|
|
|
+
|
|
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
|
|
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
|
|
__acquires(css_set_lock)
|
|
__acquires(css_set_lock)
|
|
{
|
|
{
|
|
@@ -3757,7 +3776,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
|
|
if (flags & POLLHUP) {
|
|
if (flags & POLLHUP) {
|
|
__remove_wait_queue(event->wqh, &event->wait);
|
|
__remove_wait_queue(event->wqh, &event->wait);
|
|
spin_lock(&cgrp->event_list_lock);
|
|
spin_lock(&cgrp->event_list_lock);
|
|
- list_del(&event->list);
|
|
|
|
|
|
+ list_del_init(&event->list);
|
|
spin_unlock(&cgrp->event_list_lock);
|
|
spin_unlock(&cgrp->event_list_lock);
|
|
/*
|
|
/*
|
|
* We are in atomic context, but cgroup_event_remove() may
|
|
* We are in atomic context, but cgroup_event_remove() may
|
|
@@ -3894,7 +3913,7 @@ fail:
|
|
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
|
|
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
|
|
struct cftype *cft)
|
|
struct cftype *cft)
|
|
{
|
|
{
|
|
- return clone_children(cgrp);
|
|
|
|
|
|
+ return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
|
|
}
|
|
}
|
|
|
|
|
|
static int cgroup_clone_children_write(struct cgroup *cgrp,
|
|
static int cgroup_clone_children_write(struct cgroup *cgrp,
|
|
@@ -3902,9 +3921,9 @@ static int cgroup_clone_children_write(struct cgroup *cgrp,
|
|
u64 val)
|
|
u64 val)
|
|
{
|
|
{
|
|
if (val)
|
|
if (val)
|
|
- set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
|
|
|
|
|
|
+ set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
|
|
else
|
|
else
|
|
- clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
|
|
|
|
|
|
+ clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -4017,19 +4036,57 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
|
|
css->flags = 0;
|
|
css->flags = 0;
|
|
css->id = NULL;
|
|
css->id = NULL;
|
|
if (cgrp == dummytop)
|
|
if (cgrp == dummytop)
|
|
- set_bit(CSS_ROOT, &css->flags);
|
|
|
|
|
|
+ css->flags |= CSS_ROOT;
|
|
BUG_ON(cgrp->subsys[ss->subsys_id]);
|
|
BUG_ON(cgrp->subsys[ss->subsys_id]);
|
|
cgrp->subsys[ss->subsys_id] = css;
|
|
cgrp->subsys[ss->subsys_id] = css;
|
|
|
|
|
|
/*
|
|
/*
|
|
- * If !clear_css_refs, css holds an extra ref to @cgrp->dentry
|
|
|
|
- * which is put on the last css_put(). dput() requires process
|
|
|
|
- * context, which css_put() may be called without. @css->dput_work
|
|
|
|
- * will be used to invoke dput() asynchronously from css_put().
|
|
|
|
|
|
+ * css holds an extra ref to @cgrp->dentry which is put on the last
|
|
|
|
+ * css_put(). dput() requires process context, which css_put() may
|
|
|
|
+ * be called without. @css->dput_work will be used to invoke
|
|
|
|
+ * dput() asynchronously from css_put().
|
|
*/
|
|
*/
|
|
INIT_WORK(&css->dput_work, css_dput_fn);
|
|
INIT_WORK(&css->dput_work, css_dput_fn);
|
|
- if (ss->__DEPRECATED_clear_css_refs)
|
|
|
|
- set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
|
|
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* invoke ->css_online() on a new CSS and mark it online if successful */
|
|
|
|
+static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
|
|
|
+{
|
|
|
|
+ int ret = 0;
|
|
|
|
+
|
|
|
|
+ lockdep_assert_held(&cgroup_mutex);
|
|
|
|
+
|
|
|
|
+ if (ss->css_online)
|
|
|
|
+ ret = ss->css_online(cgrp);
|
|
|
|
+ if (!ret)
|
|
|
|
+ cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
|
|
|
|
+static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
|
|
|
+ __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
|
|
|
|
+{
|
|
|
|
+ struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
|
|
|
|
+
|
|
|
|
+ lockdep_assert_held(&cgroup_mutex);
|
|
|
|
+
|
|
|
|
+ if (!(css->flags & CSS_ONLINE))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * css_offline() should be called with cgroup_mutex unlocked. See
|
|
|
|
+ * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for
|
|
|
|
+ * details. This temporary unlocking should go away once
|
|
|
|
+ * cgroup_mutex is unexported from controllers.
|
|
|
|
+ */
|
|
|
|
+ if (ss->css_offline) {
|
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
|
+ ss->css_offline(cgrp);
|
|
|
|
+ mutex_lock(&cgroup_mutex);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -4049,10 +4106,27 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|
struct cgroup_subsys *ss;
|
|
struct cgroup_subsys *ss;
|
|
struct super_block *sb = root->sb;
|
|
struct super_block *sb = root->sb;
|
|
|
|
|
|
|
|
+ /* allocate the cgroup and its ID, 0 is reserved for the root */
|
|
cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
|
|
cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
|
|
if (!cgrp)
|
|
if (!cgrp)
|
|
return -ENOMEM;
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
+ cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
|
|
|
|
+ if (cgrp->id < 0)
|
|
|
|
+ goto err_free_cgrp;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Only live parents can have children. Note that the liveness
|
|
|
|
+ * check isn't strictly necessary because cgroup_mkdir() and
|
|
|
|
+ * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
|
|
|
|
+ * anyway so that locking is contained inside cgroup proper and we
|
|
|
|
+ * don't get nasty surprises if we ever grow another caller.
|
|
|
|
+ */
|
|
|
|
+ if (!cgroup_lock_live_group(parent)) {
|
|
|
|
+ err = -ENODEV;
|
|
|
|
+ goto err_free_id;
|
|
|
|
+ }
|
|
|
|
+
|
|
/* Grab a reference on the superblock so the hierarchy doesn't
|
|
/* Grab a reference on the superblock so the hierarchy doesn't
|
|
* get deleted on unmount if there are child cgroups. This
|
|
* get deleted on unmount if there are child cgroups. This
|
|
* can be done outside cgroup_mutex, since the sb can't
|
|
* can be done outside cgroup_mutex, since the sb can't
|
|
@@ -4060,8 +4134,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|
* fs */
|
|
* fs */
|
|
atomic_inc(&sb->s_active);
|
|
atomic_inc(&sb->s_active);
|
|
|
|
|
|
- mutex_lock(&cgroup_mutex);
|
|
|
|
-
|
|
|
|
init_cgroup_housekeeping(cgrp);
|
|
init_cgroup_housekeeping(cgrp);
|
|
|
|
|
|
cgrp->parent = parent;
|
|
cgrp->parent = parent;
|
|
@@ -4071,26 +4143,51 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|
if (notify_on_release(parent))
|
|
if (notify_on_release(parent))
|
|
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
|
|
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
|
|
|
|
|
|
- if (clone_children(parent))
|
|
|
|
- set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
|
|
|
|
|
|
+ if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
|
|
|
|
+ set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
|
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
for_each_subsys(root, ss) {
|
|
struct cgroup_subsys_state *css;
|
|
struct cgroup_subsys_state *css;
|
|
|
|
|
|
- css = ss->create(cgrp);
|
|
|
|
|
|
+ css = ss->css_alloc(cgrp);
|
|
if (IS_ERR(css)) {
|
|
if (IS_ERR(css)) {
|
|
err = PTR_ERR(css);
|
|
err = PTR_ERR(css);
|
|
- goto err_destroy;
|
|
|
|
|
|
+ goto err_free_all;
|
|
}
|
|
}
|
|
init_cgroup_css(css, ss, cgrp);
|
|
init_cgroup_css(css, ss, cgrp);
|
|
if (ss->use_id) {
|
|
if (ss->use_id) {
|
|
err = alloc_css_id(ss, parent, cgrp);
|
|
err = alloc_css_id(ss, parent, cgrp);
|
|
if (err)
|
|
if (err)
|
|
- goto err_destroy;
|
|
|
|
|
|
+ goto err_free_all;
|
|
}
|
|
}
|
|
- /* At error, ->destroy() callback has to free assigned ID. */
|
|
|
|
- if (clone_children(parent) && ss->post_clone)
|
|
|
|
- ss->post_clone(cgrp);
|
|
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Create directory. cgroup_create_file() returns with the new
|
|
|
|
+ * directory locked on success so that it can be populated without
|
|
|
|
+ * dropping cgroup_mutex.
|
|
|
|
+ */
|
|
|
|
+ err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
|
|
|
|
+ if (err < 0)
|
|
|
|
+ goto err_free_all;
|
|
|
|
+ lockdep_assert_held(&dentry->d_inode->i_mutex);
|
|
|
|
+
|
|
|
|
+ /* allocation complete, commit to creation */
|
|
|
|
+ dentry->d_fsdata = cgrp;
|
|
|
|
+ cgrp->dentry = dentry;
|
|
|
|
+ list_add_tail(&cgrp->allcg_node, &root->allcg_list);
|
|
|
|
+ list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
|
|
|
|
+ root->number_of_cgroups++;
|
|
|
|
+
|
|
|
|
+ /* each css holds a ref to the cgroup's dentry */
|
|
|
|
+ for_each_subsys(root, ss)
|
|
|
|
+ dget(dentry);
|
|
|
|
+
|
|
|
|
+ /* creation succeeded, notify subsystems */
|
|
|
|
+ for_each_subsys(root, ss) {
|
|
|
|
+ err = online_css(ss, cgrp);
|
|
|
|
+ if (err)
|
|
|
|
+ goto err_destroy;
|
|
|
|
|
|
if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
|
|
if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
|
|
parent->parent) {
|
|
parent->parent) {
|
|
@@ -4102,50 +4199,34 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- list_add(&cgrp->sibling, &cgrp->parent->children);
|
|
|
|
- root->number_of_cgroups++;
|
|
|
|
-
|
|
|
|
- err = cgroup_create_dir(cgrp, dentry, mode);
|
|
|
|
- if (err < 0)
|
|
|
|
- goto err_remove;
|
|
|
|
-
|
|
|
|
- /* If !clear_css_refs, each css holds a ref to the cgroup's dentry */
|
|
|
|
- for_each_subsys(root, ss)
|
|
|
|
- if (!ss->__DEPRECATED_clear_css_refs)
|
|
|
|
- dget(dentry);
|
|
|
|
-
|
|
|
|
- /* The cgroup directory was pre-locked for us */
|
|
|
|
- BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
|
|
|
|
-
|
|
|
|
- list_add_tail(&cgrp->allcg_node, &root->allcg_list);
|
|
|
|
-
|
|
|
|
err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
|
|
err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
|
|
- /* If err < 0, we have a half-filled directory - oh well ;) */
|
|
|
|
|
|
+ if (err)
|
|
|
|
+ goto err_destroy;
|
|
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
mutex_unlock(&cgroup_mutex);
|
|
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
|
|
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
|
|
|
|
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
- err_remove:
|
|
|
|
-
|
|
|
|
- list_del(&cgrp->sibling);
|
|
|
|
- root->number_of_cgroups--;
|
|
|
|
-
|
|
|
|
- err_destroy:
|
|
|
|
-
|
|
|
|
|
|
+err_free_all:
|
|
for_each_subsys(root, ss) {
|
|
for_each_subsys(root, ss) {
|
|
if (cgrp->subsys[ss->subsys_id])
|
|
if (cgrp->subsys[ss->subsys_id])
|
|
- ss->destroy(cgrp);
|
|
|
|
|
|
+ ss->css_free(cgrp);
|
|
}
|
|
}
|
|
-
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
mutex_unlock(&cgroup_mutex);
|
|
-
|
|
|
|
/* Release the reference count that we took on the superblock */
|
|
/* Release the reference count that we took on the superblock */
|
|
deactivate_super(sb);
|
|
deactivate_super(sb);
|
|
-
|
|
|
|
|
|
+err_free_id:
|
|
|
|
+ ida_simple_remove(&root->cgroup_ida, cgrp->id);
|
|
|
|
+err_free_cgrp:
|
|
kfree(cgrp);
|
|
kfree(cgrp);
|
|
return err;
|
|
return err;
|
|
|
|
+
|
|
|
|
+err_destroy:
|
|
|
|
+ cgroup_destroy_locked(cgrp);
|
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
|
+ mutex_unlock(&dentry->d_inode->i_mutex);
|
|
|
|
+ return err;
|
|
}
|
|
}
|
|
|
|
|
|
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
|
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
|
@@ -4197,153 +4278,60 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * Atomically mark all (or else none) of the cgroup's CSS objects as
|
|
|
|
- * CSS_REMOVED. Return true on success, or false if the cgroup has
|
|
|
|
- * busy subsystems. Call with cgroup_mutex held
|
|
|
|
- *
|
|
|
|
- * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or
|
|
|
|
- * not, cgroup removal behaves differently.
|
|
|
|
- *
|
|
|
|
- * If clear is set, css refcnt for the subsystem should be zero before
|
|
|
|
- * cgroup removal can be committed. This is implemented by
|
|
|
|
- * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be
|
|
|
|
- * called multiple times until all css refcnts reach zero and is allowed to
|
|
|
|
- * veto removal on any invocation. This behavior is deprecated and will be
|
|
|
|
- * removed as soon as the existing user (memcg) is updated.
|
|
|
|
- *
|
|
|
|
- * If clear is not set, each css holds an extra reference to the cgroup's
|
|
|
|
- * dentry and cgroup removal proceeds regardless of css refs.
|
|
|
|
- * ->pre_destroy() will be called at least once and is not allowed to fail.
|
|
|
|
- * On the last put of each css, whenever that may be, the extra dentry ref
|
|
|
|
- * is put so that dentry destruction happens only after all css's are
|
|
|
|
- * released.
|
|
|
|
- */
|
|
|
|
-static int cgroup_clear_css_refs(struct cgroup *cgrp)
|
|
|
|
|
|
+static int cgroup_destroy_locked(struct cgroup *cgrp)
|
|
|
|
+ __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
|
|
{
|
|
{
|
|
|
|
+ struct dentry *d = cgrp->dentry;
|
|
|
|
+ struct cgroup *parent = cgrp->parent;
|
|
|
|
+ DEFINE_WAIT(wait);
|
|
|
|
+ struct cgroup_event *event, *tmp;
|
|
struct cgroup_subsys *ss;
|
|
struct cgroup_subsys *ss;
|
|
- unsigned long flags;
|
|
|
|
- bool failed = false;
|
|
|
|
|
|
+ LIST_HEAD(tmp_list);
|
|
|
|
+
|
|
|
|
+ lockdep_assert_held(&d->d_inode->i_mutex);
|
|
|
|
+ lockdep_assert_held(&cgroup_mutex);
|
|
|
|
|
|
- local_irq_save(flags);
|
|
|
|
|
|
+ if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children))
|
|
|
|
+ return -EBUSY;
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Block new css_tryget() by deactivating refcnt. If all refcnts
|
|
|
|
- * for subsystems w/ clear_css_refs set were 1 at the moment of
|
|
|
|
- * deactivation, we succeeded.
|
|
|
|
|
|
+ * Block new css_tryget() by deactivating refcnt and mark @cgrp
|
|
|
|
+ * removed. This makes future css_tryget() and child creation
|
|
|
|
+ * attempts fail thus maintaining the removal conditions verified
|
|
|
|
+ * above.
|
|
*/
|
|
*/
|
|
for_each_subsys(cgrp->root, ss) {
|
|
for_each_subsys(cgrp->root, ss) {
|
|
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
|
|
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
|
|
|
|
|
|
WARN_ON(atomic_read(&css->refcnt) < 0);
|
|
WARN_ON(atomic_read(&css->refcnt) < 0);
|
|
atomic_add(CSS_DEACT_BIAS, &css->refcnt);
|
|
atomic_add(CSS_DEACT_BIAS, &css->refcnt);
|
|
-
|
|
|
|
- if (ss->__DEPRECATED_clear_css_refs)
|
|
|
|
- failed |= css_refcnt(css) != 1;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * If succeeded, set REMOVED and put all the base refs; otherwise,
|
|
|
|
- * restore refcnts to positive values. Either way, all in-progress
|
|
|
|
- * css_tryget() will be released.
|
|
|
|
- */
|
|
|
|
- for_each_subsys(cgrp->root, ss) {
|
|
|
|
- struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
|
|
|
|
-
|
|
|
|
- if (!failed) {
|
|
|
|
- set_bit(CSS_REMOVED, &css->flags);
|
|
|
|
- css_put(css);
|
|
|
|
- } else {
|
|
|
|
- atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
|
|
|
|
- }
|
|
|
|
}
|
|
}
|
|
|
|
+ set_bit(CGRP_REMOVED, &cgrp->flags);
|
|
|
|
|
|
- local_irq_restore(flags);
|
|
|
|
- return !failed;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
|
|
|
|
-{
|
|
|
|
- struct cgroup *cgrp = dentry->d_fsdata;
|
|
|
|
- struct dentry *d;
|
|
|
|
- struct cgroup *parent;
|
|
|
|
- DEFINE_WAIT(wait);
|
|
|
|
- struct cgroup_event *event, *tmp;
|
|
|
|
- int ret;
|
|
|
|
-
|
|
|
|
- /* the vfs holds both inode->i_mutex already */
|
|
|
|
-again:
|
|
|
|
- mutex_lock(&cgroup_mutex);
|
|
|
|
- if (atomic_read(&cgrp->count) != 0) {
|
|
|
|
- mutex_unlock(&cgroup_mutex);
|
|
|
|
- return -EBUSY;
|
|
|
|
- }
|
|
|
|
- if (!list_empty(&cgrp->children)) {
|
|
|
|
- mutex_unlock(&cgroup_mutex);
|
|
|
|
- return -EBUSY;
|
|
|
|
- }
|
|
|
|
- mutex_unlock(&cgroup_mutex);
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * In general, subsystem has no css->refcnt after pre_destroy(). But
|
|
|
|
- * in racy cases, subsystem may have to get css->refcnt after
|
|
|
|
- * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
|
|
|
|
- * make rmdir return -EBUSY too often. To avoid that, we use waitqueue
|
|
|
|
- * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
|
|
|
|
- * and subsystem's reference count handling. Please see css_get/put
|
|
|
|
- * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
|
|
|
|
- */
|
|
|
|
- set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
|
|
|
+ /* tell subsystems to initiate destruction */
|
|
|
|
+ for_each_subsys(cgrp->root, ss)
|
|
|
|
+ offline_css(ss, cgrp);
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Call pre_destroy handlers of subsys. Notify subsystems
|
|
|
|
- * that rmdir() request comes.
|
|
|
|
|
|
+ * Put all the base refs. Each css holds an extra reference to the
|
|
|
|
+ * cgroup's dentry and cgroup removal proceeds regardless of css
|
|
|
|
+ * refs. On the last put of each css, whenever that may be, the
|
|
|
|
+ * extra dentry ref is put so that dentry destruction happens only
|
|
|
|
+ * after all css's are released.
|
|
*/
|
|
*/
|
|
- ret = cgroup_call_pre_destroy(cgrp);
|
|
|
|
- if (ret) {
|
|
|
|
- clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- mutex_lock(&cgroup_mutex);
|
|
|
|
- parent = cgrp->parent;
|
|
|
|
- if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
|
|
|
|
- clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
|
- mutex_unlock(&cgroup_mutex);
|
|
|
|
- return -EBUSY;
|
|
|
|
- }
|
|
|
|
- prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
|
|
|
|
- if (!cgroup_clear_css_refs(cgrp)) {
|
|
|
|
- mutex_unlock(&cgroup_mutex);
|
|
|
|
- /*
|
|
|
|
- * Because someone may call cgroup_wakeup_rmdir_waiter() before
|
|
|
|
- * prepare_to_wait(), we need to check this flag.
|
|
|
|
- */
|
|
|
|
- if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
|
|
|
|
- schedule();
|
|
|
|
- finish_wait(&cgroup_rmdir_waitq, &wait);
|
|
|
|
- clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
|
- if (signal_pending(current))
|
|
|
|
- return -EINTR;
|
|
|
|
- goto again;
|
|
|
|
- }
|
|
|
|
- /* NO css_tryget() can success after here. */
|
|
|
|
- finish_wait(&cgroup_rmdir_waitq, &wait);
|
|
|
|
- clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
|
|
|
+ for_each_subsys(cgrp->root, ss)
|
|
|
|
+ css_put(cgrp->subsys[ss->subsys_id]);
|
|
|
|
|
|
raw_spin_lock(&release_list_lock);
|
|
raw_spin_lock(&release_list_lock);
|
|
- set_bit(CGRP_REMOVED, &cgrp->flags);
|
|
|
|
if (!list_empty(&cgrp->release_list))
|
|
if (!list_empty(&cgrp->release_list))
|
|
list_del_init(&cgrp->release_list);
|
|
list_del_init(&cgrp->release_list);
|
|
raw_spin_unlock(&release_list_lock);
|
|
raw_spin_unlock(&release_list_lock);
|
|
|
|
|
|
/* delete this cgroup from parent->children */
|
|
/* delete this cgroup from parent->children */
|
|
- list_del_init(&cgrp->sibling);
|
|
|
|
-
|
|
|
|
|
|
+ list_del_rcu(&cgrp->sibling);
|
|
list_del_init(&cgrp->allcg_node);
|
|
list_del_init(&cgrp->allcg_node);
|
|
|
|
|
|
- d = dget(cgrp->dentry);
|
|
|
|
-
|
|
|
|
|
|
+ dget(d);
|
|
cgroup_d_remove_dir(d);
|
|
cgroup_d_remove_dir(d);
|
|
dput(d);
|
|
dput(d);
|
|
|
|
|
|
@@ -4353,21 +4341,35 @@ again:
|
|
/*
|
|
/*
|
|
* Unregister events and notify userspace.
|
|
* Unregister events and notify userspace.
|
|
* Notify userspace about cgroup removing only after rmdir of cgroup
|
|
* Notify userspace about cgroup removing only after rmdir of cgroup
|
|
- * directory to avoid race between userspace and kernelspace
|
|
|
|
|
|
+ * directory to avoid race between userspace and kernelspace. Use
|
|
|
|
+ * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
|
|
|
|
+ * cgroup_event_wake() is called with the wait queue head locked,
|
|
|
|
+ * remove_wait_queue() cannot be called while holding event_list_lock.
|
|
*/
|
|
*/
|
|
spin_lock(&cgrp->event_list_lock);
|
|
spin_lock(&cgrp->event_list_lock);
|
|
- list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
|
|
|
|
- list_del(&event->list);
|
|
|
|
|
|
+ list_splice_init(&cgrp->event_list, &tmp_list);
|
|
|
|
+ spin_unlock(&cgrp->event_list_lock);
|
|
|
|
+ list_for_each_entry_safe(event, tmp, &tmp_list, list) {
|
|
|
|
+ list_del_init(&event->list);
|
|
remove_wait_queue(event->wqh, &event->wait);
|
|
remove_wait_queue(event->wqh, &event->wait);
|
|
eventfd_signal(event->eventfd, 1);
|
|
eventfd_signal(event->eventfd, 1);
|
|
schedule_work(&event->remove);
|
|
schedule_work(&event->remove);
|
|
}
|
|
}
|
|
- spin_unlock(&cgrp->event_list_lock);
|
|
|
|
|
|
|
|
- mutex_unlock(&cgroup_mutex);
|
|
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
|
|
|
|
+{
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ mutex_lock(&cgroup_mutex);
|
|
|
|
+ ret = cgroup_destroy_locked(dentry->d_fsdata);
|
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
|
+
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
|
|
static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
|
|
{
|
|
{
|
|
INIT_LIST_HEAD(&ss->cftsets);
|
|
INIT_LIST_HEAD(&ss->cftsets);
|
|
@@ -4388,13 +4390,15 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
|
|
|
|
|
|
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
|
|
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
|
|
|
|
|
|
|
|
+ mutex_lock(&cgroup_mutex);
|
|
|
|
+
|
|
/* init base cftset */
|
|
/* init base cftset */
|
|
cgroup_init_cftsets(ss);
|
|
cgroup_init_cftsets(ss);
|
|
|
|
|
|
/* Create the top cgroup state for this subsystem */
|
|
/* Create the top cgroup state for this subsystem */
|
|
list_add(&ss->sibling, &rootnode.subsys_list);
|
|
list_add(&ss->sibling, &rootnode.subsys_list);
|
|
ss->root = &rootnode;
|
|
ss->root = &rootnode;
|
|
- css = ss->create(dummytop);
|
|
|
|
|
|
+ css = ss->css_alloc(dummytop);
|
|
/* We don't handle early failures gracefully */
|
|
/* We don't handle early failures gracefully */
|
|
BUG_ON(IS_ERR(css));
|
|
BUG_ON(IS_ERR(css));
|
|
init_cgroup_css(css, ss, dummytop);
|
|
init_cgroup_css(css, ss, dummytop);
|
|
@@ -4403,7 +4407,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
|
|
* pointer to this state - since the subsystem is
|
|
* pointer to this state - since the subsystem is
|
|
* newly registered, all tasks and hence the
|
|
* newly registered, all tasks and hence the
|
|
* init_css_set is in the subsystem's top cgroup. */
|
|
* init_css_set is in the subsystem's top cgroup. */
|
|
- init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
|
|
|
|
|
|
+ init_css_set.subsys[ss->subsys_id] = css;
|
|
|
|
|
|
need_forkexit_callback |= ss->fork || ss->exit;
|
|
need_forkexit_callback |= ss->fork || ss->exit;
|
|
|
|
|
|
@@ -4413,6 +4417,9 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
|
|
BUG_ON(!list_empty(&init_task.tasks));
|
|
BUG_ON(!list_empty(&init_task.tasks));
|
|
|
|
|
|
ss->active = 1;
|
|
ss->active = 1;
|
|
|
|
+ BUG_ON(online_css(ss, dummytop));
|
|
|
|
+
|
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
|
|
|
/* this function shouldn't be used with modular subsystems, since they
|
|
/* this function shouldn't be used with modular subsystems, since they
|
|
* need to register a subsys_id, among other things */
|
|
* need to register a subsys_id, among other things */
|
|
@@ -4430,12 +4437,12 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
|
|
*/
|
|
*/
|
|
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
|
|
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
|
|
{
|
|
{
|
|
- int i;
|
|
|
|
struct cgroup_subsys_state *css;
|
|
struct cgroup_subsys_state *css;
|
|
|
|
+ int i, ret;
|
|
|
|
|
|
/* check name and function validity */
|
|
/* check name and function validity */
|
|
if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
|
|
if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
|
|
- ss->create == NULL || ss->destroy == NULL)
|
|
|
|
|
|
+ ss->css_alloc == NULL || ss->css_free == NULL)
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -4464,10 +4471,11 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
|
|
subsys[ss->subsys_id] = ss;
|
|
subsys[ss->subsys_id] = ss;
|
|
|
|
|
|
/*
|
|
/*
|
|
- * no ss->create seems to need anything important in the ss struct, so
|
|
|
|
- * this can happen first (i.e. before the rootnode attachment).
|
|
|
|
|
|
+ * no ss->css_alloc seems to need anything important in the ss
|
|
|
|
+ * struct, so this can happen first (i.e. before the rootnode
|
|
|
|
+ * attachment).
|
|
*/
|
|
*/
|
|
- css = ss->create(dummytop);
|
|
|
|
|
|
+ css = ss->css_alloc(dummytop);
|
|
if (IS_ERR(css)) {
|
|
if (IS_ERR(css)) {
|
|
/* failure case - need to deassign the subsys[] slot. */
|
|
/* failure case - need to deassign the subsys[] slot. */
|
|
subsys[ss->subsys_id] = NULL;
|
|
subsys[ss->subsys_id] = NULL;
|
|
@@ -4482,14 +4490,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
|
|
init_cgroup_css(css, ss, dummytop);
|
|
init_cgroup_css(css, ss, dummytop);
|
|
/* init_idr must be after init_cgroup_css because it sets css->id. */
|
|
/* init_idr must be after init_cgroup_css because it sets css->id. */
|
|
if (ss->use_id) {
|
|
if (ss->use_id) {
|
|
- int ret = cgroup_init_idr(ss, css);
|
|
|
|
- if (ret) {
|
|
|
|
- dummytop->subsys[ss->subsys_id] = NULL;
|
|
|
|
- ss->destroy(dummytop);
|
|
|
|
- subsys[ss->subsys_id] = NULL;
|
|
|
|
- mutex_unlock(&cgroup_mutex);
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
|
|
+ ret = cgroup_init_idr(ss, css);
|
|
|
|
+ if (ret)
|
|
|
|
+ goto err_unload;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -4522,10 +4525,19 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
|
|
write_unlock(&css_set_lock);
|
|
write_unlock(&css_set_lock);
|
|
|
|
|
|
ss->active = 1;
|
|
ss->active = 1;
|
|
|
|
+ ret = online_css(ss, dummytop);
|
|
|
|
+ if (ret)
|
|
|
|
+ goto err_unload;
|
|
|
|
|
|
/* success! */
|
|
/* success! */
|
|
mutex_unlock(&cgroup_mutex);
|
|
mutex_unlock(&cgroup_mutex);
|
|
return 0;
|
|
return 0;
|
|
|
|
+
|
|
|
|
+err_unload:
|
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
|
+ /* @ss can't be mounted here as try_module_get() would fail */
|
|
|
|
+ cgroup_unload_subsys(ss);
|
|
|
|
+ return ret;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
|
|
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
|
|
|
|
|
|
@@ -4552,6 +4564,15 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
|
|
BUG_ON(ss->root != &rootnode);
|
|
BUG_ON(ss->root != &rootnode);
|
|
|
|
|
|
mutex_lock(&cgroup_mutex);
|
|
mutex_lock(&cgroup_mutex);
|
|
|
|
+
|
|
|
|
+ offline_css(ss, dummytop);
|
|
|
|
+ ss->active = 0;
|
|
|
|
+
|
|
|
|
+ if (ss->use_id) {
|
|
|
|
+ idr_remove_all(&ss->idr);
|
|
|
|
+ idr_destroy(&ss->idr);
|
|
|
|
+ }
|
|
|
|
+
|
|
/* deassign the subsys_id */
|
|
/* deassign the subsys_id */
|
|
subsys[ss->subsys_id] = NULL;
|
|
subsys[ss->subsys_id] = NULL;
|
|
|
|
|
|
@@ -4567,7 +4588,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
|
|
struct css_set *cg = link->cg;
|
|
struct css_set *cg = link->cg;
|
|
|
|
|
|
hlist_del(&cg->hlist);
|
|
hlist_del(&cg->hlist);
|
|
- BUG_ON(!cg->subsys[ss->subsys_id]);
|
|
|
|
cg->subsys[ss->subsys_id] = NULL;
|
|
cg->subsys[ss->subsys_id] = NULL;
|
|
hhead = css_set_hash(cg->subsys);
|
|
hhead = css_set_hash(cg->subsys);
|
|
hlist_add_head(&cg->hlist, hhead);
|
|
hlist_add_head(&cg->hlist, hhead);
|
|
@@ -4575,12 +4595,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
|
|
write_unlock(&css_set_lock);
|
|
write_unlock(&css_set_lock);
|
|
|
|
|
|
/*
|
|
/*
|
|
- * remove subsystem's css from the dummytop and free it - need to free
|
|
|
|
- * before marking as null because ss->destroy needs the cgrp->subsys
|
|
|
|
- * pointer to find their state. note that this also takes care of
|
|
|
|
- * freeing the css_id.
|
|
|
|
|
|
+ * remove subsystem's css from the dummytop and free it - need to
|
|
|
|
+ * free before marking as null because ss->css_free needs the
|
|
|
|
+ * cgrp->subsys pointer to find their state. note that this also
|
|
|
|
+ * takes care of freeing the css_id.
|
|
*/
|
|
*/
|
|
- ss->destroy(dummytop);
|
|
|
|
|
|
+ ss->css_free(dummytop);
|
|
dummytop->subsys[ss->subsys_id] = NULL;
|
|
dummytop->subsys[ss->subsys_id] = NULL;
|
|
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
mutex_unlock(&cgroup_mutex);
|
|
@@ -4624,8 +4644,8 @@ int __init cgroup_init_early(void)
|
|
|
|
|
|
BUG_ON(!ss->name);
|
|
BUG_ON(!ss->name);
|
|
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
|
|
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
|
|
- BUG_ON(!ss->create);
|
|
|
|
- BUG_ON(!ss->destroy);
|
|
|
|
|
|
+ BUG_ON(!ss->css_alloc);
|
|
|
|
+ BUG_ON(!ss->css_free);
|
|
if (ss->subsys_id != i) {
|
|
if (ss->subsys_id != i) {
|
|
printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
|
|
printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
|
|
ss->name, ss->subsys_id);
|
|
ss->name, ss->subsys_id);
|
|
@@ -4831,45 +4851,20 @@ void cgroup_fork(struct task_struct *child)
|
|
INIT_LIST_HEAD(&child->cg_list);
|
|
INIT_LIST_HEAD(&child->cg_list);
|
|
}
|
|
}
|
|
|
|
|
|
-/**
|
|
|
|
- * cgroup_fork_callbacks - run fork callbacks
|
|
|
|
- * @child: the new task
|
|
|
|
- *
|
|
|
|
- * Called on a new task very soon before adding it to the
|
|
|
|
- * tasklist. No need to take any locks since no-one can
|
|
|
|
- * be operating on this task.
|
|
|
|
- */
|
|
|
|
-void cgroup_fork_callbacks(struct task_struct *child)
|
|
|
|
-{
|
|
|
|
- if (need_forkexit_callback) {
|
|
|
|
- int i;
|
|
|
|
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
|
- struct cgroup_subsys *ss = subsys[i];
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * forkexit callbacks are only supported for
|
|
|
|
- * builtin subsystems.
|
|
|
|
- */
|
|
|
|
- if (!ss || ss->module)
|
|
|
|
- continue;
|
|
|
|
-
|
|
|
|
- if (ss->fork)
|
|
|
|
- ss->fork(child);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
/**
|
|
/**
|
|
* cgroup_post_fork - called on a new task after adding it to the task list
|
|
* cgroup_post_fork - called on a new task after adding it to the task list
|
|
* @child: the task in question
|
|
* @child: the task in question
|
|
*
|
|
*
|
|
- * Adds the task to the list running through its css_set if necessary.
|
|
|
|
- * Has to be after the task is visible on the task list in case we race
|
|
|
|
- * with the first call to cgroup_iter_start() - to guarantee that the
|
|
|
|
- * new task ends up on its list.
|
|
|
|
|
|
+ * Adds the task to the list running through its css_set if necessary and
|
|
|
|
+ * calls the subsystem fork() callbacks. Has to be after the task is
|
|
|
|
+ * visible on the task list in case we race with the first call to
|
|
|
|
+ * cgroup_iter_start() - to guarantee that the new task ends up on its
|
|
|
|
+ * list.
|
|
*/
|
|
*/
|
|
void cgroup_post_fork(struct task_struct *child)
|
|
void cgroup_post_fork(struct task_struct *child)
|
|
{
|
|
{
|
|
|
|
+ int i;
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* use_task_css_set_links is set to 1 before we walk the tasklist
|
|
* use_task_css_set_links is set to 1 before we walk the tasklist
|
|
* under the tasklist_lock and we read it here after we added the child
|
|
* under the tasklist_lock and we read it here after we added the child
|
|
@@ -4889,7 +4884,30 @@ void cgroup_post_fork(struct task_struct *child)
|
|
task_unlock(child);
|
|
task_unlock(child);
|
|
write_unlock(&css_set_lock);
|
|
write_unlock(&css_set_lock);
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Call ss->fork(). This must happen after @child is linked on
|
|
|
|
+ * css_set; otherwise, @child might change state between ->fork()
|
|
|
|
+ * and addition to css_set.
|
|
|
|
+ */
|
|
|
|
+ if (need_forkexit_callback) {
|
|
|
|
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
|
+ struct cgroup_subsys *ss = subsys[i];
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * fork/exit callbacks are supported only for
|
|
|
|
+ * builtin subsystems and we don't need further
|
|
|
|
+ * synchronization as they never go away.
|
|
|
|
+ */
|
|
|
|
+ if (!ss || ss->module)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ if (ss->fork)
|
|
|
|
+ ss->fork(child);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* cgroup_exit - detach cgroup from exiting task
|
|
* cgroup_exit - detach cgroup from exiting task
|
|
* @tsk: pointer to task_struct of exiting process
|
|
* @tsk: pointer to task_struct of exiting process
|
|
@@ -5022,15 +5040,17 @@ static void check_for_release(struct cgroup *cgrp)
|
|
/* Caller must verify that the css is not for root cgroup */
|
|
/* Caller must verify that the css is not for root cgroup */
|
|
bool __css_tryget(struct cgroup_subsys_state *css)
|
|
bool __css_tryget(struct cgroup_subsys_state *css)
|
|
{
|
|
{
|
|
- do {
|
|
|
|
- int v = css_refcnt(css);
|
|
|
|
|
|
+ while (true) {
|
|
|
|
+ int t, v;
|
|
|
|
|
|
- if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
|
|
|
|
|
|
+ v = css_refcnt(css);
|
|
|
|
+ t = atomic_cmpxchg(&css->refcnt, v, v + 1);
|
|
|
|
+ if (likely(t == v))
|
|
return true;
|
|
return true;
|
|
|
|
+ else if (t < 0)
|
|
|
|
+ return false;
|
|
cpu_relax();
|
|
cpu_relax();
|
|
- } while (!test_bit(CSS_REMOVED, &css->flags));
|
|
|
|
-
|
|
|
|
- return false;
|
|
|
|
|
|
+ }
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(__css_tryget);
|
|
EXPORT_SYMBOL_GPL(__css_tryget);
|
|
|
|
|
|
@@ -5049,11 +5069,9 @@ void __css_put(struct cgroup_subsys_state *css)
|
|
set_bit(CGRP_RELEASABLE, &cgrp->flags);
|
|
set_bit(CGRP_RELEASABLE, &cgrp->flags);
|
|
check_for_release(cgrp);
|
|
check_for_release(cgrp);
|
|
}
|
|
}
|
|
- cgroup_wakeup_rmdir_waiter(cgrp);
|
|
|
|
break;
|
|
break;
|
|
case 0:
|
|
case 0:
|
|
- if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
|
|
|
|
- schedule_work(&css->dput_work);
|
|
|
|
|
|
+ schedule_work(&css->dput_work);
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
rcu_read_unlock();
|
|
rcu_read_unlock();
|
|
@@ -5439,7 +5457,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
|
|
}
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_CGROUP_DEBUG
|
|
#ifdef CONFIG_CGROUP_DEBUG
|
|
-static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
|
|
|
|
|
|
+static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont)
|
|
{
|
|
{
|
|
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
|
|
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
|
|
|
|
|
|
@@ -5449,7 +5467,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
|
|
return css;
|
|
return css;
|
|
}
|
|
}
|
|
|
|
|
|
-static void debug_destroy(struct cgroup *cont)
|
|
|
|
|
|
+static void debug_css_free(struct cgroup *cont)
|
|
{
|
|
{
|
|
kfree(cont->subsys[debug_subsys_id]);
|
|
kfree(cont->subsys[debug_subsys_id]);
|
|
}
|
|
}
|
|
@@ -5578,8 +5596,8 @@ static struct cftype debug_files[] = {
|
|
|
|
|
|
struct cgroup_subsys debug_subsys = {
|
|
struct cgroup_subsys debug_subsys = {
|
|
.name = "debug",
|
|
.name = "debug",
|
|
- .create = debug_create,
|
|
|
|
- .destroy = debug_destroy,
|
|
|
|
|
|
+ .css_alloc = debug_css_alloc,
|
|
|
|
+ .css_free = debug_css_free,
|
|
.subsys_id = debug_subsys_id,
|
|
.subsys_id = debug_subsys_id,
|
|
.base_cftypes = debug_files,
|
|
.base_cftypes = debug_files,
|
|
};
|
|
};
|