|
@@ -56,11 +56,8 @@
|
|
#include <linux/pid_namespace.h>
|
|
#include <linux/pid_namespace.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
|
|
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
|
|
-#include <linux/eventfd.h>
|
|
|
|
-#include <linux/poll.h>
|
|
|
|
#include <linux/flex_array.h> /* used in cgroup_attach_task */
|
|
#include <linux/flex_array.h> /* used in cgroup_attach_task */
|
|
#include <linux/kthread.h>
|
|
#include <linux/kthread.h>
|
|
-#include <linux/file.h>
|
|
|
|
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/atomic.h>
|
|
|
|
|
|
@@ -156,36 +153,6 @@ struct css_id {
|
|
unsigned short stack[0]; /* Array of Length (depth+1) */
|
|
unsigned short stack[0]; /* Array of Length (depth+1) */
|
|
};
|
|
};
|
|
|
|
|
|
-/*
|
|
|
|
- * cgroup_event represents events which userspace want to receive.
|
|
|
|
- */
|
|
|
|
-struct cgroup_event {
|
|
|
|
- /*
|
|
|
|
- * css which the event belongs to.
|
|
|
|
- */
|
|
|
|
- struct cgroup_subsys_state *css;
|
|
|
|
- /*
|
|
|
|
- * Control file which the event associated.
|
|
|
|
- */
|
|
|
|
- struct cftype *cft;
|
|
|
|
- /*
|
|
|
|
- * eventfd to signal userspace about the event.
|
|
|
|
- */
|
|
|
|
- struct eventfd_ctx *eventfd;
|
|
|
|
- /*
|
|
|
|
- * Each of these stored in a list by the cgroup.
|
|
|
|
- */
|
|
|
|
- struct list_head list;
|
|
|
|
- /*
|
|
|
|
- * All fields below needed to unregister event when
|
|
|
|
- * userspace closes eventfd.
|
|
|
|
- */
|
|
|
|
- poll_table pt;
|
|
|
|
- wait_queue_head_t *wqh;
|
|
|
|
- wait_queue_t wait;
|
|
|
|
- struct work_struct remove;
|
|
|
|
-};
|
|
|
|
-
|
|
|
|
/* The list of hierarchy roots */
|
|
/* The list of hierarchy roots */
|
|
|
|
|
|
static LIST_HEAD(cgroup_roots);
|
|
static LIST_HEAD(cgroup_roots);
|
|
@@ -235,8 +202,8 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
|
|
* keep accessing it outside the said locks. This function may return
|
|
* keep accessing it outside the said locks. This function may return
|
|
* %NULL if @cgrp doesn't have @subsys_id enabled.
|
|
* %NULL if @cgrp doesn't have @subsys_id enabled.
|
|
*/
|
|
*/
|
|
-static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
|
|
|
|
- struct cgroup_subsys *ss)
|
|
|
|
|
|
+struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
|
|
|
|
+ struct cgroup_subsys *ss)
|
|
{
|
|
{
|
|
if (ss)
|
|
if (ss)
|
|
return rcu_dereference_check(cgrp->subsys[ss->subsys_id],
|
|
return rcu_dereference_check(cgrp->subsys[ss->subsys_id],
|
|
@@ -2663,7 +2630,7 @@ static const struct inode_operations cgroup_dir_inode_operations = {
|
|
/*
|
|
/*
|
|
* Check if a file is a control file
|
|
* Check if a file is a control file
|
|
*/
|
|
*/
|
|
-static inline struct cftype *__file_cft(struct file *file)
|
|
|
|
|
|
+struct cftype *__file_cft(struct file *file)
|
|
{
|
|
{
|
|
if (file_inode(file)->i_fop != &cgroup_file_operations)
|
|
if (file_inode(file)->i_fop != &cgroup_file_operations)
|
|
return ERR_PTR(-EINVAL);
|
|
return ERR_PTR(-EINVAL);
|
|
@@ -3949,202 +3916,6 @@ static void cgroup_dput(struct cgroup *cgrp)
|
|
deactivate_super(sb);
|
|
deactivate_super(sb);
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * Unregister event and free resources.
|
|
|
|
- *
|
|
|
|
- * Gets called from workqueue.
|
|
|
|
- */
|
|
|
|
-static void cgroup_event_remove(struct work_struct *work)
|
|
|
|
-{
|
|
|
|
- struct cgroup_event *event = container_of(work, struct cgroup_event,
|
|
|
|
- remove);
|
|
|
|
- struct cgroup_subsys_state *css = event->css;
|
|
|
|
-
|
|
|
|
- remove_wait_queue(event->wqh, &event->wait);
|
|
|
|
-
|
|
|
|
- event->cft->unregister_event(css, event->cft, event->eventfd);
|
|
|
|
-
|
|
|
|
- /* Notify userspace the event is going away. */
|
|
|
|
- eventfd_signal(event->eventfd, 1);
|
|
|
|
-
|
|
|
|
- eventfd_ctx_put(event->eventfd);
|
|
|
|
- kfree(event);
|
|
|
|
- css_put(css);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * Gets called on POLLHUP on eventfd when user closes it.
|
|
|
|
- *
|
|
|
|
- * Called with wqh->lock held and interrupts disabled.
|
|
|
|
- */
|
|
|
|
-static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
|
|
|
|
- int sync, void *key)
|
|
|
|
-{
|
|
|
|
- struct cgroup_event *event = container_of(wait,
|
|
|
|
- struct cgroup_event, wait);
|
|
|
|
- struct cgroup *cgrp = event->css->cgroup;
|
|
|
|
- unsigned long flags = (unsigned long)key;
|
|
|
|
-
|
|
|
|
- if (flags & POLLHUP) {
|
|
|
|
- /*
|
|
|
|
- * If the event has been detached at cgroup removal, we
|
|
|
|
- * can simply return knowing the other side will cleanup
|
|
|
|
- * for us.
|
|
|
|
- *
|
|
|
|
- * We can't race against event freeing since the other
|
|
|
|
- * side will require wqh->lock via remove_wait_queue(),
|
|
|
|
- * which we hold.
|
|
|
|
- */
|
|
|
|
- spin_lock(&cgrp->event_list_lock);
|
|
|
|
- if (!list_empty(&event->list)) {
|
|
|
|
- list_del_init(&event->list);
|
|
|
|
- /*
|
|
|
|
- * We are in atomic context, but cgroup_event_remove()
|
|
|
|
- * may sleep, so we have to call it in workqueue.
|
|
|
|
- */
|
|
|
|
- schedule_work(&event->remove);
|
|
|
|
- }
|
|
|
|
- spin_unlock(&cgrp->event_list_lock);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static void cgroup_event_ptable_queue_proc(struct file *file,
|
|
|
|
- wait_queue_head_t *wqh, poll_table *pt)
|
|
|
|
-{
|
|
|
|
- struct cgroup_event *event = container_of(pt,
|
|
|
|
- struct cgroup_event, pt);
|
|
|
|
-
|
|
|
|
- event->wqh = wqh;
|
|
|
|
- add_wait_queue(wqh, &event->wait);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * Parse input and register new cgroup event handler.
|
|
|
|
- *
|
|
|
|
- * Input must be in format '<event_fd> <control_fd> <args>'.
|
|
|
|
- * Interpretation of args is defined by control file implementation.
|
|
|
|
- */
|
|
|
|
-static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
|
|
|
|
- struct cftype *cft, const char *buffer)
|
|
|
|
-{
|
|
|
|
- struct cgroup *cgrp = dummy_css->cgroup;
|
|
|
|
- struct cgroup_event *event;
|
|
|
|
- struct cgroup_subsys_state *cfile_css;
|
|
|
|
- unsigned int efd, cfd;
|
|
|
|
- struct fd efile;
|
|
|
|
- struct fd cfile;
|
|
|
|
- char *endp;
|
|
|
|
- int ret;
|
|
|
|
-
|
|
|
|
- efd = simple_strtoul(buffer, &endp, 10);
|
|
|
|
- if (*endp != ' ')
|
|
|
|
- return -EINVAL;
|
|
|
|
- buffer = endp + 1;
|
|
|
|
-
|
|
|
|
- cfd = simple_strtoul(buffer, &endp, 10);
|
|
|
|
- if ((*endp != ' ') && (*endp != '\0'))
|
|
|
|
- return -EINVAL;
|
|
|
|
- buffer = endp + 1;
|
|
|
|
-
|
|
|
|
- event = kzalloc(sizeof(*event), GFP_KERNEL);
|
|
|
|
- if (!event)
|
|
|
|
- return -ENOMEM;
|
|
|
|
-
|
|
|
|
- INIT_LIST_HEAD(&event->list);
|
|
|
|
- init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
|
|
|
|
- init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
|
|
|
|
- INIT_WORK(&event->remove, cgroup_event_remove);
|
|
|
|
-
|
|
|
|
- efile = fdget(efd);
|
|
|
|
- if (!efile.file) {
|
|
|
|
- ret = -EBADF;
|
|
|
|
- goto out_kfree;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- event->eventfd = eventfd_ctx_fileget(efile.file);
|
|
|
|
- if (IS_ERR(event->eventfd)) {
|
|
|
|
- ret = PTR_ERR(event->eventfd);
|
|
|
|
- goto out_put_efile;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- cfile = fdget(cfd);
|
|
|
|
- if (!cfile.file) {
|
|
|
|
- ret = -EBADF;
|
|
|
|
- goto out_put_eventfd;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /* the process need read permission on control file */
|
|
|
|
- /* AV: shouldn't we check that it's been opened for read instead? */
|
|
|
|
- ret = inode_permission(file_inode(cfile.file), MAY_READ);
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out_put_cfile;
|
|
|
|
-
|
|
|
|
- event->cft = __file_cft(cfile.file);
|
|
|
|
- if (IS_ERR(event->cft)) {
|
|
|
|
- ret = PTR_ERR(event->cft);
|
|
|
|
- goto out_put_cfile;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (!event->cft->ss) {
|
|
|
|
- ret = -EBADF;
|
|
|
|
- goto out_put_cfile;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Determine the css of @cfile, verify it belongs to the same
|
|
|
|
- * cgroup as cgroup.event_control, and associate @event with it.
|
|
|
|
- * Remaining events are automatically removed on cgroup destruction
|
|
|
|
- * but the removal is asynchronous, so take an extra ref.
|
|
|
|
- */
|
|
|
|
- rcu_read_lock();
|
|
|
|
-
|
|
|
|
- ret = -EINVAL;
|
|
|
|
- event->css = cgroup_css(cgrp, event->cft->ss);
|
|
|
|
- cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
|
|
|
|
- if (event->css && event->css == cfile_css && css_tryget(event->css))
|
|
|
|
- ret = 0;
|
|
|
|
-
|
|
|
|
- rcu_read_unlock();
|
|
|
|
- if (ret)
|
|
|
|
- goto out_put_cfile;
|
|
|
|
-
|
|
|
|
- if (!event->cft->register_event || !event->cft->unregister_event) {
|
|
|
|
- ret = -EINVAL;
|
|
|
|
- goto out_put_css;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- ret = event->cft->register_event(event->css, event->cft,
|
|
|
|
- event->eventfd, buffer);
|
|
|
|
- if (ret)
|
|
|
|
- goto out_put_css;
|
|
|
|
-
|
|
|
|
- efile.file->f_op->poll(efile.file, &event->pt);
|
|
|
|
-
|
|
|
|
- spin_lock(&cgrp->event_list_lock);
|
|
|
|
- list_add(&event->list, &cgrp->event_list);
|
|
|
|
- spin_unlock(&cgrp->event_list_lock);
|
|
|
|
-
|
|
|
|
- fdput(cfile);
|
|
|
|
- fdput(efile);
|
|
|
|
-
|
|
|
|
- return 0;
|
|
|
|
-
|
|
|
|
-out_put_css:
|
|
|
|
- css_put(event->css);
|
|
|
|
-out_put_cfile:
|
|
|
|
- fdput(cfile);
|
|
|
|
-out_put_eventfd:
|
|
|
|
- eventfd_ctx_put(event->eventfd);
|
|
|
|
-out_put_efile:
|
|
|
|
- fdput(efile);
|
|
|
|
-out_kfree:
|
|
|
|
- kfree(event);
|
|
|
|
-
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
|
|
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
|
|
struct cftype *cft)
|
|
struct cftype *cft)
|
|
{
|
|
{
|
|
@@ -4169,11 +3940,6 @@ static struct cftype cgroup_base_files[] = {
|
|
.release = cgroup_pidlist_release,
|
|
.release = cgroup_pidlist_release,
|
|
.mode = S_IRUGO | S_IWUSR,
|
|
.mode = S_IRUGO | S_IWUSR,
|
|
},
|
|
},
|
|
- {
|
|
|
|
- .name = "cgroup.event_control",
|
|
|
|
- .write_string = cgroup_write_event_control,
|
|
|
|
- .mode = S_IWUGO,
|
|
|
|
- },
|
|
|
|
{
|
|
{
|
|
.name = "cgroup.clone_children",
|
|
.name = "cgroup.clone_children",
|
|
.flags = CFTYPE_INSANE,
|
|
.flags = CFTYPE_INSANE,
|
|
@@ -4666,7 +4432,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
|
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
|
|
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
|
|
{
|
|
{
|
|
struct dentry *d = cgrp->dentry;
|
|
struct dentry *d = cgrp->dentry;
|
|
- struct cgroup_event *event, *tmp;
|
|
|
|
struct cgroup_subsys *ss;
|
|
struct cgroup_subsys *ss;
|
|
struct cgroup *child;
|
|
struct cgroup *child;
|
|
bool empty;
|
|
bool empty;
|
|
@@ -4741,18 +4506,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
|
dget(d);
|
|
dget(d);
|
|
cgroup_d_remove_dir(d);
|
|
cgroup_d_remove_dir(d);
|
|
|
|
|
|
- /*
|
|
|
|
- * Unregister events and notify userspace.
|
|
|
|
- * Notify userspace about cgroup removing only after rmdir of cgroup
|
|
|
|
- * directory to avoid race between userspace and kernelspace.
|
|
|
|
- */
|
|
|
|
- spin_lock(&cgrp->event_list_lock);
|
|
|
|
- list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
|
|
|
|
- list_del_init(&event->list);
|
|
|
|
- schedule_work(&event->remove);
|
|
|
|
- }
|
|
|
|
- spin_unlock(&cgrp->event_list_lock);
|
|
|
|
-
|
|
|
|
return 0;
|
|
return 0;
|
|
};
|
|
};
|
|
|
|
|