@@ -41,7 +41,6 @@
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/backing-dev.h>
-#include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/magic.h>
 #include <linux/spinlock.h>
@@ -56,14 +55,19 @@
 #include <linux/pid_namespace.h>
 #include <linux/idr.h>
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
-#include <linux/eventfd.h>
-#include <linux/poll.h>
 #include <linux/flex_array.h> /* used in cgroup_attach_task */
 #include <linux/kthread.h>
-#include <linux/file.h>
 
 #include <linux/atomic.h>
 
+/*
+ * pidlists linger the following amount before being destroyed.  The goal
+ * is avoiding frequent destruction in the middle of consecutive read calls
+ * Expiring in the middle is a performance problem not a correctness one.
+ * 1 sec should be enough.
+ */
+#define CGROUP_PIDLIST_DESTROY_DELAY	HZ
+
 /*
  * cgroup_mutex is the master lock.  Any modification to cgroup or its
  * hierarchy must be performed while holding it.
@@ -89,6 +93,19 @@ static DEFINE_MUTEX(cgroup_mutex);
 
 static DEFINE_MUTEX(cgroup_root_mutex);
 
+#define cgroup_assert_mutex_or_rcu_locked()				\
+	rcu_lockdep_assert(rcu_read_lock_held() ||			\
+			   lockdep_is_held(&cgroup_mutex),		\
+			   "cgroup_mutex or RCU read lock required");
+
+#ifdef CONFIG_LOCKDEP
+#define cgroup_assert_mutex_or_root_locked()				\
+	WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&cgroup_mutex) &&	\
+				     !lockdep_is_held(&cgroup_root_mutex)))
+#else
+#define cgroup_assert_mutex_or_root_locked()	do { } while (0)
+#endif
+
 /*
  * cgroup destruction makes heavy use of work items and there can be a lot
  * of concurrent destructions.  Use a separate workqueue so that cgroup
@@ -97,6 +114,12 @@ static DEFINE_MUTEX(cgroup_root_mutex);
  */
 static struct workqueue_struct *cgroup_destroy_wq;
 
+/*
+ * pidlist destructions need to be flushed on cgroup destruction.  Use a
+ * separate workqueue as flush domain.
+ */
+static struct workqueue_struct *cgroup_pidlist_destroy_wq;
+
 /*
  * Generate an array of cgroup subsystem pointers. At boot time, this is
  * populated with the built in subsystems, and modular subsystems are
@@ -119,49 +142,6 @@ static struct cgroupfs_root cgroup_dummy_root;
 /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */
 static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup;
 
-/*
- * cgroupfs file entry, pointed to from leaf dentry->d_fsdata.
- */
-struct cfent {
-	struct list_head		node;
-	struct dentry			*dentry;
-	struct cftype			*type;
-	struct cgroup_subsys_state	*css;
-
-	/* file xattrs */
-	struct simple_xattrs		xattrs;
-};
-
-/*
- * cgroup_event represents events which userspace want to receive.
- */
-struct cgroup_event {
-	/*
-	 * css which the event belongs to.
-	 */
-	struct cgroup_subsys_state *css;
-	/*
-	 * Control file which the event associated.
-	 */
-	struct cftype *cft;
-	/*
-	 * eventfd to signal userspace about the event.
-	 */
-	struct eventfd_ctx *eventfd;
-	/*
-	 * Each of these stored in a list by the cgroup.
-	 */
-	struct list_head list;
-	/*
-	 * All fields below needed to unregister event when
-	 * userspace closes eventfd.
-	 */
-	poll_table pt;
-	wait_queue_head_t *wqh;
-	wait_queue_t wait;
-	struct work_struct remove;
-};
-
 /* The list of hierarchy roots */
 
 static LIST_HEAD(cgroup_roots);
@@ -200,6 +180,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
 static int cgroup_file_release(struct inode *inode, struct file *file);
+static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
 
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -261,17 +242,33 @@ static int notify_on_release(const struct cgroup *cgrp)
 	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 }
 
+/**
+ * for_each_css - iterate all css's of a cgroup
+ * @css: the iteration cursor
+ * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
+ * @cgrp: the target cgroup to iterate css's of
+ *
+ * Should be called under cgroup_mutex.
+ */
+#define for_each_css(css, ssid, cgrp)					\
+	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
+		if (!((css) = rcu_dereference_check(			\
+				(cgrp)->subsys[(ssid)],			\
+				lockdep_is_held(&cgroup_mutex)))) { }	\
+		else
+
 /**
  * for_each_subsys - iterate all loaded cgroup subsystems
  * @ss: the iteration cursor
- * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
+ * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
  *
- * Should be called under cgroup_mutex.
+ * Iterates through all loaded subsystems. Should be called under
+ * cgroup_mutex or cgroup_root_mutex.
  */
-#define for_each_subsys(ss, i)						\
-	for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++)			\
-		if (({ lockdep_assert_held(&cgroup_mutex);		\
-		       !((ss) = cgroup_subsys[i]); })) { }		\
+#define for_each_subsys(ss, ssid)					\
+	for (({ cgroup_assert_mutex_or_root_locked(); (ssid) = 0; });	\
+	     (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)			\
+		if (!((ss) = cgroup_subsys[(ssid)])) { }		\
 		else
 
 /**
@@ -286,10 +283,6 @@ static int notify_on_release(const struct cgroup *cgrp)
 	for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT &&		\
 	     (((ss) = cgroup_subsys[i]) || true); (i)++)
 
-/* iterate each subsystem attached to a hierarchy */
-#define for_each_root_subsys(root, ss)					\
-	list_for_each_entry((ss), &(root)->subsys_list, sibling)
-
 /* iterate across the active hierarchies */
 #define for_each_active_root(root)					\
 	list_for_each_entry((root), &cgroup_roots, root_list)
@@ -863,11 +856,7 @@ static void cgroup_free_fn(struct work_struct *work)
 	 */
 	deactivate_super(cgrp->root->sb);
 
-	/*
-	 * if we're getting rid of the cgroup, refcount should ensure
-	 * that there are no pidlists left.
-	 */
-	BUG_ON(!list_empty(&cgrp->pidlists));
+	cgroup_pidlist_destroy_all(cgrp);
 
 	simple_xattrs_free(&cgrp->xattrs);
 
@@ -1050,7 +1039,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 					   cgroup_css(cgroup_dummy_top, ss));
 			cgroup_css(cgrp, ss)->cgroup = cgrp;
 
-			list_move(&ss->sibling, &root->subsys_list);
 			ss->root = root;
 			if (ss->bind)
 				ss->bind(cgroup_css(cgrp, ss));
@@ -1069,7 +1057,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			RCU_INIT_POINTER(cgrp->subsys[i], NULL);
 
 			cgroup_subsys[i]->root = &cgroup_dummy_root;
-			list_move(&ss->sibling, &cgroup_dummy_root.subsys_list);
 
 			/* subsystem is now free - drop reference on module */
 			module_put(ss->module);
@@ -1096,10 +1083,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
 {
 	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 	struct cgroup_subsys *ss;
+	int ssid;
 
 	mutex_lock(&cgroup_root_mutex);
-	for_each_root_subsys(root, ss)
-		seq_printf(seq, ",%s", ss->name);
+	for_each_subsys(ss, ssid)
+		if (root->subsys_mask & (1 << ssid))
+			seq_printf(seq, ",%s", ss->name);
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
 		seq_puts(seq, ",sane_behavior");
 	if (root->flags & CGRP_ROOT_NOPREFIX)
@@ -1362,8 +1351,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
 	cgrp->dummy_css.cgroup = cgrp;
-	INIT_LIST_HEAD(&cgrp->event_list);
-	spin_lock_init(&cgrp->event_list_lock);
 	simple_xattrs_init(&cgrp->xattrs);
 }
 
@@ -1371,7 +1358,6 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 {
 	struct cgroup *cgrp = &root->top_cgroup;
 
-	INIT_LIST_HEAD(&root->subsys_list);
 	INIT_LIST_HEAD(&root->root_list);
 	root->number_of_cgroups = 1;
 	cgrp->root = root;
@@ -1693,7 +1679,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	return ERR_PTR(ret);
 }
 
-static void cgroup_kill_sb(struct super_block *sb) {
+static void cgroup_kill_sb(struct super_block *sb)
+{
 	struct cgroupfs_root *root = sb->s_fs_info;
 	struct cgroup *cgrp = &root->top_cgroup;
 	struct cgrp_cset_link *link, *tmp_link;
@@ -1976,8 +1963,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 			      bool threadgroup)
 {
 	int retval, i, group_size;
-	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroupfs_root *root = cgrp->root;
+	struct cgroup_subsys_state *css, *failed_css = NULL;
 	/* threadgroup list cursor and array */
 	struct task_struct *leader = tsk;
 	struct task_and_cgroup *tc;
@@ -2050,13 +2037,11 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 	/*
 	 * step 1: check that we can legitimately attach to the cgroup.
 	 */
-	for_each_root_subsys(root, ss) {
-		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-
-		if (ss->can_attach) {
-			retval = ss->can_attach(css, &tset);
+	for_each_css(css, i, cgrp) {
+		if (css->ss->can_attach) {
+			retval = css->ss->can_attach(css, &tset);
 			if (retval) {
-				failed_ss = ss;
+				failed_css = css;
 				goto out_cancel_attach;
 			}
 		}
@@ -2092,12 +2077,9 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 	/*
 	 * step 4: do subsystem attach callbacks.
 	 */
-	for_each_root_subsys(root, ss) {
-		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-
-		if (ss->attach)
-			ss->attach(css, &tset);
-	}
+	for_each_css(css, i, cgrp)
+		if (css->ss->attach)
+			css->ss->attach(css, &tset);
 
 	/*
 	 * step 5: success! and cleanup
@@ -2114,13 +2096,11 @@ out_put_css_set_refs:
 	}
 out_cancel_attach:
 	if (retval) {
-		for_each_root_subsys(root, ss) {
-			struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-
-			if (ss == failed_ss)
+		for_each_css(css, i, cgrp) {
+			if (css == failed_css)
 				break;
-			if (ss->cancel_attach)
-				ss->cancel_attach(css, &tset);
+			if (css->ss->cancel_attach)
+				css->ss->cancel_attach(css, &tset);
 		}
 	}
 out_free_group_list:
@@ -2148,7 +2128,7 @@ retry_find_task:
 		tsk = find_task_by_vpid(pid);
 		if (!tsk) {
 			rcu_read_unlock();
-			ret= -ESRCH;
+			ret = -ESRCH;
 			goto out_unlock_cgroup;
 		}
 		/*
@@ -2260,10 +2240,9 @@ static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
-				     struct cftype *cft, struct seq_file *seq)
+static int cgroup_release_agent_show(struct seq_file *seq, void *v)
 {
-	struct cgroup *cgrp = css->cgroup;
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
@@ -2273,174 +2252,129 @@ static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
-				     struct cftype *cft, struct seq_file *seq)
+static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
 {
-	seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
 	return 0;
 }
 
 /* A buffer size big enough for numbers or short strings */
 #define CGROUP_LOCAL_BUFFER_SIZE 64
 
-static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
-				struct cftype *cft, struct file *file,
-				const char __user *userbuf, size_t nbytes,
-				loff_t *unused_ppos)
+static ssize_t cgroup_file_write(struct file *file, const char __user *userbuf,
+				 size_t nbytes, loff_t *ppos)
 {
-	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
-	int retval = 0;
-	char *end;
+	struct cfent *cfe = __d_cfe(file->f_dentry);
+	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cgroup_subsys_state *css = cfe->css;
+	size_t max_bytes = cft->max_write_len ?: CGROUP_LOCAL_BUFFER_SIZE - 1;
+	char *buf;
+	int ret;
 
-	if (!nbytes)
-		return -EINVAL;
-	if (nbytes >= sizeof(buffer))
+	if (nbytes >= max_bytes)
 		return -E2BIG;
-	if (copy_from_user(buffer, userbuf, nbytes))
-		return -EFAULT;
 
-	buffer[nbytes] = 0;	/* nul-terminate */
-	if (cft->write_u64) {
-		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
-		if (*end)
-			return -EINVAL;
-		retval = cft->write_u64(css, cft, val);
+	buf = kmalloc(nbytes + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, userbuf, nbytes)) {
+		ret = -EFAULT;
+		goto out_free;
+	}
+
+	buf[nbytes] = '\0';
+
+	if (cft->write_string) {
+		ret = cft->write_string(css, cft, strstrip(buf));
+	} else if (cft->write_u64) {
+		unsigned long long v;
+		ret = kstrtoull(buf, 0, &v);
+		if (!ret)
+			ret = cft->write_u64(css, cft, v);
+	} else if (cft->write_s64) {
+		long long v;
+		ret = kstrtoll(buf, 0, &v);
+		if (!ret)
+			ret = cft->write_s64(css, cft, v);
+	} else if (cft->trigger) {
+		ret = cft->trigger(css, (unsigned int)cft->private);
 	} else {
-		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
-		if (*end)
-			return -EINVAL;
-		retval = cft->write_s64(css, cft, val);
+		ret = -EINVAL;
 	}
-	if (!retval)
-		retval = nbytes;
-	return retval;
+out_free:
+	kfree(buf);
+	return ret ?: nbytes;
 }
 
-static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
-				   struct cftype *cft, struct file *file,
-				   const char __user *userbuf, size_t nbytes,
-				   loff_t *unused_ppos)
+/*
+ * seqfile ops/methods for returning structured data. Currently just
+ * supports string->u64 maps, but can be extended in future.
+ */
+
+static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
 {
-	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
-	int retval = 0;
-	size_t max_bytes = cft->max_write_len;
-	char *buffer = local_buffer;
+	struct cftype *cft = seq_cft(seq);
 
-	if (!max_bytes)
-		max_bytes = sizeof(local_buffer) - 1;
-	if (nbytes >= max_bytes)
-		return -E2BIG;
-	/* Allocate a dynamic buffer if we need one */
-	if (nbytes >= sizeof(local_buffer)) {
-		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-		if (buffer == NULL)
-			return -ENOMEM;
-	}
-	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out;
+	if (cft->seq_start) {
+		return cft->seq_start(seq, ppos);
+	} else {
+		/*
+		 * The same behavior and code as single_open().  Returns
+		 * !NULL if pos is at the beginning; otherwise, NULL.
+		 */
+		return NULL + !*ppos;
 	}
-
-	buffer[nbytes] = 0;	/* nul-terminate */
-	retval = cft->write_string(css, cft, strstrip(buffer));
-	if (!retval)
-		retval = nbytes;
-out:
-	if (buffer != local_buffer)
-		kfree(buffer);
-	return retval;
 }
 
-static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
-				 size_t nbytes, loff_t *ppos)
+static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
 {
-	struct cfent *cfe = __d_cfe(file->f_dentry);
-	struct cftype *cft = __d_cft(file->f_dentry);
-	struct cgroup_subsys_state *css = cfe->css;
+	struct cftype *cft = seq_cft(seq);
 
-	if (cft->write)
-		return cft->write(css, cft, file, buf, nbytes, ppos);
-	if (cft->write_u64 || cft->write_s64)
-		return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
-	if (cft->write_string)
-		return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
-	if (cft->trigger) {
-		int ret = cft->trigger(css, (unsigned int)cft->private);
-		return ret ? ret : nbytes;
+	if (cft->seq_next) {
+		return cft->seq_next(seq, v, ppos);
+	} else {
+		/*
+		 * The same behavior and code as single_open(), always
+		 * terminate after the initial read.
+		 */
+		++*ppos;
+		return NULL;
 	}
-	return -EINVAL;
 }
 
-static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
-			       struct cftype *cft, struct file *file,
-			       char __user *buf, size_t nbytes, loff_t *ppos)
+static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
 {
-	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
-	u64 val = cft->read_u64(css, cft);
-	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
+	struct cftype *cft = seq_cft(seq);
 
-	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+	if (cft->seq_stop)
+		cft->seq_stop(seq, v);
 }
 
-static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
-			       struct cftype *cft, struct file *file,
-			       char __user *buf, size_t nbytes, loff_t *ppos)
+static int cgroup_seqfile_show(struct seq_file *m, void *arg)
 {
-	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
-	s64 val = cft->read_s64(css, cft);
-	int len = sprintf(tmp, "%lld\n", (long long) val);
+	struct cftype *cft = seq_cft(m);
+	struct cgroup_subsys_state *css = seq_css(m);
 
-	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
-}
+	if (cft->seq_show)
+		return cft->seq_show(m, arg);
 
-static ssize_t cgroup_file_read(struct file *file, char __user *buf,
-				size_t nbytes, loff_t *ppos)
-{
-	struct cfent *cfe = __d_cfe(file->f_dentry);
-	struct cftype *cft = __d_cft(file->f_dentry);
-	struct cgroup_subsys_state *css = cfe->css;
-
-	if (cft->read)
-		return cft->read(css, cft, file, buf, nbytes, ppos);
 	if (cft->read_u64)
-		return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
-	if (cft->read_s64)
-		return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
-	return -EINVAL;
-}
-
-/*
- * seqfile ops/methods for returning structured data. Currently just
- * supports string->u64 maps, but can be extended in future.
- */
-
-static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
-{
-	struct seq_file *sf = cb->state;
-	return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
-}
-
-static int cgroup_seqfile_show(struct seq_file *m, void *arg)
-{
-	struct cfent *cfe = m->private;
-	struct cftype *cft = cfe->type;
-	struct cgroup_subsys_state *css = cfe->css;
-
-	if (cft->read_map) {
-		struct cgroup_map_cb cb = {
-			.fill = cgroup_map_add,
-			.state = m,
-		};
-		return cft->read_map(css, cft, &cb);
-	}
-	return cft->read_seq_string(css, cft, m);
+		seq_printf(m, "%llu\n", cft->read_u64(css, cft));
+	else if (cft->read_s64)
+		seq_printf(m, "%lld\n", cft->read_s64(css, cft));
+	else
+		return -EINVAL;
+	return 0;
 }
 
-static const struct file_operations cgroup_seqfile_operations = {
-	.read = seq_read,
-	.write = cgroup_file_write,
-	.llseek = seq_lseek,
-	.release = cgroup_file_release,
+static struct seq_operations cgroup_seq_operations = {
+	.start		= cgroup_seqfile_start,
+	.next		= cgroup_seqfile_next,
+	.stop		= cgroup_seqfile_stop,
+	.show		= cgroup_seqfile_show,
 };
 
 static int cgroup_file_open(struct inode *inode, struct file *file)
@@ -2449,6 +2383,7 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
 	struct cftype *cft = __d_cft(file->f_dentry);
 	struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
 	struct cgroup_subsys_state *css;
+	struct cgroup_open_file *of;
 	int err;
 
 	err = generic_file_open(inode, file);
@@ -2478,32 +2413,26 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
 	WARN_ON_ONCE(cfe->css && cfe->css != css);
 	cfe->css = css;
 
-	if (cft->read_map || cft->read_seq_string) {
-		file->f_op = &cgroup_seqfile_operations;
-		err = single_open(file, cgroup_seqfile_show, cfe);
-	} else if (cft->open) {
-		err = cft->open(inode, file);
+	of = __seq_open_private(file, &cgroup_seq_operations,
+				sizeof(struct cgroup_open_file));
+	if (of) {
+		of->cfe = cfe;
+		return 0;
 	}
 
-	if (css->ss && err)
+	if (css->ss)
 		css_put(css);
-	return err;
+	return -ENOMEM;
 }
 
 static int cgroup_file_release(struct inode *inode, struct file *file)
 {
 	struct cfent *cfe = __d_cfe(file->f_dentry);
-	struct cftype *cft = __d_cft(file->f_dentry);
 	struct cgroup_subsys_state *css = cfe->css;
-	int ret = 0;
 
-	if (cft->release)
-		ret = cft->release(inode, file);
 	if (css->ss)
 		css_put(css);
-	if (file->f_op == &cgroup_seqfile_operations)
-		single_release(inode, file);
-	return ret;
+	return seq_release_private(inode, file);
 }
 
 /*
@@ -2614,7 +2543,7 @@ static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
 }
 
 static const struct file_operations cgroup_file_operations = {
-	.read = cgroup_file_read,
+	.read = seq_read,
 	.write = cgroup_file_write,
 	.llseek = generic_file_llseek,
 	.open = cgroup_file_open,
@@ -2639,16 +2568,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
 	.removexattr = cgroup_removexattr,
 };
 
-/*
- * Check if a file is a control file
- */
-static inline struct cftype *__file_cft(struct file *file)
-{
-	if (file_inode(file)->i_fop != &cgroup_file_operations)
-		return ERR_PTR(-EINVAL);
-	return __d_cft(file->f_dentry);
-}
-
 static int cgroup_create_file(struct dentry *dentry, umode_t mode,
 			      struct super_block *sb)
 {
@@ -2706,12 +2625,11 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 	if (cft->mode)
 		return cft->mode;
 
-	if (cft->read || cft->read_u64 || cft->read_s64 ||
-	    cft->read_map || cft->read_seq_string)
+	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
 		mode |= S_IRUGO;
 
-	if (cft->write || cft->write_u64 || cft->write_s64 ||
-	    cft->write_string || cft->trigger)
+	if (cft->write_u64 || cft->write_s64 || cft->write_string ||
+	    cft->trigger)
 		mode |= S_IWUSR;
 
 	return mode;
@@ -3007,9 +2925,9 @@ static void cgroup_enable_task_cg_lists(void)
  * @parent_css: css whose children to walk
  *
  * This function returns the next child of @parent_css and should be called
- * under RCU read lock.  The only requirement is that @parent_css and
- * @pos_css are accessible.  The next sibling is guaranteed to be returned
- * regardless of their states.
+ * under either cgroup_mutex or RCU read lock.  The only requirement is
+ * that @parent_css and @pos_css are accessible.  The next sibling is
+ * guaranteed to be returned regardless of their states.
  */
 struct cgroup_subsys_state *
 css_next_child(struct cgroup_subsys_state *pos_css,
@@ -3019,7 +2937,7 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	struct cgroup *cgrp = parent_css->cgroup;
 	struct cgroup *next;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	cgroup_assert_mutex_or_rcu_locked();
 
 	/*
 	 * @pos could already have been removed.  Once a cgroup is removed,
|
|
* to visit for pre-order traversal of @root's descendants. @root is
|
|
* to visit for pre-order traversal of @root's descendants. @root is
|
|
* included in the iteration and the first node to be visited.
|
|
* included in the iteration and the first node to be visited.
|
|
*
|
|
*
|
|
- * While this function requires RCU read locking, it doesn't require the
|
|
|
|
- * whole traversal to be contained in a single RCU critical section. This
|
|
|
|
- * function will return the correct next descendant as long as both @pos
|
|
|
|
- * and @root are accessible and @pos is a descendant of @root.
|
|
|
|
|
|
+ * While this function requires cgroup_mutex or RCU read locking, it
|
|
|
|
+ * doesn't require the whole traversal to be contained in a single critical
|
|
|
|
+ * section. This function will return the correct next descendant as long
|
|
|
|
+ * as both @pos and @root are accessible and @pos is a descendant of @root.
|
|
*/
|
|
*/
|
|
struct cgroup_subsys_state *
|
|
struct cgroup_subsys_state *
|
|
css_next_descendant_pre(struct cgroup_subsys_state *pos,
|
|
css_next_descendant_pre(struct cgroup_subsys_state *pos,
|
|
@@ -3077,7 +2995,7 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos,
|
|
{
|
|
{
|
|
struct cgroup_subsys_state *next;
|
|
struct cgroup_subsys_state *next;
|
|
|
|
|
|
- WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
|
+ cgroup_assert_mutex_or_rcu_locked();
|
|
|
|
|
|
/* if first iteration, visit @root */
|
|
/* if first iteration, visit @root */
|
|
if (!pos)
|
|
if (!pos)
|
|
@@ -3108,17 +3026,17 @@ EXPORT_SYMBOL_GPL(css_next_descendant_pre);
|
|
* is returned. This can be used during pre-order traversal to skip
|
|
* is returned. This can be used during pre-order traversal to skip
|
|
* subtree of @pos.
|
|
* subtree of @pos.
|
|
*
|
|
*
|
|
- * While this function requires RCU read locking, it doesn't require the
|
|
|
|
- * whole traversal to be contained in a single RCU critical section. This
|
|
|
|
- * function will return the correct rightmost descendant as long as @pos is
|
|
|
|
- * accessible.
|
|
|
|
|
|
+ * While this function requires cgroup_mutex or RCU read locking, it
|
|
|
|
+ * doesn't require the whole traversal to be contained in a single critical
|
|
|
|
+ * section. This function will return the correct rightmost descendant as
|
|
|
|
+ * long as @pos is accessible.
|
|
*/
|
|
*/
|
|
struct cgroup_subsys_state *
|
|
struct cgroup_subsys_state *
|
|
css_rightmost_descendant(struct cgroup_subsys_state *pos)
|
|
css_rightmost_descendant(struct cgroup_subsys_state *pos)
|
|
{
|
|
{
|
|
struct cgroup_subsys_state *last, *tmp;
|
|
struct cgroup_subsys_state *last, *tmp;
|
|
|
|
|
|
- WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
|
+ cgroup_assert_mutex_or_rcu_locked();
|
|
|
|
|
|
do {
|
|
do {
|
|
last = pos;
|
|
last = pos;
|
|
@@ -3154,10 +3072,11 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
|
|
* to visit for post-order traversal of @root's descendants. @root is
|
|
* to visit for post-order traversal of @root's descendants. @root is
|
|
* included in the iteration and the last node to be visited.
|
|
* included in the iteration and the last node to be visited.
|
|
*
|
|
*
|
|
- * While this function requires RCU read locking, it doesn't require the
|
|
|
|
- * whole traversal to be contained in a single RCU critical section. This
|
|
|
|
- * function will return the correct next descendant as long as both @pos
|
|
|
|
- * and @cgroup are accessible and @pos is a descendant of @cgroup.
|
|
|
|
|
|
+ * While this function requires cgroup_mutex or RCU read locking, it
|
|
|
|
+ * doesn't require the whole traversal to be contained in a single critical
|
|
|
|
+ * section. This function will return the correct next descendant as long
|
|
|
|
+ * as both @pos and @cgroup are accessible and @pos is a descendant of
|
|
|
|
+ * @cgroup.
|
|
*/
|
|
*/
|
|
struct cgroup_subsys_state *
|
|
struct cgroup_subsys_state *
|
|
css_next_descendant_post(struct cgroup_subsys_state *pos,
|
|
css_next_descendant_post(struct cgroup_subsys_state *pos,
|
|
@@ -3165,7 +3084,7 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
|
|
{
|
|
{
|
|
struct cgroup_subsys_state *next;
|
|
struct cgroup_subsys_state *next;
|
|
|
|
|
|
- WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
|
+ cgroup_assert_mutex_or_rcu_locked();
|
|
|
|
|
|
/* if first iteration, visit leftmost descendant which may be @root */
|
|
/* if first iteration, visit leftmost descendant which may be @root */
|
|
if (!pos)
|
|
if (!pos)
|
|
@@ -3504,14 +3423,12 @@ struct cgroup_pidlist {
|
|
pid_t *list;
|
|
pid_t *list;
|
|
/* how many elements the above list has */
|
|
/* how many elements the above list has */
|
|
int length;
|
|
int length;
|
|
- /* how many files are using the current array */
|
|
|
|
- int use_count;
|
|
|
|
/* each of these stored in a list by its cgroup */
|
|
/* each of these stored in a list by its cgroup */
|
|
struct list_head links;
|
|
struct list_head links;
|
|
/* pointer to the cgroup we belong to, for list removal purposes */
|
|
/* pointer to the cgroup we belong to, for list removal purposes */
|
|
struct cgroup *owner;
|
|
struct cgroup *owner;
|
|
- /* protects the other fields */
|
|
|
|
- struct rw_semaphore rwsem;
|
|
|
|
|
|
+ /* for delayed destruction */
|
|
|
|
+ struct delayed_work destroy_dwork;
|
|
};
|
|
};
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -3527,6 +3444,7 @@ static void *pidlist_allocate(int count)
|
|
else
|
|
else
|
|
return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
|
|
return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
|
|
}
|
|
}
|
|
|
|
+
|
|
static void pidlist_free(void *p)
|
|
static void pidlist_free(void *p)
|
|
{
|
|
{
|
|
if (is_vmalloc_addr(p))
|
|
if (is_vmalloc_addr(p))
|
|
@@ -3535,6 +3453,47 @@ static void pidlist_free(void *p)
|
|
kfree(p);
|
|
kfree(p);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Used to destroy all pidlists lingering waiting for destroy timer. None
|
|
|
|
+ * should be left afterwards.
|
|
|
|
+ */
|
|
|
|
+static void cgroup_pidlist_destroy_all(struct cgroup *cgrp)
|
|
|
|
+{
|
|
|
|
+ struct cgroup_pidlist *l, *tmp_l;
|
|
|
|
+
|
|
|
|
+ mutex_lock(&cgrp->pidlist_mutex);
|
|
|
|
+ list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
|
|
|
|
+ mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
|
|
|
|
+ mutex_unlock(&cgrp->pidlist_mutex);
|
|
|
|
+
|
|
|
|
+ flush_workqueue(cgroup_pidlist_destroy_wq);
|
|
|
|
+ BUG_ON(!list_empty(&cgrp->pidlists));
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
|
|
|
|
+{
|
|
|
|
+ struct delayed_work *dwork = to_delayed_work(work);
|
|
|
|
+ struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
|
|
|
|
+ destroy_dwork);
|
|
|
|
+ struct cgroup_pidlist *tofree = NULL;
|
|
|
|
+
|
|
|
|
+ mutex_lock(&l->owner->pidlist_mutex);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Destroy iff we didn't get queued again. The state won't change
|
|
|
|
+ * as destroy_dwork can only be queued while locked.
|
|
|
|
+ */
|
|
|
|
+ if (!delayed_work_pending(dwork)) {
|
|
|
|
+ list_del(&l->links);
|
|
|
|
+ pidlist_free(l->list);
|
|
|
|
+ put_pid_ns(l->key.ns);
|
|
|
|
+ tofree = l;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ mutex_unlock(&l->owner->pidlist_mutex);
|
|
|
|
+ kfree(tofree);
|
|
|
|
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
|
|
* pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
|
|
* Returns the number of unique elements.
|
|
* Returns the number of unique elements.
|
|
@@ -3565,52 +3524,92 @@ after:
|
|
return dest;
|
|
return dest;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * The two pid files - task and cgroup.procs - guaranteed that the result
|
|
|
|
+ * is sorted, which forced this whole pidlist fiasco. As pid order is
|
|
|
|
+ * different per namespace, each namespace needs differently sorted list,
|
|
|
|
+ * making it impossible to use, for example, single rbtree of member tasks
|
|
|
|
+ * sorted by task pointer. As pidlists can be fairly large, allocating one
|
|
|
|
+ * per open file is dangerous, so cgroup had to implement shared pool of
|
|
|
|
+ * pidlists keyed by cgroup and namespace.
|
|
|
|
+ *
|
|
|
|
+ * All this extra complexity was caused by the original implementation
|
|
|
|
+ * committing to an entirely unnecessary property. In the long term, we
|
|
|
|
+ * want to do away with it. Explicitly scramble sort order if
|
|
|
|
+ * sane_behavior so that no such expectation exists in the new interface.
|
|
|
|
+ *
|
|
|
|
+ * Scrambling is done by swapping every two consecutive bits, which is
|
|
|
|
+ * non-identity one-to-one mapping which disturbs sort order sufficiently.
|
|
|
|
+ */
|
|
|
|
+static pid_t pid_fry(pid_t pid)
|
|
|
|
+{
|
|
|
|
+ unsigned a = pid & 0x55555555;
|
|
|
|
+ unsigned b = pid & 0xAAAAAAAA;
|
|
|
|
+
|
|
|
|
+ return (a << 1) | (b >> 1);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
|
|
|
|
+{
|
|
|
|
+ if (cgroup_sane_behavior(cgrp))
|
|
|
|
+ return pid_fry(pid);
|
|
|
|
+ else
|
|
|
|
+ return pid;
|
|
|
|
+}
|
|
|
|
+
|
|
static int cmppid(const void *a, const void *b)
|
|
static int cmppid(const void *a, const void *b)
|
|
{
|
|
{
|
|
return *(pid_t *)a - *(pid_t *)b;
|
|
return *(pid_t *)a - *(pid_t *)b;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static int fried_cmppid(const void *a, const void *b)
|
|
|
|
+{
|
|
|
|
+ return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
|
|
|
|
+ enum cgroup_filetype type)
|
|
|
|
+{
|
|
|
|
+ struct cgroup_pidlist *l;
|
|
|
|
+ /* don't need task_nsproxy() if we're looking at ourself */
|
|
|
|
+ struct pid_namespace *ns = task_active_pid_ns(current);
|
|
|
|
+
|
|
|
|
+ lockdep_assert_held(&cgrp->pidlist_mutex);
|
|
|
|
+
|
|
|
|
+ list_for_each_entry(l, &cgrp->pidlists, links)
|
|
|
|
+ if (l->key.type == type && l->key.ns == ns)
|
|
|
|
+ return l;
|
|
|
|
+ return NULL;
|
|
|
|
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* find the appropriate pidlist for our purpose (given procs vs tasks)
|
|
* find the appropriate pidlist for our purpose (given procs vs tasks)
|
|
* returns with the lock on that pidlist already held, and takes care
|
|
* returns with the lock on that pidlist already held, and takes care
|
|
* of the use count, or returns NULL with no locks held if we're out of
|
|
* of the use count, or returns NULL with no locks held if we're out of
|
|
* memory.
|
|
* memory.
|
|
*/
|
|
*/
|
|
-static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
|
|
|
|
- enum cgroup_filetype type)
|
|
|
|
|
|
+static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
|
|
|
|
+ enum cgroup_filetype type)
|
|
{
|
|
{
|
|
struct cgroup_pidlist *l;
|
|
struct cgroup_pidlist *l;
|
|
- /* don't need task_nsproxy() if we're looking at ourself */
|
|
|
|
- struct pid_namespace *ns = task_active_pid_ns(current);
|
|
|
|
|
|
|
|
- /*
|
|
|
|
- * We can't drop the pidlist_mutex before taking the l->rwsem in case
|
|
|
|
- * the last ref-holder is trying to remove l from the list at the same
|
|
|
|
- * time. Holding the pidlist_mutex precludes somebody taking whichever
|
|
|
|
- * list we find out from under us - compare release_pid_array().
|
|
|
|
- */
|
|
|
|
- mutex_lock(&cgrp->pidlist_mutex);
|
|
|
|
- list_for_each_entry(l, &cgrp->pidlists, links) {
|
|
|
|
- if (l->key.type == type && l->key.ns == ns) {
|
|
|
|
- /* make sure l doesn't vanish out from under us */
|
|
|
|
- down_write(&l->rwsem);
|
|
|
|
- mutex_unlock(&cgrp->pidlist_mutex);
|
|
|
|
- return l;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ lockdep_assert_held(&cgrp->pidlist_mutex);
|
|
|
|
+
|
|
|
|
+ l = cgroup_pidlist_find(cgrp, type);
|
|
|
|
+ if (l)
|
|
|
|
+ return l;
|
|
|
|
+
|
|
/* entry not found; create a new one */
|
|
/* entry not found; create a new one */
|
|
l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
|
|
l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
|
|
- if (!l) {
|
|
|
|
- mutex_unlock(&cgrp->pidlist_mutex);
|
|
|
|
|
|
+ if (!l)
|
|
return l;
|
|
return l;
|
|
- }
|
|
|
|
- init_rwsem(&l->rwsem);
|
|
|
|
- down_write(&l->rwsem);
|
|
|
|
|
|
+
|
|
|
|
+ INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
|
|
l->key.type = type;
|
|
l->key.type = type;
|
|
- l->key.ns = get_pid_ns(ns);
|
|
|
|
|
|
+ /* don't need task_nsproxy() if we're looking at ourself */
|
|
|
|
+ l->key.ns = get_pid_ns(task_active_pid_ns(current));
|
|
l->owner = cgrp;
|
|
l->owner = cgrp;
|
|
list_add(&l->links, &cgrp->pidlists);
|
|
list_add(&l->links, &cgrp->pidlists);
|
|
- mutex_unlock(&cgrp->pidlist_mutex);
|
|
|
|
return l;
|
|
return l;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -3627,6 +3626,8 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
|
|
struct task_struct *tsk;
|
|
struct task_struct *tsk;
|
|
struct cgroup_pidlist *l;
|
|
struct cgroup_pidlist *l;
|
|
|
|
|
|
|
|
+ lockdep_assert_held(&cgrp->pidlist_mutex);
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* If cgroup gets more users after we read count, we won't have
|
|
* If cgroup gets more users after we read count, we won't have
|
|
* enough space - tough. This race is indistinguishable to the
|
|
* enough space - tough. This race is indistinguishable to the
|
|
@@ -3653,20 +3654,24 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
|
|
css_task_iter_end(&it);
|
|
css_task_iter_end(&it);
|
|
length = n;
|
|
length = n;
|
|
/* now sort & (if procs) strip out duplicates */
|
|
/* now sort & (if procs) strip out duplicates */
|
|
- sort(array, length, sizeof(pid_t), cmppid, NULL);
|
|
|
|
|
|
+ if (cgroup_sane_behavior(cgrp))
|
|
|
|
+ sort(array, length, sizeof(pid_t), fried_cmppid, NULL);
|
|
|
|
+ else
|
|
|
|
+ sort(array, length, sizeof(pid_t), cmppid, NULL);
|
|
if (type == CGROUP_FILE_PROCS)
|
|
if (type == CGROUP_FILE_PROCS)
|
|
length = pidlist_uniq(array, length);
|
|
length = pidlist_uniq(array, length);
|
|
- l = cgroup_pidlist_find(cgrp, type);
|
|
|
|
|
|
+
|
|
|
|
+ l = cgroup_pidlist_find_create(cgrp, type);
|
|
if (!l) {
|
|
if (!l) {
|
|
|
|
+ mutex_unlock(&cgrp->pidlist_mutex);
|
|
pidlist_free(array);
|
|
pidlist_free(array);
|
|
return -ENOMEM;
|
|
return -ENOMEM;
|
|
}
|
|
}
|
|
- /* store array, freeing old if necessary - lock already held */
|
|
|
|
|
|
+
|
|
|
|
+ /* store array, freeing old if necessary */
|
|
pidlist_free(l->list);
|
|
pidlist_free(l->list);
|
|
l->list = array;
|
|
l->list = array;
|
|
l->length = length;
|
|
l->length = length;
|
|
- l->use_count++;
|
|
|
|
- up_write(&l->rwsem);
|
|
|
|
*lp = l;
|
|
*lp = l;
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
@@ -3740,20 +3745,45 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
|
|
* after a seek to the start). Use a binary-search to find the
|
|
* after a seek to the start). Use a binary-search to find the
|
|
* next pid to display, if any
|
|
* next pid to display, if any
|
|
*/
|
|
*/
|
|
- struct cgroup_pidlist *l = s->private;
|
|
|
|
|
|
+ struct cgroup_open_file *of = s->private;
|
|
|
|
+ struct cgroup *cgrp = seq_css(s)->cgroup;
|
|
|
|
+ struct cgroup_pidlist *l;
|
|
|
|
+ enum cgroup_filetype type = seq_cft(s)->private;
|
|
int index = 0, pid = *pos;
|
|
int index = 0, pid = *pos;
|
|
- int *iter;
|
|
|
|
|
|
+ int *iter, ret;
|
|
|
|
+
|
|
|
|
+ mutex_lock(&cgrp->pidlist_mutex);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * !NULL @of->priv indicates that this isn't the first start()
|
|
|
|
+ * after open. If the matching pidlist is around, we can use that.
|
|
|
|
+ * Look for it. Note that @of->priv can't be used directly. It
|
|
|
|
+ * could already have been destroyed.
|
|
|
|
+ */
|
|
|
|
+ if (of->priv)
|
|
|
|
+ of->priv = cgroup_pidlist_find(cgrp, type);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Either this is the first start() after open or the matching
|
|
|
|
+ * pidlist has been destroyed inbetween. Create a new one.
|
|
|
|
+ */
|
|
|
|
+ if (!of->priv) {
|
|
|
|
+ ret = pidlist_array_load(cgrp, type,
|
|
|
|
+ (struct cgroup_pidlist **)&of->priv);
|
|
|
|
+ if (ret)
|
|
|
|
+ return ERR_PTR(ret);
|
|
|
|
+ }
|
|
|
|
+ l = of->priv;
|
|
|
|
|
|
- down_read(&l->rwsem);
|
|
|
|
if (pid) {
|
|
if (pid) {
|
|
int end = l->length;
|
|
int end = l->length;
|
|
|
|
|
|
while (index < end) {
|
|
while (index < end) {
|
|
int mid = (index + end) / 2;
|
|
int mid = (index + end) / 2;
|
|
- if (l->list[mid] == pid) {
|
|
|
|
|
|
+ if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) {
|
|
index = mid;
|
|
index = mid;
|
|
break;
|
|
break;
|
|
- } else if (l->list[mid] <= pid)
|
|
|
|
|
|
+ } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid)
|
|
index = mid + 1;
|
|
index = mid + 1;
|
|
else
|
|
else
|
|
end = mid;
|
|
end = mid;
|
|
@@ -3764,19 +3794,25 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
|
|
return NULL;
|
|
return NULL;
|
|
/* Update the abstract position to be the actual pid that we found */
|
|
/* Update the abstract position to be the actual pid that we found */
|
|
iter = l->list + index;
|
|
iter = l->list + index;
|
|
- *pos = *iter;
|
|
|
|
|
|
+ *pos = cgroup_pid_fry(cgrp, *iter);
|
|
return iter;
|
|
return iter;
|
|
}
|
|
}
|
|
|
|
|
|
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
|
|
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
|
|
{
|
|
{
|
|
- struct cgroup_pidlist *l = s->private;
|
|
|
|
- up_read(&l->rwsem);
|
|
|
|
|
|
+ struct cgroup_open_file *of = s->private;
|
|
|
|
+ struct cgroup_pidlist *l = of->priv;
|
|
|
|
+
|
|
|
|
+ if (l)
|
|
|
|
+ mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
|
|
|
|
+ CGROUP_PIDLIST_DESTROY_DELAY);
|
|
|
|
+ mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
|
|
}
|
|
}
|
|
|
|
|
|
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
|
|
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
|
|
{
|
|
{
|
|
- struct cgroup_pidlist *l = s->private;
|
|
|
|
|
|
+ struct cgroup_open_file *of = s->private;
|
|
|
|
+ struct cgroup_pidlist *l = of->priv;
|
|
pid_t *p = v;
|
|
pid_t *p = v;
|
|
pid_t *end = l->list + l->length;
|
|
pid_t *end = l->list + l->length;
|
|
/*
|
|
/*
|
|
@@ -3787,7 +3823,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
|
|
if (p >= end) {
|
|
if (p >= end) {
|
|
return NULL;
|
|
return NULL;
|
|
} else {
|
|
} else {
|
|
- *pos = *p;
|
|
|
|
|
|
+ *pos = cgroup_pid_fry(seq_css(s)->cgroup, *p);
|
|
return p;
|
|
return p;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -3808,92 +3844,6 @@ static const struct seq_operations cgroup_pidlist_seq_operations = {
|
|
.show = cgroup_pidlist_show,
|
|
.show = cgroup_pidlist_show,
|
|
};
|
|
};
|
|
|
|
|
|
-static void cgroup_release_pid_array(struct cgroup_pidlist *l)
|
|
|
|
-{
|
|
|
|
- /*
|
|
|
|
- * the case where we're the last user of this particular pidlist will
|
|
|
|
- * have us remove it from the cgroup's list, which entails taking the
|
|
|
|
- * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
|
|
|
|
- * pidlist_mutex, we have to take pidlist_mutex first.
|
|
|
|
- */
|
|
|
|
- mutex_lock(&l->owner->pidlist_mutex);
|
|
|
|
- down_write(&l->rwsem);
|
|
|
|
- BUG_ON(!l->use_count);
|
|
|
|
- if (!--l->use_count) {
|
|
|
|
- /* we're the last user if refcount is 0; remove and free */
|
|
|
|
- list_del(&l->links);
|
|
|
|
- mutex_unlock(&l->owner->pidlist_mutex);
|
|
|
|
- pidlist_free(l->list);
|
|
|
|
- put_pid_ns(l->key.ns);
|
|
|
|
- up_write(&l->rwsem);
|
|
|
|
- kfree(l);
|
|
|
|
- return;
|
|
|
|
- }
|
|
|
|
- mutex_unlock(&l->owner->pidlist_mutex);
|
|
|
|
- up_write(&l->rwsem);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static int cgroup_pidlist_release(struct inode *inode, struct file *file)
|
|
|
|
-{
|
|
|
|
- struct cgroup_pidlist *l;
|
|
|
|
- if (!(file->f_mode & FMODE_READ))
|
|
|
|
- return 0;
|
|
|
|
- /*
|
|
|
|
- * the seq_file will only be initialized if the file was opened for
|
|
|
|
- * reading; hence we check if it's not null only in that case.
|
|
|
|
- */
|
|
|
|
- l = ((struct seq_file *)file->private_data)->private;
|
|
|
|
- cgroup_release_pid_array(l);
|
|
|
|
- return seq_release(inode, file);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static const struct file_operations cgroup_pidlist_operations = {
|
|
|
|
- .read = seq_read,
|
|
|
|
- .llseek = seq_lseek,
|
|
|
|
- .write = cgroup_file_write,
|
|
|
|
- .release = cgroup_pidlist_release,
|
|
|
|
-};
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * The following functions handle opens on a file that displays a pidlist
|
|
|
|
- * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
|
|
|
|
- * in the cgroup.
|
|
|
|
- */
|
|
|
|
-/* helper function for the two below it */
|
|
|
|
-static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
|
|
|
|
-{
|
|
|
|
- struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
|
|
|
|
- struct cgroup_pidlist *l;
|
|
|
|
- int retval;
|
|
|
|
-
|
|
|
|
- /* Nothing to do for write-only files */
|
|
|
|
- if (!(file->f_mode & FMODE_READ))
|
|
|
|
- return 0;
|
|
|
|
-
|
|
|
|
- /* have the array populated */
|
|
|
|
- retval = pidlist_array_load(cgrp, type, &l);
|
|
|
|
- if (retval)
|
|
|
|
- return retval;
|
|
|
|
- /* configure file information */
|
|
|
|
- file->f_op = &cgroup_pidlist_operations;
|
|
|
|
-
|
|
|
|
- retval = seq_open(file, &cgroup_pidlist_seq_operations);
|
|
|
|
- if (retval) {
|
|
|
|
- cgroup_release_pid_array(l);
|
|
|
|
- return retval;
|
|
|
|
- }
|
|
|
|
- ((struct seq_file *)file->private_data)->private = l;
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-static int cgroup_tasks_open(struct inode *unused, struct file *file)
|
|
|
|
-{
|
|
|
|
- return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
|
|
|
|
-}
|
|
|
|
-static int cgroup_procs_open(struct inode *unused, struct file *file)
|
|
|
|
-{
|
|
|
|
- return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
|
|
static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
|
|
struct cftype *cft)
|
|
struct cftype *cft)
|
|
{
|
|
{
|
|
@@ -3928,202 +3878,6 @@ static void cgroup_dput(struct cgroup *cgrp)
|
|
deactivate_super(sb);
|
|
deactivate_super(sb);
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * Unregister event and free resources.
|
|
|
|
- *
|
|
|
|
- * Gets called from workqueue.
|
|
|
|
- */
|
|
|
|
-static void cgroup_event_remove(struct work_struct *work)
|
|
|
|
-{
|
|
|
|
- struct cgroup_event *event = container_of(work, struct cgroup_event,
|
|
|
|
- remove);
|
|
|
|
- struct cgroup_subsys_state *css = event->css;
|
|
|
|
-
|
|
|
|
- remove_wait_queue(event->wqh, &event->wait);
|
|
|
|
-
|
|
|
|
- event->cft->unregister_event(css, event->cft, event->eventfd);
|
|
|
|
-
|
|
|
|
- /* Notify userspace the event is going away. */
|
|
|
|
- eventfd_signal(event->eventfd, 1);
|
|
|
|
-
|
|
|
|
- eventfd_ctx_put(event->eventfd);
|
|
|
|
- kfree(event);
|
|
|
|
- css_put(css);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * Gets called on POLLHUP on eventfd when user closes it.
|
|
|
|
- *
|
|
|
|
- * Called with wqh->lock held and interrupts disabled.
|
|
|
|
- */
|
|
|
|
-static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
|
|
|
|
- int sync, void *key)
|
|
|
|
-{
|
|
|
|
- struct cgroup_event *event = container_of(wait,
|
|
|
|
- struct cgroup_event, wait);
|
|
|
|
- struct cgroup *cgrp = event->css->cgroup;
|
|
|
|
- unsigned long flags = (unsigned long)key;
|
|
|
|
-
|
|
|
|
- if (flags & POLLHUP) {
|
|
|
|
- /*
|
|
|
|
- * If the event has been detached at cgroup removal, we
|
|
|
|
- * can simply return knowing the other side will cleanup
|
|
|
|
- * for us.
|
|
|
|
- *
|
|
|
|
- * We can't race against event freeing since the other
|
|
|
|
- * side will require wqh->lock via remove_wait_queue(),
|
|
|
|
- * which we hold.
|
|
|
|
- */
|
|
|
|
- spin_lock(&cgrp->event_list_lock);
|
|
|
|
- if (!list_empty(&event->list)) {
|
|
|
|
- list_del_init(&event->list);
|
|
|
|
- /*
|
|
|
|
- * We are in atomic context, but cgroup_event_remove()
|
|
|
|
- * may sleep, so we have to call it in workqueue.
|
|
|
|
- */
|
|
|
|
- schedule_work(&event->remove);
|
|
|
|
- }
|
|
|
|
- spin_unlock(&cgrp->event_list_lock);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static void cgroup_event_ptable_queue_proc(struct file *file,
|
|
|
|
- wait_queue_head_t *wqh, poll_table *pt)
|
|
|
|
-{
|
|
|
|
- struct cgroup_event *event = container_of(pt,
|
|
|
|
- struct cgroup_event, pt);
|
|
|
|
-
|
|
|
|
- event->wqh = wqh;
|
|
|
|
- add_wait_queue(wqh, &event->wait);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * Parse input and register new cgroup event handler.
|
|
|
|
- *
|
|
|
|
- * Input must be in format '<event_fd> <control_fd> <args>'.
|
|
|
|
- * Interpretation of args is defined by control file implementation.
|
|
|
|
- */
|
|
|
|
-static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
|
|
|
|
- struct cftype *cft, const char *buffer)
|
|
|
|
-{
|
|
|
|
- struct cgroup *cgrp = dummy_css->cgroup;
|
|
|
|
- struct cgroup_event *event;
|
|
|
|
- struct cgroup_subsys_state *cfile_css;
|
|
|
|
- unsigned int efd, cfd;
|
|
|
|
- struct fd efile;
|
|
|
|
- struct fd cfile;
|
|
|
|
- char *endp;
|
|
|
|
- int ret;
|
|
|
|
-
|
|
|
|
- efd = simple_strtoul(buffer, &endp, 10);
|
|
|
|
- if (*endp != ' ')
|
|
|
|
- return -EINVAL;
|
|
|
|
- buffer = endp + 1;
|
|
|
|
-
|
|
|
|
- cfd = simple_strtoul(buffer, &endp, 10);
|
|
|
|
- if ((*endp != ' ') && (*endp != '\0'))
|
|
|
|
- return -EINVAL;
|
|
|
|
- buffer = endp + 1;
|
|
|
|
-
|
|
|
|
- event = kzalloc(sizeof(*event), GFP_KERNEL);
|
|
|
|
- if (!event)
|
|
|
|
- return -ENOMEM;
|
|
|
|
-
|
|
|
|
- INIT_LIST_HEAD(&event->list);
|
|
|
|
- init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
|
|
|
|
- init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
|
|
|
|
- INIT_WORK(&event->remove, cgroup_event_remove);
|
|
|
|
-
|
|
|
|
- efile = fdget(efd);
|
|
|
|
- if (!efile.file) {
|
|
|
|
- ret = -EBADF;
|
|
|
|
- goto out_kfree;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- event->eventfd = eventfd_ctx_fileget(efile.file);
|
|
|
|
- if (IS_ERR(event->eventfd)) {
|
|
|
|
- ret = PTR_ERR(event->eventfd);
|
|
|
|
- goto out_put_efile;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- cfile = fdget(cfd);
|
|
|
|
- if (!cfile.file) {
|
|
|
|
- ret = -EBADF;
|
|
|
|
- goto out_put_eventfd;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /* the process need read permission on control file */
|
|
|
|
- /* AV: shouldn't we check that it's been opened for read instead? */
|
|
|
|
- ret = inode_permission(file_inode(cfile.file), MAY_READ);
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out_put_cfile;
|
|
|
|
-
|
|
|
|
- event->cft = __file_cft(cfile.file);
|
|
|
|
- if (IS_ERR(event->cft)) {
|
|
|
|
- ret = PTR_ERR(event->cft);
|
|
|
|
- goto out_put_cfile;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (!event->cft->ss) {
|
|
|
|
- ret = -EBADF;
|
|
|
|
- goto out_put_cfile;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Determine the css of @cfile, verify it belongs to the same
|
|
|
|
- * cgroup as cgroup.event_control, and associate @event with it.
|
|
|
|
- * Remaining events are automatically removed on cgroup destruction
|
|
|
|
- * but the removal is asynchronous, so take an extra ref.
|
|
|
|
- */
|
|
|
|
- rcu_read_lock();
|
|
|
|
-
|
|
|
|
- ret = -EINVAL;
|
|
|
|
- event->css = cgroup_css(cgrp, event->cft->ss);
|
|
|
|
- cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
|
|
|
|
- if (event->css && event->css == cfile_css && css_tryget(event->css))
|
|
|
|
- ret = 0;
|
|
|
|
-
|
|
|
|
- rcu_read_unlock();
|
|
|
|
- if (ret)
|
|
|
|
- goto out_put_cfile;
|
|
|
|
-
|
|
|
|
- if (!event->cft->register_event || !event->cft->unregister_event) {
|
|
|
|
- ret = -EINVAL;
|
|
|
|
- goto out_put_css;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- ret = event->cft->register_event(event->css, event->cft,
|
|
|
|
- event->eventfd, buffer);
|
|
|
|
- if (ret)
|
|
|
|
- goto out_put_css;
|
|
|
|
-
|
|
|
|
- efile.file->f_op->poll(efile.file, &event->pt);
|
|
|
|
-
|
|
|
|
- spin_lock(&cgrp->event_list_lock);
|
|
|
|
- list_add(&event->list, &cgrp->event_list);
|
|
|
|
- spin_unlock(&cgrp->event_list_lock);
|
|
|
|
-
|
|
|
|
- fdput(cfile);
|
|
|
|
- fdput(efile);
|
|
|
|
-
|
|
|
|
- return 0;
|
|
|
|
-
|
|
|
|
-out_put_css:
|
|
|
|
- css_put(event->css);
|
|
|
|
-out_put_cfile:
|
|
|
|
- fdput(cfile);
|
|
|
|
-out_put_eventfd:
|
|
|
|
- eventfd_ctx_put(event->eventfd);
|
|
|
|
-out_put_efile:
|
|
|
|
- fdput(efile);
|
|
|
|
-out_kfree:
|
|
|
|
- kfree(event);
|
|
|
|
-
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
 static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
 				      struct cftype *cft)
 {
@@ -4143,16 +3897,14 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
 static struct cftype cgroup_base_files[] = {
 	{
 		.name = "cgroup.procs",
-		.open = cgroup_procs_open,
+		.seq_start = cgroup_pidlist_start,
+		.seq_next = cgroup_pidlist_next,
+		.seq_stop = cgroup_pidlist_stop,
+		.seq_show = cgroup_pidlist_show,
+		.private = CGROUP_FILE_PROCS,
 		.write_u64 = cgroup_procs_write,
-		.release = cgroup_pidlist_release,
 		.mode = S_IRUGO | S_IWUSR,
 	},
-	{
-		.name = "cgroup.event_control",
-		.write_string = cgroup_write_event_control,
-		.mode = S_IWUGO,
-	},
 	{
 		.name = "cgroup.clone_children",
 		.flags = CFTYPE_INSANE,
@@ -4162,7 +3914,7 @@ static struct cftype cgroup_base_files[] = {
 	{
 		.name = "cgroup.sane_behavior",
 		.flags = CFTYPE_ONLY_ON_ROOT,
-		.read_seq_string = cgroup_sane_behavior_show,
+		.seq_show = cgroup_sane_behavior_show,
 	},
 
 	/*
@@ -4173,9 +3925,12 @@ static struct cftype cgroup_base_files[] = {
 	{
 		.name = "tasks",
 		.flags = CFTYPE_INSANE,		/* use "procs" instead */
-		.open = cgroup_tasks_open,
+		.seq_start = cgroup_pidlist_start,
+		.seq_next = cgroup_pidlist_next,
+		.seq_stop = cgroup_pidlist_stop,
+		.seq_show = cgroup_pidlist_show,
+		.private = CGROUP_FILE_TASKS,
 		.write_u64 = cgroup_tasks_write,
-		.release = cgroup_pidlist_release,
 		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
@@ -4187,7 +3942,7 @@ static struct cftype cgroup_base_files[] = {
 	{
 		.name = "release_agent",
 		.flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
-		.read_seq_string = cgroup_release_agent_show,
+		.seq_show = cgroup_release_agent_show,
 		.write_string = cgroup_release_agent_write,
 		.max_write_len = PATH_MAX,
 	},
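The two pidlist files ("cgroup.procs" above and "tasks" in the same table) drop their bespoke .open/.release pair in favor of the kernel's standard seq_file iterator callbacks, with .private telling the shared cgroup_pidlist_* implementation which list to walk. For readers less familiar with that interface, here is a minimal, self-contained sketch of how such a start/next/stop/show quartet behaves; the demo_* names and the fixed array are illustrative only, not the patch's pidlist implementation:

#include <linux/kernel.h>
#include <linux/seq_file.h>

static const int demo_pids[] = { 1, 2, 42 };

/* ->start runs first on every read(); *pos says where to resume */
static void *demo_start(struct seq_file *s, loff_t *pos)
{
	return *pos < ARRAY_SIZE(demo_pids) ? (void *)&demo_pids[*pos] : NULL;
}

/* ->next advances the cursor; returning NULL ends the walk */
static void *demo_next(struct seq_file *s, void *v, loff_t *pos)
{
	++*pos;
	return *pos < ARRAY_SIZE(demo_pids) ? (void *)&demo_pids[*pos] : NULL;
}

/* ->stop is where locks taken in ->start would be dropped */
static void demo_stop(struct seq_file *s, void *v)
{
}

/* ->show formats exactly one record per iteration */
static int demo_show(struct seq_file *s, void *v)
{
	seq_printf(s, "%d\n", *(const int *)v);
	return 0;
}

The payoff of the conversion is that buffering, partial reads, and resume-at-offset bookkeeping all live in seq_file rather than being reimplemented in cgroup.c.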
@@ -4333,6 +4088,62 @@ static void offline_css(struct cgroup_subsys_state *css)
 	RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
 }
 
+/**
+ * create_css - create a cgroup_subsys_state
+ * @cgrp: the cgroup new css will be associated with
+ * @ss: the subsys of new css
+ *
+ * Create a new css associated with @cgrp - @ss pair.  On success, the new
+ * css is online and installed in @cgrp with all interface files created.
+ * Returns 0 on success, -errno on failure.
+ */
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct cgroup *parent = cgrp->parent;
+	struct cgroup_subsys_state *css;
+	int err;
+
+	lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
+	lockdep_assert_held(&cgroup_mutex);
+
+	css = ss->css_alloc(cgroup_css(parent, ss));
+	if (IS_ERR(css))
+		return PTR_ERR(css);
+
+	err = percpu_ref_init(&css->refcnt, css_release);
+	if (err)
+		goto err_free;
+
+	init_css(css, ss, cgrp);
+
+	err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id);
+	if (err)
+		goto err_free;
+
+	err = online_css(css);
+	if (err)
+		goto err_free;
+
+	dget(cgrp->dentry);
+	css_get(css->parent);
+
+	if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
+	    parent->parent) {
+		pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
+			   current->comm, current->pid, ss->name);
+		if (!strcmp(ss->name, "memory"))
+			pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
+		ss->warned_broken_hierarchy = true;
+	}
+
+	return 0;
+
+err_free:
+	percpu_ref_cancel_init(&css->refcnt);
+	ss->css_free(css);
+	return err;
+}
+
 /*
  * cgroup_create - create a cgroup
  * @parent: cgroup that will be parent of the new cgroup
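Note how create_css()'s err_free label undoes percpu_ref_init() with percpu_ref_cancel_init(): that undo is only legal while the ref has never been made visible to other CPUs. Once the css is published, teardown must instead go through percpu_ref_kill() and the release callback (css_release here). A hedged sketch of that pairing, using the percpu_ref API as this file uses it; the demo_* names are hypothetical:

#include <linux/percpu-refcount.h>
#include <linux/slab.h>

struct demo_obj {
	struct percpu_ref ref;
};

static void demo_release(struct percpu_ref *ref)
{
	kfree(container_of(ref, struct demo_obj, ref));
}

static struct demo_obj *demo_create(void)
{
	struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;
	if (percpu_ref_init(&obj->ref, demo_release)) {
		kfree(obj);		/* init failed, nothing to cancel */
		return NULL;
	}
	return obj;
}

/* failure before publication: cancel the init and free by hand */
static void demo_abort(struct demo_obj *obj)
{
	percpu_ref_cancel_init(&obj->ref);
	kfree(obj);
}

/* normal teardown after publication: kill; demo_release() frees */
static void demo_destroy(struct demo_obj *obj)
{
	percpu_ref_kill(&obj->ref);
}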
@@ -4344,11 +4155,10 @@ static void offline_css(struct cgroup_subsys_state *css)
 static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 			  umode_t mode)
 {
-	struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { };
 	struct cgroup *cgrp;
 	struct cgroup_name *name;
 	struct cgroupfs_root *root = parent->root;
-	int err = 0;
+	int ssid, err = 0;
 	struct cgroup_subsys *ss;
 	struct super_block *sb = root->sb;
 
@@ -4404,23 +4214,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 
-	for_each_root_subsys(root, ss) {
-		struct cgroup_subsys_state *css;
-
-		css = ss->css_alloc(cgroup_css(parent, ss));
-		if (IS_ERR(css)) {
-			err = PTR_ERR(css);
-			goto err_free_all;
-		}
-		css_ar[ss->subsys_id] = css;
-
-		err = percpu_ref_init(&css->refcnt, css_release);
-		if (err)
-			goto err_free_all;
-
-		init_css(css, ss, cgrp);
-	}
-
 	/*
 	 * Create directory.  cgroup_create_file() returns with the new
 	 * directory locked on success so that it can be populated without
@@ -4428,7 +4221,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	 */
 	err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
 	if (err < 0)
-		goto err_free_all;
+		goto err_unlock;
 	lockdep_assert_held(&dentry->d_inode->i_mutex);
 
 	cgrp->serial_nr = cgroup_serial_nr_next++;
@@ -4440,55 +4233,31 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	/* hold a ref to the parent's dentry */
 	dget(parent->dentry);
 
-	/* creation succeeded, notify subsystems */
-	for_each_root_subsys(root, ss) {
-		struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
-		err = online_css(css);
-		if (err)
-			goto err_destroy;
-
-		/* each css holds a ref to the cgroup's dentry and parent css */
-		dget(dentry);
-		css_get(css->parent);
-
-		/* mark it consumed for error path */
-		css_ar[ss->subsys_id] = NULL;
-
-		if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
-		    parent->parent) {
-			pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
-				   current->comm, current->pid, ss->name);
-			if (!strcmp(ss->name, "memory"))
-				pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
-			ss->warned_broken_hierarchy = true;
-		}
-	}
-
+	/*
+	 * @cgrp is now fully operational.  If something fails after this
+	 * point, it'll be released via the normal destruction path.
+	 */
 	idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
 
 	err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
 	if (err)
 		goto err_destroy;
 
-	err = cgroup_populate_dir(cgrp, root->subsys_mask);
-	if (err)
-		goto err_destroy;
+	/* let's create and online css's */
+	for_each_subsys(ss, ssid) {
+		if (root->subsys_mask & (1 << ssid)) {
+			err = create_css(cgrp, ss);
+			if (err)
+				goto err_destroy;
+		}
+	}
 
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
 
 	return 0;
 
-err_free_all:
-	for_each_root_subsys(root, ss) {
-		struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
-		if (css) {
-			percpu_ref_cancel_init(&css->refcnt);
-			ss->css_free(css);
-		}
-	}
-
+err_unlock:
 	mutex_unlock(&cgroup_mutex);
 	/* Release the reference count that we took on the superblock */
 	deactivate_super(sb);
@@ -4501,14 +4270,6 @@ err_free_cgrp:
 	return err;
 
 err_destroy:
-	for_each_root_subsys(root, ss) {
-		struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
-		if (css) {
-			percpu_ref_cancel_init(&css->refcnt);
-			ss->css_free(css);
-		}
-	}
 	cgroup_destroy_locked(cgrp);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -4631,10 +4392,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
 	struct dentry *d = cgrp->dentry;
-	struct cgroup_event *event, *tmp;
-	struct cgroup_subsys *ss;
+	struct cgroup_subsys_state *css;
 	struct cgroup *child;
 	bool empty;
+	int ssid;
 
 	lockdep_assert_held(&d->d_inode->i_mutex);
 	lockdep_assert_held(&cgroup_mutex);
@@ -4670,12 +4431,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 * will be invoked to perform the rest of destruction once the
 	 * percpu refs of all css's are confirmed to be killed.
 	 */
-	for_each_root_subsys(cgrp->root, ss) {
-		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-
-		if (css)
-			kill_css(css);
-	}
+	for_each_css(css, ssid, cgrp)
+		kill_css(css);
 
 	/*
 	 * Mark @cgrp dead.  This prevents further task migration and child
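for_each_css() visits every populated css slot of a cgroup, so kill_css() now covers all attached subsystems without walking the root's subsystem list. The macro is introduced elsewhere in this series rather than in the hunks shown here; conceptually it expands to something close to the following sketch (an approximation, not the verbatim definition):

/* sketch: visit each non-NULL css of @cgrp, skipping empty slots */
#define for_each_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = rcu_dereference_check(			\
				(cgrp)->subsys[(ssid)],			\
				lockdep_is_held(&cgroup_mutex)))) { }	\
		else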
@@ -4710,18 +4467,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	dget(d);
 	cgroup_d_remove_dir(d);
 
-	/*
-	 * Unregister events and notify userspace.
-	 * Notify userspace about cgroup removing only after rmdir of cgroup
-	 * directory to avoid race between userspace and kernelspace.
-	 */
-	spin_lock(&cgrp->event_list_lock);
-	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
-		list_del_init(&event->list);
-		schedule_work(&event->remove);
-	}
-	spin_unlock(&cgrp->event_list_lock);
-
 	return 0;
 };
 
@@ -4792,7 +4537,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	cgroup_init_cftsets(ss);
 
 	/* Create the top cgroup state for this subsystem */
-	list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
 	ss->root = &cgroup_dummy_root;
 	css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
 	/* We don't handle early failures gracefully */
@@ -4866,6 +4610,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	cgroup_init_cftsets(ss);
 
 	mutex_lock(&cgroup_mutex);
+	mutex_lock(&cgroup_root_mutex);
 	cgroup_subsys[ss->subsys_id] = ss;
 
 	/*
@@ -4877,11 +4622,11 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	if (IS_ERR(css)) {
 		/* failure case - need to deassign the cgroup_subsys[] slot. */
 		cgroup_subsys[ss->subsys_id] = NULL;
+		mutex_unlock(&cgroup_root_mutex);
 		mutex_unlock(&cgroup_mutex);
 		return PTR_ERR(css);
 	}
 
-	list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
 	ss->root = &cgroup_dummy_root;
 
 	/* our new subsystem will be attached to the dummy hierarchy. */
@@ -4911,14 +4656,18 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	write_unlock(&css_set_lock);
 
 	ret = online_css(css);
-	if (ret)
+	if (ret) {
+		ss->css_free(css);
 		goto err_unload;
+	}
 
 	/* success! */
+	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	return 0;
 
 err_unload:
+	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	/* @ss can't be mounted here as try_module_get() would fail */
 	cgroup_unload_subsys(ss);
@@ -4937,6 +4686,7 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys);
 void cgroup_unload_subsys(struct cgroup_subsys *ss)
 {
 	struct cgrp_cset_link *link;
+	struct cgroup_subsys_state *css;
 
 	BUG_ON(ss->module == NULL);
 
@@ -4948,15 +4698,15 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	BUG_ON(ss->root != &cgroup_dummy_root);
 
 	mutex_lock(&cgroup_mutex);
+	mutex_lock(&cgroup_root_mutex);
 
-	offline_css(cgroup_css(cgroup_dummy_top, ss));
+	css = cgroup_css(cgroup_dummy_top, ss);
+	if (css)
+		offline_css(css);
 
 	/* deassign the subsys_id */
 	cgroup_subsys[ss->subsys_id] = NULL;
 
-	/* remove subsystem from the dummy root's list of subsystems */
-	list_del_init(&ss->sibling);
-
 	/*
 	 * disentangle the css from all css_sets attached to the dummy
 	 * top. as in loading, we need to pay our respects to the hashtable
@@ -4979,9 +4729,11 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	 * need to free before marking as null because ss->css_free needs
 	 * the cgrp->subsys pointer to find their state.
 	 */
-	ss->css_free(cgroup_css(cgroup_dummy_top, ss));
+	if (css)
+		ss->css_free(css);
 	RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
 
+	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 }
 EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
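Both module paths above now nest cgroup_root_mutex inside cgroup_mutex and release in the reverse order, and every error label has to preserve that ordering. Stripped of the surrounding logic, the required nesting is simply:

	mutex_lock(&cgroup_mutex);		/* outer lock, taken first */
	mutex_lock(&cgroup_root_mutex);		/* inner lock, nests inside */

	/* ... modify subsystem <-> dummy root associations ... */

	mutex_unlock(&cgroup_root_mutex);	/* drop the inner lock first */
	mutex_unlock(&cgroup_mutex);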
@@ -5100,6 +4852,15 @@ static int __init cgroup_wq_init(void)
 	 */
 	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
 	BUG_ON(!cgroup_destroy_wq);
+
+	/*
+	 * Used to destroy pidlists and separate to serve as flush domain.
+	 * Cap @max_active to 1 too.
+	 */
+	cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
+						    0, 1);
+	BUG_ON(!cgroup_pidlist_destroy_wq);
+
 	return 0;
 }
 core_initcall(cgroup_wq_init);
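Giving pidlist destruction its own workqueue makes flush_workqueue() a targeted wait: cgroup teardown can drain only the pending pidlist destructions, and each destruction can be deferred by CGROUP_PIDLIST_DESTROY_DELAY (defined near the top of the file) so that consecutive reads reuse the list. A sketch of the intended pattern; the demo_* helper names are illustrative, not the patch's actual functions:

static void demo_defer_pidlist_destroy(struct delayed_work *dwork)
{
	/* (re)arm destruction; consecutive reads keep pushing it back */
	mod_delayed_work(cgroup_pidlist_destroy_wq, dwork,
			 CGROUP_PIDLIST_DESTROY_DELAY);
}

static void demo_flush_pidlist_destroy(void)
{
	/* waits only for pidlist work items, not unrelated system work */
	flush_workqueue(cgroup_pidlist_destroy_wq);
}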
@@ -5143,11 +4904,12 @@ int proc_cgroup_show(struct seq_file *m, void *v)
 	for_each_active_root(root) {
 		struct cgroup_subsys *ss;
 		struct cgroup *cgrp;
-		int count = 0;
+		int ssid, count = 0;
 
 		seq_printf(m, "%d:", root->hierarchy_id);
-		for_each_root_subsys(root, ss)
-			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+		for_each_subsys(ss, ssid)
+			if (root->subsys_mask & (1 << ssid))
+				seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
 		if (strlen(root->name))
 			seq_printf(m, "%sname=%s", count ? "," : "",
 				   root->name);
@@ -5488,16 +5250,16 @@ __setup("cgroup_disable=", cgroup_disable);
  * @dentry: directory dentry of interest
  * @ss: subsystem of interest
  *
- * Must be called under RCU read lock.  The caller is responsible for
- * pinning the returned css if it needs to be accessed outside the RCU
- * critical section.
+ * Must be called under cgroup_mutex or RCU read lock.  The caller is
+ * responsible for pinning the returned css if it needs to be accessed
+ * outside the critical section.
  */
 struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
 					 struct cgroup_subsys *ss)
 {
 	struct cgroup *cgrp;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	cgroup_assert_mutex_or_rcu_locked();
 
 	/* is @dentry a cgroup dir? */
 	if (!dentry->d_inode ||
@@ -5520,9 +5282,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
 {
 	struct cgroup *cgrp;
 
-	rcu_lockdep_assert(rcu_read_lock_held() ||
-			   lockdep_is_held(&cgroup_mutex),
-			   "css_from_id() needs proper protection");
+	cgroup_assert_mutex_or_rcu_locked();
 
 	cgrp = idr_find(&ss->root->cgroup_idr, id);
 	if (cgrp)
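With cgroup_assert_mutex_or_rcu_locked(), both lookup helpers accept cgroup_mutex holders as well as RCU readers. A caller relying on RCU alone must still pin the result before leaving the critical section, roughly like this (an illustrative caller, not code from this patch):

	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = css_from_id(id, ss);
	if (css && !css_tryget(css))	/* pin it before unlocking */
		css = NULL;
	rcu_read_unlock();

	if (css) {
		/* ... safe to use css here ... */
		css_put(css);
	}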
@@ -5570,9 +5330,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
 	return count;
 }
 
-static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
-					 struct cftype *cft,
-					 struct seq_file *seq)
+static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 {
 	struct cgrp_cset_link *link;
 	struct css_set *cset;
@@ -5597,9 +5355,9 @@ static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
 }
 
 #define MAX_TASKS_SHOWN_PER_CSS 25
-static int cgroup_css_links_read(struct cgroup_subsys_state *css,
-				 struct cftype *cft, struct seq_file *seq)
+static int cgroup_css_links_read(struct seq_file *seq, void *v)
 {
+	struct cgroup_subsys_state *css = seq_css(seq);
 	struct cgrp_cset_link *link;
 
 	read_lock(&css_set_lock);
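The new seq_show signature can drop the css and cftype arguments because both are recoverable from the seq_file itself; cgroup_css_links_read() above does exactly that via seq_css(). Every converted handler ends up with the same shape (a sketch with an illustrative name):

static int demo_seq_show(struct seq_file *seq, void *v)
{
	struct cgroup_subsys_state *css = seq_css(seq);

	seq_printf(seq, "flags %#lx\n", css->cgroup->flags);
	return 0;
}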
@@ -5645,12 +5403,12 @@ static struct cftype debug_files[] = {
 
 	{
 		.name = "current_css_set_cg_links",
-		.read_seq_string = current_css_set_cg_links_read,
+		.seq_show = current_css_set_cg_links_read,
 	},
 
 	{
 		.name = "cgroup_css_links",
-		.read_seq_string = cgroup_css_links_read,
+		.seq_show = cgroup_css_links_read,
 	},
 
 	{