10 سال پیش · 33398cf2f3
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -23,6 +23,11 @@
 
				 #include <linux/vm_event_item.h>
			
 
				 #include <linux/hardirq.h>
			
 
				 #include <linux/jump_label.h>
			
 
				+#include <linux/page_counter.h>
			
 
				+#include <linux/vmpressure.h>
			
 
				+#include <linux/eventfd.h>
			
 
				+#include <linux/mmzone.h>
			
 
				+#include <linux/writeback.h>
			
 
				 
			
 
				 struct mem_cgroup;
			
 
				 struct page;
			
@@ -67,12 +72,221 @@ enum mem_cgroup_events_index {
 
				 	MEMCG_NR_EVENTS,
			
 
				 };
			
 
				 
			
 
				+/*
			
 
				+ * Per memcg event counter is incremented at every pagein/pageout. With THP,
			
 
				+ * it will be incremented by the number of pages. This counter is used for
			
 
				+ * triggering some periodic events. This is straightforward and better
			
 
				+ * than using jiffies etc. to handle periodic memcg event.
			
 
				+ */
			
 
				+enum mem_cgroup_events_target {
			
 
				+	MEM_CGROUP_TARGET_THRESH,
			
 
				+	MEM_CGROUP_TARGET_SOFTLIMIT,
			
 
				+	MEM_CGROUP_TARGET_NUMAINFO,
			
 
				+	MEM_CGROUP_NTARGETS,
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Bits in struct cg_proto.flags
			
 
				+ */
			
 
				+enum cg_proto_flags {
			
 
				+	/* Currently active and new sockets should be assigned to cgroups */
			
 
				+	MEMCG_SOCK_ACTIVE,
			
 
				+	/* It was ever activated; we must disarm static keys on destruction */
			
 
				+	MEMCG_SOCK_ACTIVATED,
			
 
				+};
			
 
				+
			
 
				+struct cg_proto {
			
 
				+	struct page_counter	memory_allocated;	/* Current allocated memory. */
			
 
				+	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */
			
 
				+	int			memory_pressure;
			
 
				+	long			sysctl_mem[3];
			
 
				+	unsigned long		flags;
			
 
				+	/*
			
 
				+	 * memcg field is used to find which memcg we belong to directly.
			
 
				+	 * Each memcg struct can hold more than one cg_proto, so container_of
			
 
				+	 * won't really cut.
			
 
				+	 *
			
 
				+	 * The elegant solution would be having an inverse function to
			
 
				+	 * proto_cgroup in struct proto, but that means polluting the structure
			
 
				+	 * for everybody, instead of just for memcg users.
			
 
				+	 */
			
 
				+	struct mem_cgroup	*memcg;
			
 
				+};
			
 
				+
			
 
				 #ifdef CONFIG_MEMCG
			
 
				+struct mem_cgroup_stat_cpu {
			
 
				+	long count[MEM_CGROUP_STAT_NSTATS];
			
 
				+	unsigned long events[MEMCG_NR_EVENTS];
			
 
				+	unsigned long nr_page_events;
			
 
				+	unsigned long targets[MEM_CGROUP_NTARGETS];
			
 
				+};
			
 
				+
			
 
				+struct mem_cgroup_reclaim_iter {
			
 
				+	struct mem_cgroup *position;
			
 
				+	/* scan generation, increased every round-trip */
			
 
				+	unsigned int generation;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * per-zone information in memory controller.
			
 
				+ */
			
 
				+struct mem_cgroup_per_zone {
			
 
				+	struct lruvec		lruvec;
			
 
				+	unsigned long		lru_size[NR_LRU_LISTS];
			
 
				+
			
 
				+	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
			
 
				+
			
 
				+	struct rb_node		tree_node;	/* RB tree node */
			
 
				+	unsigned long		usage_in_excess;/* Set to the value by which */
			
 
				+						/* the soft limit is exceeded*/
			
 
				+	bool			on_tree;
			
 
				+	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
			
 
				+						/* use container_of	   */
			
 
				+};
			
 
				+
			
 
				+struct mem_cgroup_per_node {
			
 
				+	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
			
 
				+};
			
 
				+
			
 
				+struct mem_cgroup_threshold {
			
 
				+	struct eventfd_ctx *eventfd;
			
 
				+	unsigned long threshold;
			
 
				+};
			
 
				+
			
 
				+/* For threshold */
			
 
				+struct mem_cgroup_threshold_ary {
			
 
				+	/* An array index points to threshold just below or equal to usage. */
			
 
				+	int current_threshold;
			
 
				+	/* Size of entries[] */
			
 
				+	unsigned int size;
			
 
				+	/* Array of thresholds */
			
 
				+	struct mem_cgroup_threshold entries[0];
			
 
				+};
			
 
				+
			
 
				+struct mem_cgroup_thresholds {
			
 
				+	/* Primary thresholds array */
			
 
				+	struct mem_cgroup_threshold_ary *primary;
			
 
				+	/*
			
 
				+	 * Spare threshold array.
			
 
				+	 * This is needed to make mem_cgroup_unregister_event() "never fail".
			
 
				+	 * It must be able to store at least primary->size - 1 entries.
			
 
				+	 */
			
 
				+	struct mem_cgroup_threshold_ary *spare;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * The memory controller data structure. The memory controller controls both
			
 
				+ * page cache and RSS per cgroup. We would eventually like to provide
			
 
				+ * statistics based on the statistics developed by Rik Van Riel for clock-pro,
			
 
				+ * to help the administrator determine what knobs to tune.
			
 
				+ */
			
 
				+struct mem_cgroup {
			
 
				+	struct cgroup_subsys_state css;
			
 
				+
			
 
				+	/* Accounted resources */
			
 
				+	struct page_counter memory;
			
 
				+	struct page_counter memsw;
			
 
				+	struct page_counter kmem;
			
 
				+
			
 
				+	/* Normal memory consumption range */
			
 
				+	unsigned long low;
			
 
				+	unsigned long high;
			
 
				+
			
 
				+	unsigned long soft_limit;
			
 
				+
			
 
				+	/* vmpressure notifications */
			
 
				+	struct vmpressure vmpressure;
			
 
				+
			
 
				+	/* css_online() has been completed */
			
 
				+	int initialized;
			
 
				+
			
 
				+	/*
			
 
				+	 * Should the accounting and control be hierarchical, per subtree?
			
 
				+	 */
			
 
				+	bool use_hierarchy;
			
 
				+
			
 
				+	/* protected by memcg_oom_lock */
			
 
				+	bool		oom_lock;
			
 
				+	int		under_oom;
			
 
				+
			
 
				+	int	swappiness;
			
 
				+	/* OOM-Killer disable */
			
 
				+	int		oom_kill_disable;
			
 
				+
			
 
				+	/* protect arrays of thresholds */
			
 
				+	struct mutex thresholds_lock;
			
 
				+
			
 
				+	/* thresholds for memory usage. RCU-protected */
			
 
				+	struct mem_cgroup_thresholds thresholds;
			
 
				+
			
 
				+	/* thresholds for mem+swap usage. RCU-protected */
			
 
				+	struct mem_cgroup_thresholds memsw_thresholds;
			
 
				+
			
 
				+	/* For oom notifier event fd */
			
 
				+	struct list_head oom_notify;
			
 
				+
			
 
				+	/*
			
 
				+	 * Should we move charges of a task when a task is moved into this
			
 
				+	 * mem_cgroup ? And what type of charges should we move ?
			
 
				+	 */
			
 
				+	unsigned long move_charge_at_immigrate;
			
 
				+	/*
			
 
				+	 * set > 0 if pages under this cgroup are moving to other cgroup.
			
 
				+	 */
			
 
				+	atomic_t		moving_account;
			
 
				+	/* taken only while moving_account > 0 */
			
 
				+	spinlock_t		move_lock;
			
 
				+	struct task_struct	*move_lock_task;
			
 
				+	unsigned long		move_lock_flags;
			
 
				+	/*
			
 
				+	 * percpu counter.
			
 
				+	 */
			
 
				+	struct mem_cgroup_stat_cpu __percpu *stat;
			
 
				+	spinlock_t pcp_counter_lock;
			
 
				+
			
 
				+#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
			
 
				+	struct cg_proto tcp_mem;
			
 
				+#endif
			
 
				+#if defined(CONFIG_MEMCG_KMEM)
			
 
				+        /* Index in the kmem_cache->memcg_params.memcg_caches array */
			
 
				+	int kmemcg_id;
			
 
				+	bool kmem_acct_activated;
			
 
				+	bool kmem_acct_active;
			
 
				+#endif
			
 
				+
			
 
				+	int last_scanned_node;
			
 
				+#if MAX_NUMNODES > 1
			
 
				+	nodemask_t	scan_nodes;
			
 
				+	atomic_t	numainfo_events;
			
 
				+	atomic_t	numainfo_updating;
			
 
				+#endif
			
 
				+
			
 
				+#ifdef CONFIG_CGROUP_WRITEBACK
			
 
				+	struct list_head cgwb_list;
			
 
				+	struct wb_domain cgwb_domain;
			
 
				+#endif
			
 
				+
			
 
				+	/* List of events which userspace want to receive */
			
 
				+	struct list_head event_list;
			
 
				+	spinlock_t event_list_lock;
			
 
				+
			
 
				+	struct mem_cgroup_per_node *nodeinfo[0];
			
 
				+	/* WARNING: nodeinfo must be the last member here */
			
 
				+};
			
 
				 extern struct cgroup_subsys_state *mem_cgroup_root_css;
			
 
				 
			
 
				-void mem_cgroup_events(struct mem_cgroup *memcg,
			
 
				+/**
			
 
				+ * mem_cgroup_events - count memory events against a cgroup
			
 
				+ * @memcg: the memory cgroup
			
 
				+ * @idx: the event index
			
 
				+ * @nr: the number of events to account for
			
 
				+ */
			
 
				+static inline void mem_cgroup_events(struct mem_cgroup *memcg,
			
 
				 		       enum mem_cgroup_events_index idx,
			
 
				-		       unsigned int nr);
			
 
				+		       unsigned int nr)
			
 
				+{
			
 
				+	this_cpu_add(memcg->stat->events[idx], nr);
			
 
				+}
			
 
				 
			
 
				 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
			
 
				 
			
@@ -90,15 +304,31 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
 
				 struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
			
 
				 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
			
 
				 
			
 
				-bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
			
 
				-			      struct mem_cgroup *root);
			
 
				 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
			
 
				 
			
 
				 extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
			
 
				 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
			
 
				 
			
 
				 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
			
 
				-extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
			
 
				+static inline
			
 
				+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
			
 
				+	return css ? container_of(css, struct mem_cgroup, css) : NULL;
			
 
				+}
			
 
				+
			
 
				+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
			
 
				+				   struct mem_cgroup *,
			
 
				+				   struct mem_cgroup_reclaim_cookie *);
			
 
				+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
			
 
				+
			
 
				+static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
			
 
				+			      struct mem_cgroup *root)
			
 
				+{
			
 
				+	if (root == memcg)
			
 
				+		return true;
			
 
				+	if (!root->use_hierarchy)
			
 
				+		return false;
			
 
				+	return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
			
 
				+}
			
 
				 
			
 
				 static inline bool mm_match_cgroup(struct mm_struct *mm,
			
 
				 				   struct mem_cgroup *memcg)
			
@@ -114,22 +344,65 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
 
				 	return match;
			
 
				 }
			
 
				 
			
 
				-extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
			
 
				 extern struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
			
 
				 
			
 
				-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
			
 
				-				   struct mem_cgroup *,
			
 
				-				   struct mem_cgroup_reclaim_cookie *);
			
 
				-void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
			
 
				+static inline bool mem_cgroup_disabled(void)
			
 
				+{
			
 
				+	if (memory_cgrp_subsys.disabled)
			
 
				+		return true;
			
 
				+	return false;
			
 
				+}
			
 
				 
			
 
				 /*
			
 
				  * For memory reclaim.
			
 
				  */
			
 
				-int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
			
 
				-bool mem_cgroup_lruvec_online(struct lruvec *lruvec);
			
 
				 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
			
 
				-unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
			
 
				-void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
			
 
				+
			
 
				+void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
			
 
				+		int nr_pages);
			
 
				+
			
 
				+static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
			
 
				+{
			
 
				+	struct mem_cgroup_per_zone *mz;
			
 
				+	struct mem_cgroup *memcg;
			
 
				+
			
 
				+	if (mem_cgroup_disabled())
			
 
				+		return true;
			
 
				+
			
 
				+	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
			
 
				+	memcg = mz->memcg;
			
 
				+
			
 
				+	return !!(memcg->css.flags & CSS_ONLINE);
			
 
				+}
			
 
				+
			
 
				+static inline
			
 
				+unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
			
 
				+{
			
 
				+	struct mem_cgroup_per_zone *mz;
			
 
				+
			
 
				+	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
			
 
				+	return mz->lru_size[lru];
			
 
				+}
			
 
				+
			
 
				+static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
			
 
				+{
			
 
				+	unsigned long inactive_ratio;
			
 
				+	unsigned long inactive;
			
 
				+	unsigned long active;
			
 
				+	unsigned long gb;
			
 
				+
			
 
				+	inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
			
 
				+	active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
			
 
				+
			
 
				+	gb = (inactive + active) >> (30 - PAGE_SHIFT);
			
 
				+	if (gb)
			
 
				+		inactive_ratio = int_sqrt(10 * gb);
			
 
				+	else
			
 
				+		inactive_ratio = 1;
			
 
				+
			
 
				+	return inactive * inactive_ratio < active;
			
 
				+}
			
 
				+
			
 
				 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
			
 
				 					struct task_struct *p);
			
 
				 
			
@@ -156,18 +429,26 @@ bool mem_cgroup_oom_synchronize(bool wait);
 
				 extern int do_swap_account;
			
 
				 #endif
			
 
				 
			
 
				-static inline bool mem_cgroup_disabled(void)
			
 
				-{
			
 
				-	if (memory_cgrp_subsys.disabled)
			
 
				-		return true;
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				 struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page);
			
 
				-void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
			
 
				-				 enum mem_cgroup_stat_index idx, int val);
			
 
				 void mem_cgroup_end_page_stat(struct mem_cgroup *memcg);
			
 
				 
			
 
				+/**
			
 
				+ * mem_cgroup_update_page_stat - update page state statistics
			
 
				+ * @memcg: memcg to account against
			
 
				+ * @idx: page state item to account
			
 
				+ * @val: number of pages (positive or negative)
			
 
				+ *
			
 
				+ * See mem_cgroup_begin_page_stat() for locking requirements.
			
 
				+ */
			
 
				+static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
			
 
				+				 enum mem_cgroup_stat_index idx, int val)
			
 
				+{
			
 
				+	VM_BUG_ON(!rcu_read_lock_held());
			
 
				+
			
 
				+	if (memcg)
			
 
				+		this_cpu_add(memcg->stat->count[idx], val);
			
 
				+}
			
 
				+
			
 
				 static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
			
 
				 					    enum mem_cgroup_stat_index idx)
			
 
				 {
			
@@ -184,13 +465,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 
				 						gfp_t gfp_mask,
			
 
				 						unsigned long *total_scanned);
			
 
				 
			
 
				-void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
			
 
				 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
			
 
				 					     enum vm_event_item idx)
			
 
				 {
			
 
				+	struct mem_cgroup *memcg;
			
 
				+
			
 
				 	if (mem_cgroup_disabled())
			
 
				 		return;
			
 
				-	__mem_cgroup_count_vm_event(mm, idx);
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
			
 
				+	if (unlikely(!memcg))
			
 
				+		goto out;
			
 
				+
			
 
				+	switch (idx) {
			
 
				+	case PGFAULT:
			
 
				+		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]);
			
 
				+		break;
			
 
				+	case PGMAJFAULT:
			
 
				+		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
			
 
				+		break;
			
 
				+	default:
			
 
				+		BUG();
			
 
				+	}
			
 
				+out:
			
 
				+	rcu_read_unlock();
			
 
				 }
			
 
				 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
			
 
				 void mem_cgroup_split_huge_fixup(struct page *head);
			
@@ -275,12 +574,6 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-static inline struct cgroup_subsys_state
			
 
				-		*mem_cgroup_css(struct mem_cgroup *memcg)
			
 
				-{
			
 
				-	return NULL;
			
 
				-}
			
 
				-
			
 
				 static inline struct mem_cgroup *
			
 
				 mem_cgroup_iter(struct mem_cgroup *root,
			
 
				 		struct mem_cgroup *prev,
			
@@ -444,7 +737,10 @@ static inline bool memcg_kmem_enabled(void)
 
				 	return static_key_false(&memcg_kmem_enabled_key);
			
 
				 }
			
 
				 
			
 
				-bool memcg_kmem_is_active(struct mem_cgroup *memcg);
			
 
				+static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
			
 
				+{
			
 
				+	return memcg->kmem_acct_active;
			
 
				+}
			
 
				 
			
 
				 /*
			
 
				  * In general, we'll do everything in our power to not incur in any overhead
			
@@ -463,7 +759,15 @@ void __memcg_kmem_commit_charge(struct page *page,
 
				 				       struct mem_cgroup *memcg, int order);
			
 
				 void __memcg_kmem_uncharge_pages(struct page *page, int order);
			
 
				 
			
 
				-int memcg_cache_id(struct mem_cgroup *memcg);
			
 
				+/*
			
 
				+ * Helper for accessing a memcg's index. It will be used as an index in the
			
 
				+ * child cache array in kmem_cache, and also to derive its name. This function
			
 
				+ * will return -1 when this is not a kmem-limited memcg.
			
 
				+ */
			
 
				+static inline int memcg_cache_id(struct mem_cgroup *memcg)
			
 
				+{
			
 
				+	return memcg ? memcg->kmemcg_id : -1;
			
 
				+}
			
 
				 
			
 
				 struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
			
 
				 void __memcg_kmem_put_cache(struct kmem_cache *cachep);
			
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -351,7 +351,15 @@ extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
				 extern int kswapd_run(int nid);
			
 
				 extern void kswapd_stop(int nid);
			
 
				 #ifdef CONFIG_MEMCG
			
 
				-extern int mem_cgroup_swappiness(struct mem_cgroup *mem);
			
 
				+static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
			
 
				+{
			
 
				+	/* root ? */
			
 
				+	if (mem_cgroup_disabled() || !memcg->css.parent)
			
 
				+		return vm_swappiness;
			
 
				+
			
 
				+	return memcg->swappiness;
			
 
				+}
			
 
				+
			
 
				 #else
			
 
				 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
			
 
				 {
			
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1042,34 +1042,6 @@ struct proto {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-/*
			
 
				- * Bits in struct cg_proto.flags
			
 
				- */
			
 
				-enum cg_proto_flags {
			
 
				-	/* Currently active and new sockets should be assigned to cgroups */
			
 
				-	MEMCG_SOCK_ACTIVE,
			
 
				-	/* It was ever activated; we must disarm static keys on destruction */
			
 
				-	MEMCG_SOCK_ACTIVATED,
			
 
				-};
			
 
				-
			
 
				-struct cg_proto {
			
 
				-	struct page_counter	memory_allocated;	/* Current allocated memory. */
			
 
				-	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */
			
 
				-	int			memory_pressure;
			
 
				-	long			sysctl_mem[3];
			
 
				-	unsigned long		flags;
			
 
				-	/*
			
 
				-	 * memcg field is used to find which memcg we belong directly
			
 
				-	 * Each memcg struct can hold more than one cg_proto, so container_of
			
 
				-	 * won't really cut.
			
 
				-	 *
			
 
				-	 * The elegant solution would be having an inverse function to
			
 
				-	 * proto_cgroup in struct proto, but that means polluting the structure
			
 
				-	 * for everybody, instead of just for memcg users.
			
 
				-	 */
			
 
				-	struct mem_cgroup	*memcg;
			
 
				-};
			
 
				-
			
 
				 int proto_register(struct proto *prot, int alloc_slab);
			
 
				 void proto_unregister(struct proto *prot);
			
 
				 
			
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -111,56 +111,10 @@ static const char * const mem_cgroup_lru_names[] = {
 
				 	"unevictable",
			
 
				 };
			
 
				 
			
 
				-/*
			
 
				- * Per memcg event counter is incremented at every pagein/pageout. With THP,
			
 
				- * it will be incremated by the number of pages. This counter is used for
			
 
				- * for trigger some periodic events. This is straightforward and better
			
 
				- * than using jiffies etc. to handle periodic memcg event.
			
 
				- */
			
 
				-enum mem_cgroup_events_target {
			
 
				-	MEM_CGROUP_TARGET_THRESH,
			
 
				-	MEM_CGROUP_TARGET_SOFTLIMIT,
			
 
				-	MEM_CGROUP_TARGET_NUMAINFO,
			
 
				-	MEM_CGROUP_NTARGETS,
			
 
				-};
			
 
				 #define THRESHOLDS_EVENTS_TARGET 128
			
 
				 #define SOFTLIMIT_EVENTS_TARGET 1024
			
 
				 #define NUMAINFO_EVENTS_TARGET	1024
			
 
				 
			
 
				-struct mem_cgroup_stat_cpu {
			
 
				-	long count[MEM_CGROUP_STAT_NSTATS];
			
 
				-	unsigned long events[MEMCG_NR_EVENTS];
			
 
				-	unsigned long nr_page_events;
			
 
				-	unsigned long targets[MEM_CGROUP_NTARGETS];
			
 
				-};
			
 
				-
			
 
				-struct reclaim_iter {
			
 
				-	struct mem_cgroup *position;
			
 
				-	/* scan generation, increased every round-trip */
			
 
				-	unsigned int generation;
			
 
				-};
			
 
				-
			
 
				-/*
			
 
				- * per-zone information in memory controller.
			
 
				- */
			
 
				-struct mem_cgroup_per_zone {
			
 
				-	struct lruvec		lruvec;
			
 
				-	unsigned long		lru_size[NR_LRU_LISTS];
			
 
				-
			
 
				-	struct reclaim_iter	iter[DEF_PRIORITY + 1];
			
 
				-
			
 
				-	struct rb_node		tree_node;	/* RB tree node */
			
 
				-	unsigned long		usage_in_excess;/* Set to the value by which */
			
 
				-						/* the soft limit is exceeded*/
			
 
				-	bool			on_tree;
			
 
				-	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
			
 
				-						/* use container_of	   */
			
 
				-};
			
 
				-
			
 
				-struct mem_cgroup_per_node {
			
 
				-	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
			
 
				-};
			
 
				-
			
 
				 /*
			
 
				  * Cgroups above their limits are maintained in a RB-Tree, independent of
			
 
				  * their hierarchy representation
			
@@ -181,32 +135,6 @@ struct mem_cgroup_tree {
 
				 
			
 
				 static struct mem_cgroup_tree soft_limit_tree __read_mostly;
			
 
				 
			
 
				-struct mem_cgroup_threshold {
			
 
				-	struct eventfd_ctx *eventfd;
			
 
				-	unsigned long threshold;
			
 
				-};
			
 
				-
			
 
				-/* For threshold */
			
 
				-struct mem_cgroup_threshold_ary {
			
 
				-	/* An array index points to threshold just below or equal to usage. */
			
 
				-	int current_threshold;
			
 
				-	/* Size of entries[] */
			
 
				-	unsigned int size;
			
 
				-	/* Array of thresholds */
			
 
				-	struct mem_cgroup_threshold entries[0];
			
 
				-};
			
 
				-
			
 
				-struct mem_cgroup_thresholds {
			
 
				-	/* Primary thresholds array */
			
 
				-	struct mem_cgroup_threshold_ary *primary;
			
 
				-	/*
			
 
				-	 * Spare threshold array.
			
 
				-	 * This is needed to make mem_cgroup_unregister_event() "never fail".
			
 
				-	 * It must be able to store at least primary->size - 1 entries.
			
 
				-	 */
			
 
				-	struct mem_cgroup_threshold_ary *spare;
			
 
				-};
			
 
				-
			
 
				 /* for OOM */
			
 
				 struct mem_cgroup_eventfd_list {
			
 
				 	struct list_head list;
			
@@ -256,113 +184,6 @@ struct mem_cgroup_event {
 
				 static void mem_cgroup_threshold(struct mem_cgroup *memcg);
			
 
				 static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
			
 
				 
			
 
				-/*
			
 
				- * The memory controller data structure. The memory controller controls both
			
 
				- * page cache and RSS per cgroup. We would eventually like to provide
			
 
				- * statistics based on the statistics developed by Rik Van Riel for clock-pro,
			
 
				- * to help the administrator determine what knobs to tune.
			
 
				- */
			
 
				-struct mem_cgroup {
			
 
				-	struct cgroup_subsys_state css;
			
 
				-
			
 
				-	/* Accounted resources */
			
 
				-	struct page_counter memory;
			
 
				-	struct page_counter memsw;
			
 
				-	struct page_counter kmem;
			
 
				-
			
 
				-	/* Normal memory consumption range */
			
 
				-	unsigned long low;
			
 
				-	unsigned long high;
			
 
				-
			
 
				-	unsigned long soft_limit;
			
 
				-
			
 
				-	/* vmpressure notifications */
			
 
				-	struct vmpressure vmpressure;
			
 
				-
			
 
				-	/* css_online() has been completed */
			
 
				-	int initialized;
			
 
				-
			
 
				-	/*
			
 
				-	 * Should the accounting and control be hierarchical, per subtree?
			
 
				-	 */
			
 
				-	bool use_hierarchy;
			
 
				-
			
 
				-	/* protected by memcg_oom_lock */
			
 
				-	bool		oom_lock;
			
 
				-	int		under_oom;
			
 
				-
			
 
				-	int	swappiness;
			
 
				-	/* OOM-Killer disable */
			
 
				-	int		oom_kill_disable;
			
 
				-
			
 
				-	/* protect arrays of thresholds */
			
 
				-	struct mutex thresholds_lock;
			
 
				-
			
 
				-	/* thresholds for memory usage. RCU-protected */
			
 
				-	struct mem_cgroup_thresholds thresholds;
			
 
				-
			
 
				-	/* thresholds for mem+swap usage. RCU-protected */
			
 
				-	struct mem_cgroup_thresholds memsw_thresholds;
			
 
				-
			
 
				-	/* For oom notifier event fd */
			
 
				-	struct list_head oom_notify;
			
 
				-
			
 
				-	/*
			
 
				-	 * Should we move charges of a task when a task is moved into this
			
 
				-	 * mem_cgroup ? And what type of charges should we move ?
			
 
				-	 */
			
 
				-	unsigned long move_charge_at_immigrate;
			
 
				-	/*
			
 
				-	 * set > 0 if pages under this cgroup are moving to other cgroup.
			
 
				-	 */
			
 
				-	atomic_t		moving_account;
			
 
				-	/* taken only while moving_account > 0 */
			
 
				-	spinlock_t		move_lock;
			
 
				-	struct task_struct	*move_lock_task;
			
 
				-	unsigned long		move_lock_flags;
			
 
				-	/*
			
 
				-	 * percpu counter.
			
 
				-	 */
			
 
				-	struct mem_cgroup_stat_cpu __percpu *stat;
			
 
				-	spinlock_t pcp_counter_lock;
			
 
				-
			
 
				-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
			
 
				-	struct cg_proto tcp_mem;
			
 
				-#endif
			
 
				-#if defined(CONFIG_MEMCG_KMEM)
			
 
				-        /* Index in the kmem_cache->memcg_params.memcg_caches array */
			
 
				-	int kmemcg_id;
			
 
				-	bool kmem_acct_activated;
			
 
				-	bool kmem_acct_active;
			
 
				-#endif
			
 
				-
			
 
				-	int last_scanned_node;
			
 
				-#if MAX_NUMNODES > 1
			
 
				-	nodemask_t	scan_nodes;
			
 
				-	atomic_t	numainfo_events;
			
 
				-	atomic_t	numainfo_updating;
			
 
				-#endif
			
 
				-
			
 
				-#ifdef CONFIG_CGROUP_WRITEBACK
			
 
				-	struct list_head cgwb_list;
			
 
				-	struct wb_domain cgwb_domain;
			
 
				-#endif
			
 
				-
			
 
				-	/* List of events which userspace want to receive */
			
 
				-	struct list_head event_list;
			
 
				-	spinlock_t event_list_lock;
			
 
				-
			
 
				-	struct mem_cgroup_per_node *nodeinfo[0];
			
 
				-	/* WARNING: nodeinfo must be the last member here */
			
 
				-};
			
 
				-
			
 
				-#ifdef CONFIG_MEMCG_KMEM
			
 
				-bool memcg_kmem_is_active(struct mem_cgroup *memcg)
			
 
				-{
			
 
				-	return memcg->kmem_acct_active;
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 /* Stuffs for move charges at task migration. */
			
 
				 /*
			
 
				  * Types of charges to be moved.
			
@@ -423,11 +244,6 @@ enum res_type {
 
				  */
			
 
				 static DEFINE_MUTEX(memcg_create_mutex);
			
 
				 
			
 
				-struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
			
 
				-{
			
 
				-	return s ? container_of(s, struct mem_cgroup, css) : NULL;
			
 
				-}
			
 
				-
			
 
				 /* Some nice accessors for the vmpressure. */
			
 
				 struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
			
 
				 {
			
@@ -593,11 +409,6 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
 
				 	return &memcg->nodeinfo[nid]->zoneinfo[zid];
			
 
				 }
			
 
				 
			
 
				-struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
			
 
				-{
			
 
				-	return &memcg->css;
			
 
				-}
			
 
				-
			
 
				 /**
			
 
				  * mem_cgroup_css_from_page - css of the memcg associated with a page
			
 
				  * @page: page of interest
			
@@ -876,14 +687,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 
				 	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
			
 
				 }
			
 
				 
			
 
				-unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
			
 
				-{
			
 
				-	struct mem_cgroup_per_zone *mz;
			
 
				-
			
 
				-	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
			
 
				-	return mz->lru_size[lru];
			
 
				-}
			
 
				-
			
 
				 static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
			
 
				 						  int nid,
			
 
				 						  unsigned int lru_mask)
			
@@ -986,6 +789,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 
				 
			
 
				 	return mem_cgroup_from_css(task_css(p, memory_cgrp_id));
			
 
				 }
			
 
				+EXPORT_SYMBOL(mem_cgroup_from_task);
			
 
				 
			
 
				 static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
			
 
				 {
			
@@ -1031,7 +835,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 
				 				   struct mem_cgroup *prev,
			
 
				 				   struct mem_cgroup_reclaim_cookie *reclaim)
			
 
				 {
			
 
				-	struct reclaim_iter *uninitialized_var(iter);
			
 
				+	struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
			
 
				 	struct cgroup_subsys_state *css = NULL;
			
 
				 	struct mem_cgroup *memcg = NULL;
			
 
				 	struct mem_cgroup *pos = NULL;
			
@@ -1173,30 +977,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
 
				 	     iter != NULL;				\
			
 
				 	     iter = mem_cgroup_iter(NULL, iter, NULL))
			
 
				 
			
 
				-void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
			
 
				-{
			
 
				-	struct mem_cgroup *memcg;
			
 
				-
			
 
				-	rcu_read_lock();
			
 
				-	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
			
 
				-	if (unlikely(!memcg))
			
 
				-		goto out;
			
 
				-
			
 
				-	switch (idx) {
			
 
				-	case PGFAULT:
			
 
				-		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]);
			
 
				-		break;
			
 
				-	case PGMAJFAULT:
			
 
				-		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
			
 
				-		break;
			
 
				-	default:
			
 
				-		BUG();
			
 
				-	}
			
 
				-out:
			
 
				-	rcu_read_unlock();
			
 
				-}
			
 
				-EXPORT_SYMBOL(__mem_cgroup_count_vm_event);
			
 
				-
			
 
				 /**
			
 
				  * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
			
 
				  * @zone: zone of the wanted lruvec
			
@@ -1295,15 +1075,6 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 
				 	VM_BUG_ON((long)(*lru_size) < 0);
			
 
				 }
			
 
				 
			
 
				-bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root)
			
 
				-{
			
 
				-	if (root == memcg)
			
 
				-		return true;
			
 
				-	if (!root->use_hierarchy)
			
 
				-		return false;
			
 
				-	return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
			
 
				-}
			
 
				-
			
 
				 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
			
 
				 {
			
 
				 	struct mem_cgroup *task_memcg;
			
@@ -1330,39 +1101,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
			
 
				-{
			
 
				-	unsigned long inactive_ratio;
			
 
				-	unsigned long inactive;
			
 
				-	unsigned long active;
			
 
				-	unsigned long gb;
			
 
				-
			
 
				-	inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
			
 
				-	active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
			
 
				-
			
 
				-	gb = (inactive + active) >> (30 - PAGE_SHIFT);
			
 
				-	if (gb)
			
 
				-		inactive_ratio = int_sqrt(10 * gb);
			
 
				-	else
			
 
				-		inactive_ratio = 1;
			
 
				-
			
 
				-	return inactive * inactive_ratio < active;
			
 
				-}
			
 
				-
			
 
				-bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
			
 
				-{
			
 
				-	struct mem_cgroup_per_zone *mz;
			
 
				-	struct mem_cgroup *memcg;
			
 
				-
			
 
				-	if (mem_cgroup_disabled())
			
 
				-		return true;
			
 
				-
			
 
				-	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
			
 
				-	memcg = mz->memcg;
			
 
				-
			
 
				-	return !!(memcg->css.flags & CSS_ONLINE);
			
 
				-}
			
 
				-
			
 
				 #define mem_cgroup_from_counter(counter, member)	\
			
 
				 	container_of(counter, struct mem_cgroup, member)
			
 
				 
			
@@ -1394,15 +1132,6 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 
				 	return margin;
			
 
				 }
			
 
				 
			
 
				-int mem_cgroup_swappiness(struct mem_cgroup *memcg)
			
 
				-{
			
 
				-	/* root ? */
			
 
				-	if (mem_cgroup_disabled() || !memcg->css.parent)
			
 
				-		return vm_swappiness;
			
 
				-
			
 
				-	return memcg->swappiness;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * A routine for checking "mem" is under move_account() or not.
			
 
				  *
			
@@ -2067,23 +1796,6 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
 
				 }
			
 
				 EXPORT_SYMBOL(mem_cgroup_end_page_stat);
			
 
				 
			
 
				-/**
			
 
				- * mem_cgroup_update_page_stat - update page state statistics
			
 
				- * @memcg: memcg to account against
			
 
				- * @idx: page state item to account
			
 
				- * @val: number of pages (positive or negative)
			
 
				- *
			
 
				- * See mem_cgroup_begin_page_stat() for locking requirements.
			
 
				- */
			
 
				-void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
			
 
				-				 enum mem_cgroup_stat_index idx, int val)
			
 
				-{
			
 
				-	VM_BUG_ON(!rcu_read_lock_held());
			
 
				-
			
 
				-	if (memcg)
			
 
				-		this_cpu_add(memcg->stat->count[idx], val);
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * size of first charge trial. "32" comes from vmscan.c's magic value.
			
 
				  * TODO: maybe necessary to use big numbers in big irons.
			
@@ -2509,16 +2221,6 @@ void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
 
				 	css_put_many(&memcg->css, nr_pages);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * helper for acessing a memcg's index. It will be used as an index in the
			
 
				- * child cache array in kmem_cache, and also to derive its name. This function
			
 
				- * will return -1 when this is not a kmem-limited memcg.
			
 
				- */
			
 
				-int memcg_cache_id(struct mem_cgroup *memcg)
			
 
				-{
			
 
				-	return memcg ? memcg->kmemcg_id : -1;
			
 
				-}
			
 
				-
			
 
				 static int memcg_alloc_cache_id(void)
			
 
				 {
			
 
				 	int id, size;
			
@@ -5525,19 +5227,6 @@ struct cgroup_subsys memory_cgrp_subsys = {
 
				 	.early_init = 0,
			
 
				 };
			
 
				 
			
 
				-/**
			
 
				- * mem_cgroup_events - count memory events against a cgroup
			
 
				- * @memcg: the memory cgroup
			
 
				- * @idx: the event index
			
 
				- * @nr: the number of events to account for
			
 
				- */
			
 
				-void mem_cgroup_events(struct mem_cgroup *memcg,
			
 
				-		       enum mem_cgroup_events_index idx,
			
 
				-		       unsigned int nr)
			
 
				-{
			
 
				-	this_cpu_add(memcg->stat->events[idx], nr);
			
 
				-}
			
 
				-
			
 
				 /**
			
 
				  * mem_cgroup_low - check if memory consumption is below the normal range
			
 
				  * @root: the highest ancestor to consider
			
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -146,7 +146,7 @@ static int hwpoison_filter_task(struct page *p)
 
				 	if (!mem)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	css = mem_cgroup_css(mem);
			
 
				+	css = &mem->css;
			
 
				 	ino = cgroup_ino(css->cgroup);
			
 
				 	css_put(css);
			
 
				 
			
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -500,7 +500,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
				 			     struct kmem_cache *root_cache)
			
 
				 {
			
 
				 	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
			
 
				-	struct cgroup_subsys_state *css = mem_cgroup_css(memcg);
			
 
				+	struct cgroup_subsys_state *css = &memcg->css;
			
 
				 	struct memcg_cache_array *arr;
			
 
				 	struct kmem_cache *s = NULL;
			
 
				 	char *cache_name;
			
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -175,7 +175,7 @@ static bool sane_reclaim(struct scan_control *sc)
 
				 	if (!memcg)
			
 
				 		return true;
			
 
				 #ifdef CONFIG_CGROUP_WRITEBACK
			
 
				-	if (cgroup_on_dfl(mem_cgroup_css(memcg)->cgroup))
			
 
				+	if (cgroup_on_dfl(memcg->css.cgroup))
			
 
				 		return true;
			
 
				 #endif
			
 
				 	return false;