Merge branch 'for-4.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

Pull percpu fixes from Tejun Heo:
 "Late percpu pull request for v4.16-rc6.

   - percpu allocator pool replenishing no longer triggers OOM or
     warning messages.

     Also, the alloc interface now understands __GFP_NORETRY and
     __GFP_NOWARN, to allow avoiding OOMs from userland-triggered
     actions like bpf map creation (see the caller-side sketch after
     this message).

     Also added cond_resched() in the alloc loop.

   - percpu allocation can now be interrupted by kill signals to avoid
     deadlocking the OOM killer.

   - Added Dennis Zhou as a co-maintainer.

     He has rewritten the area map allocator, understands most of the
     code base, and has been responsive to all bug reports"
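As a caller-side illustration of the __GFP_NORETRY/__GFP_NOWARN support described above (a hedged sketch only; my_create_counters() is a hypothetical name, not part of this pull), a userland-triggerable path can now ask the percpu allocator to fail fast without triggering the OOM killer or logging warnings:

    #include <linux/percpu.h>
    #include <linux/gfp.h>

    /* Hypothetical caller: fail fast and stay quiet under user-controlled load. */
    static u64 __percpu *my_create_counters(void)
    {
    	/* GFP_KERNEL plus the two flags percpu now forwards to its backing allocators */
    	return alloc_percpu_gfp(u64, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
    }

A NULL return is then an ordinary failure for the caller to handle, and the counters are released with free_percpu() as usual.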

* 'for-4.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods
  mm: Allow to kill tasks doing pcpu_alloc() and waiting for pcpu_balance_workfn()
  percpu: include linux/sched.h for cond_resched()
  percpu: add a schedule point in pcpu_balance_workfn()
  percpu: allow select gfp to be passed to underlying allocators
  percpu: add __GFP_NORETRY semantics to the percpu balancing path
  percpu: match chunk allocator declarations with definitions
  percpu: add Dennis Zhou as a percpu co-maintainer
Linus Torvalds, 7 years ago
commit 0d707a2f24

+ 1 - 0
MAINTAINERS

@@ -10844,6 +10844,7 @@ F:	drivers/platform/x86/peaq-wmi.c
 PER-CPU MEMORY ALLOCATOR
 M:	Tejun Heo <tj@kernel.org>
 M:	Christoph Lameter <cl@linux.com>
+M:	Dennis Zhou <dennisszhou@gmail.com>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
 S:	Maintained
 F:	include/linux/percpu*.h

+ 12 - 6
include/linux/percpu-refcount.h

@@ -30,10 +30,14 @@
  * calls io_destroy() or the process exits.
  *
  * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
- * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove
- * the kioctx from the proccess's list of kioctxs - after that, there can't be
- * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
- * the initial ref with percpu_ref_put().
+ * removes the kioctx from the proccess's table of kioctxs and kills percpu_ref.
+ * After that, there can't be any new users of the kioctx (from lookup_ioctx())
+ * and it's then safe to drop the initial ref with percpu_ref_put().
+ *
+ * Note that the free path, free_ioctx(), needs to go through explicit call_rcu()
+ * to synchronize with RCU protected lookup_ioctx().  percpu_ref operations don't
+ * imply RCU grace periods of any kind and if a user wants to combine percpu_ref
+ * with RCU protection, it must be done explicitly.
  *
  * Code that does a two stage shutdown like this often needs some kind of
  * explicit synchronization to ensure the initial refcount can only be dropped
@@ -113,8 +117,10 @@ void percpu_ref_reinit(struct percpu_ref *ref);
  * Must be used to drop the initial ref on a percpu refcount; must be called
  * precisely once before shutdown.
  *
- * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the
- * percpu counters and dropping the initial ref.
+ * Switches @ref into atomic mode before gathering up the percpu counters
+ * and dropping the initial ref.
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 static inline void percpu_ref_kill(struct percpu_ref *ref)
 {

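To illustrate the rule the updated comments state (percpu_ref never implies an RCU grace period), here is a minimal hedged sketch of the pattern; struct my_obj and the my_obj_* helpers are hypothetical names, not taken from the aio code:

    #include <linux/percpu-refcount.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct my_obj {
    	struct percpu_ref ref;	/* set up with percpu_ref_init(&obj->ref, my_obj_release, 0, GFP_KERNEL) */
    	struct rcu_head rcu;
    };

    static void my_obj_free_rcu(struct rcu_head *rcu)
    {
    	kfree(container_of(rcu, struct my_obj, rcu));
    }

    static void my_obj_release(struct percpu_ref *ref)
    {
    	struct my_obj *obj = container_of(ref, struct my_obj, ref);

    	/* percpu_ref provides no grace period; wait for RCU readers here */
    	call_rcu(&obj->rcu, my_obj_free_rcu);
    }

    static void my_obj_destroy(struct my_obj *obj)
    {
    	/* unpublish from the RCU-protected lookup structure first, then: */
    	percpu_ref_kill(&obj->ref);	/* no implied synchronize_rcu() */
    	percpu_ref_put(&obj->ref);	/* drop the initial ref */
    }

The explicit call_rcu() on the release path is what lets RCU-protected lookups keep touching the object safely; percpu_ref_kill() alone gives no such guarantee.
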
+ 2 - 0
lib/percpu-refcount.c

@@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is in the
  * process of switching to atomic mode by percpu_ref_switch_to_atomic().
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)

+ 4 - 4
mm/percpu-km.c

@@ -34,7 +34,7 @@
 #include <linux/log2.h>
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-			       int page_start, int page_end)
+			       int page_start, int page_end, gfp_t gfp)
 {
 	return 0;
 }
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	/* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 	const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
 	struct pcpu_chunk *chunk;
 	struct page *pages;
 	int i;
 
-	chunk = pcpu_alloc_chunk();
+	chunk = pcpu_alloc_chunk(gfp);
 	if (!chunk)
 		return NULL;
 
-	pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+	pages = alloc_pages(gfp, order_base_2(nr_pages));
 	if (!pages) {
 		pcpu_free_chunk(chunk);
 		return NULL;

+ 11 - 7
mm/percpu-vm.c

@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
 	lockdep_assert_held(&pcpu_alloc_mutex);
 
 	if (!pages)
-		pages = pcpu_mem_zalloc(pages_size);
+		pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
 	return pages;
 }
 
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
  * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
  * @page_start: page index of the first page to be allocated
  * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
  *
  * Allocate pages [@page_start,@page_end) into @pages for all units.
  * The allocation is for @chunk.  Percpu core doesn't care about the
  * content of @pages and will pass it verbatim to pcpu_map_pages().
  */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
-			    struct page **pages, int page_start, int page_end)
+			    struct page **pages, int page_start, int page_end,
+			    gfp_t gfp)
 {
-	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
 	unsigned int cpu, tcpu;
 	int i;
 
+	gfp |= __GFP_HIGHMEM;
+
 	for_each_possible_cpu(cpu) {
 		for (i = page_start; i < page_end; i++) {
 			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * @chunk: chunk of interest
  * @page_start: the start page
  * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
  *
  * For each cpu, populate and map pages [@page_start,@page_end) into
  * @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * pcpu_alloc_mutex, does GFP_KERNEL allocation.
  */
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-			       int page_start, int page_end)
+			       int page_start, int page_end, gfp_t gfp)
 {
 	struct page **pages;
 
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
 	if (!pages)
 		return -ENOMEM;
 
-	if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+	if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
 		return -ENOMEM;
 
 	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	struct vm_struct **vms;
 
-	chunk = pcpu_alloc_chunk();
+	chunk = pcpu_alloc_chunk(gfp);
 	if (!chunk)
 		return NULL;
 

+ 44 - 23
mm/percpu.c

@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used.  The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags.  The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;
 
 	if (size <= PAGE_SIZE)
-		return kzalloc(size, GFP_KERNEL);
+		return kzalloc(size, gfp);
 	else
-		return vzalloc(size);
+		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	int region_bits;
 
-	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
 	if (!chunk)
 		return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	region_bits = pcpu_chunk_map_bits(chunk);
 
 	chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-					   sizeof(chunk->alloc_map[0]));
+					   sizeof(chunk->alloc_map[0]), gfp);
 	if (!chunk->alloc_map)
 		goto alloc_map_fail;
 
 	chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-					   sizeof(chunk->bound_map[0]));
+					   sizeof(chunk->bound_map[0]), gfp);
 	if (!chunk->bound_map)
 		goto bound_map_fail;
 
 	chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-					   sizeof(chunk->md_blocks[0]));
+					   sizeof(chunk->md_blocks[0]), gfp);
 	if (!chunk->md_blocks)
 		goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
  * pcpu_addr_to_page		- translate address to physical address
  * pcpu_verify_alloc_info	- check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+			       int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+				  int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 				 gfp_t gfp)
 {
+	/* whitelisted flags that can be passed to the backing allocators */
+	gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
 	bool do_warn = !(gfp & __GFP_NOWARN);
 	static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 		return NULL;
 	}
 
-	if (!is_atomic)
-		mutex_lock(&pcpu_alloc_mutex);
+	if (!is_atomic) {
+		/*
+		 * pcpu_balance_workfn() allocates memory under this mutex,
+		 * and it may wait for memory reclaim. Allow current task
+		 * to become OOM victim, in case of memory pressure.
+		 */
+		if (gfp & __GFP_NOFAIL)
+			mutex_lock(&pcpu_alloc_mutex);
+		else if (mutex_lock_killable(&pcpu_alloc_mutex))
+			return NULL;
+	}
 
 	spin_lock_irqsave(&pcpu_lock, flags);
 
@@ -1421,7 +1434,7 @@ restart:
 	}
 
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(pcpu_gfp);
 		if (!chunk) {
 			err = "failed to allocate new chunk";
 			goto fail;
@@ -1450,7 +1463,7 @@ area_found:
 					   page_start, page_end) {
 			WARN_ON(chunk->immutable);
 
-			ret = pcpu_populate_chunk(chunk, rs, re);
+			ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
 			spin_lock_irqsave(&pcpu_lock, flags);
 			if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one.  This is also
+ * responsible for maintaining the pool of empty populated pages.  However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered.  We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+	/* gfp flags passed to underlying allocators */
+	const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
 	LIST_HEAD(to_free);
 	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
 	struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
 			spin_unlock_irq(&pcpu_lock);
 		}
 		pcpu_destroy_chunk(chunk);
+		cond_resched();
 	}
 
 	/*
@@ -1645,7 +1666,7 @@ retry_pop:
 					   chunk->nr_pages) {
 			int nr = min(re - rs, nr_to_pop);
 
-			ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
 			if (!ret) {
 				nr_to_pop -= nr;
 				spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
 
 	if (nr_to_pop) {
 		/* ran out of chunks to populate, create a new one and retry */
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(gfp);
 		if (chunk) {
 			spin_lock_irq(&pcpu_lock);
 			pcpu_chunk_relocate(chunk, -1);
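
The pcpu_gfp whitelist introduced in pcpu_alloc() above boils down to masking the caller's flags before they reach the backing page and slab allocators. A standalone hedged sketch of that masking idea (my_backed_alloc() is a hypothetical helper, not a kernel API):

    #include <linux/gfp.h>
    #include <linux/slab.h>

    /* Forward only GFP_KERNEL plus the whitelisted modifiers; drop everything else. */
    static void *my_backed_alloc(size_t size, gfp_t gfp)
    {
    	const gfp_t allowed = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;

    	return kzalloc(size, gfp & allowed);
    }

Keeping the mask in one place is what lets the atomic path, pcpu_alloc(), and the workqueue path, pcpu_balance_workfn(), share the same populate and create helpers while only ever handing safe flags downward.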