@@ -1239,15 +1239,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	}
 	local_irq_restore(flags);
 }
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-	return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
-}
-#else
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-	return false;
-}
 #endif
 
 /*
@@ -1584,12 +1575,7 @@ again:
 					  get_pageblock_migratetype(page));
 	}
 
-	/*
-	 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
-	 * aging protocol, so they can't be fair.
-	 */
-	if (!gfp_thisnode_allocation(gfp_flags))
-		__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1955,23 +1941,12 @@ zonelist_scan:
 		 * zone size to ensure fair page aging. The zone a
 		 * page was allocated in should have no effect on the
 		 * time the page has in memory before being reclaimed.
-		 *
-		 * Try to stay in local zones in the fastpath. If
-		 * that fails, the slowpath is entered, which will do
-		 * another pass starting with the local zones, but
-		 * ultimately fall back to remote zones that do not
-		 * partake in the fairness round-robin cycle of this
-		 * zonelist.
-		 *
-		 * NOTE: GFP_THISNODE allocations do not partake in
-		 * the kswapd aging protocol, so they can't be fair.
 		 */
-		if ((alloc_flags & ALLOC_WMARK_LOW) &&
-		    !gfp_thisnode_allocation(gfp_mask)) {
-			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-				continue;
+		if (alloc_flags & ALLOC_FAIR) {
 			if (!zone_local(preferred_zone, zone))
 				continue;
+			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+				continue;
 		}
 		/*
 		 * When allocating a page cache page for writing, we
@@ -2409,32 +2384,40 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 	return page;
 }
 
-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
-			     struct zonelist *zonelist,
-			     enum zone_type high_zoneidx,
-			     struct zone *preferred_zone)
+static void reset_alloc_batches(struct zonelist *zonelist,
+				enum zone_type high_zoneidx,
+				struct zone *preferred_zone)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-		if (!(gfp_mask & __GFP_NO_KSWAPD))
-			wakeup_kswapd(zone, order, zone_idx(preferred_zone));
 		/*
 		 * Only reset the batches of zones that were actually
-		 * considered in the fast path, we don't want to
-		 * thrash fairness information for zones that are not
+		 * considered in the fairness pass, we don't want to
+		 * trash fairness information for zones that are not
 		 * actually part of this zonelist's round-robin cycle.
 		 */
 		if (!zone_local(preferred_zone, zone))
 			continue;
 		mod_zone_page_state(zone, NR_ALLOC_BATCH,
-				    high_wmark_pages(zone) -
-				    low_wmark_pages(zone) -
-				    zone_page_state(zone, NR_ALLOC_BATCH));
+			high_wmark_pages(zone) - low_wmark_pages(zone) -
+			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
 	}
 }
 
+static void wake_all_kswapds(unsigned int order,
+			     struct zonelist *zonelist,
+			     enum zone_type high_zoneidx,
+			     struct zone *preferred_zone)
+{
+	struct zoneref *z;
+	struct zone *zone;
+
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+		wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+}
+
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
@@ -2523,12 +2506,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * allowed per node queues are empty and that nodes are
 	 * over allocated.
 	 */
-	if (gfp_thisnode_allocation(gfp_mask))
+	if (IS_ENABLED(CONFIG_NUMA) &&
+	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
 restart:
-	prepare_slowpath(gfp_mask, order, zonelist,
-			 high_zoneidx, preferred_zone);
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2712,7 +2696,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
-	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
 	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
@@ -2753,11 +2737,28 @@ retry_cpuset:
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
+retry:
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
 	if (unlikely(!page)) {
+		/*
+		 * The first pass makes sure allocations are spread
+		 * fairly within the local node. However, the local
+		 * node might have free pages left after the fairness
+		 * batches are exhausted, and remote zones haven't
+		 * even been considered yet. Try once more without
+		 * fairness, and include remote zones now, before
+		 * entering the slowpath and waking kswapd: prefer
+		 * spilling to a remote zone over swapping locally.
+		 */
+		if (alloc_flags & ALLOC_FAIR) {
+			reset_alloc_batches(zonelist, high_zoneidx,
+					    preferred_zone);
+			alloc_flags &= ~ALLOC_FAIR;
+			goto retry;
+		}
 		/*
 		 * Runtime PM, block IO and its error handling path
 		 * can deadlock because I/O on the device might not