|
@@ -2537,10 +2537,17 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
|
|
|
|
|
|
for (i = 0; i <= ZONE_NORMAL; i++) {
|
|
|
zone = &pgdat->node_zones[i];
|
|
|
+ if (!populated_zone(zone))
|
|
|
+ continue;
|
|
|
+
|
|
|
pfmemalloc_reserve += min_wmark_pages(zone);
|
|
|
free_pages += zone_page_state(zone, NR_FREE_PAGES);
|
|
|
}
|
|
|
|
|
|
+ /* If there are no reserves (unexpected config) then do not throttle */
|
|
|
+ if (!pfmemalloc_reserve)
|
|
|
+ return true;
|
|
|
+
|
|
|
wmark_ok = free_pages > pfmemalloc_reserve / 2;
|
|
|
|
|
|
/* kswapd must be awake if processes are being throttled */
|
|
@@ -2565,9 +2572,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
|
|
|
static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
|
|
|
nodemask_t *nodemask)
|
|
|
{
|
|
|
+ struct zoneref *z;
|
|
|
struct zone *zone;
|
|
|
- int high_zoneidx = gfp_zone(gfp_mask);
|
|
|
- pg_data_t *pgdat;
|
|
|
+ pg_data_t *pgdat = NULL;
|
|
|
|
|
|
/*
|
|
|
* Kernel threads should not be throttled as they may be indirectly
|
|
@@ -2586,10 +2593,34 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
|
|
|
if (fatal_signal_pending(current))
|
|
|
goto out;
|
|
|
|
|
|
- /* Check if the pfmemalloc reserves are ok */
|
|
|
- first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
|
|
|
- pgdat = zone->zone_pgdat;
|
|
|
- if (pfmemalloc_watermark_ok(pgdat))
|
|
|
+ /*
|
|
|
+ * Check if the pfmemalloc reserves are ok by finding the first node
|
|
|
+ * with a usable ZONE_NORMAL or lower zone. The expectation is that
|
|
|
+ * GFP_KERNEL will be required for allocating network buffers when
|
|
|
+ * swapping over the network so ZONE_HIGHMEM is unusable.
|
|
|
+ *
|
|
|
+ * Throttling is based on the first usable node and throttled processes
|
|
|
+ * wait on a queue until kswapd makes progress and wakes them. There
|
|
|
+ * is an affinity then between processes waking up and where reclaim
|
|
|
+ * progress has been made assuming the process wakes on the same node.
|
|
|
+ * More importantly, processes running on remote nodes will not compete
|
|
|
+ * for remote pfmemalloc reserves and processes on different nodes
|
|
|
+ * should make reasonable progress.
|
|
|
+ */
|
|
|
+ for_each_zone_zonelist_nodemask(zone, z, zonelist,
|
|
|
+ gfp_mask, nodemask) {
|
|
|
+ if (zone_idx(zone) > ZONE_NORMAL)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ /* Throttle based on the first usable node */
|
|
|
+ pgdat = zone->zone_pgdat;
|
|
|
+ if (pfmemalloc_watermark_ok(pgdat))
|
|
|
+ goto out;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* If no zone was usable by the allocation flags then do not throttle */
|
|
|
+ if (!pgdat)
|
|
|
goto out;
|
|
|
|
|
|
/* Account for the throttling */
|