@@ -2620,6 +2620,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));
 
+	/*
+	 * Kswapd gives up on balancing particular nodes after too
+	 * many failures to reclaim anything from them and goes to
+	 * sleep. On reclaim progress, reset the failure counter. A
+	 * successful direct reclaim run will revive a dormant kswapd.
+	 */
+	if (reclaimable)
+		pgdat->kswapd_failures = 0;
+
 	return reclaimable;
 }
 
@@ -2694,10 +2703,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 					    GFP_KERNEL | __GFP_HARDWALL))
 				continue;
 
-			if (sc->priority != DEF_PRIORITY &&
-			    !pgdat_reclaimable(zone->zone_pgdat))
-				continue;	/* Let kswapd poll it */
-
 			/*
 			 * If we already have plenty of memory free for
 			 * compaction in this zone, don't free any more.
@@ -2817,7 +2822,7 @@ retry:
 	return 0;
 }
 
-static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat)
 {
 	struct zone *zone;
 	unsigned long pfmemalloc_reserve = 0;
@@ -2825,6 +2830,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 	int i;
 	bool wmark_ok;
 
+	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+		return true;
+
 	for (i = 0; i <= ZONE_NORMAL; i++) {
 		zone = &pgdat->node_zones[i];
 		if (!managed_zone(zone) ||
@@ -2905,7 +2913,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 
 		/* Throttle based on the first usable node */
 		pgdat = zone->zone_pgdat;
-		if (pfmemalloc_watermark_ok(pgdat))
+		if (allow_direct_reclaim(pgdat))
 			goto out;
 		break;
 	}
@@ -2927,14 +2935,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	 */
 	if (!(gfp_mask & __GFP_FS)) {
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
-			pfmemalloc_watermark_ok(pgdat), HZ);
+			allow_direct_reclaim(pgdat), HZ);
 
 		goto check_pending;
 	}
 
 	/* Throttle until kswapd wakes the process */
 	wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
-		pfmemalloc_watermark_ok(pgdat));
+		allow_direct_reclaim(pgdat));
 
 check_pending:
 	if (fatal_signal_pending(current))
@@ -3114,7 +3122,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 
 	/*
 	 * The throttled processes are normally woken up in balance_pgdat() as
-	 * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+	 * soon as allow_direct_reclaim() is true. But there is a potential
 	 * race between when kswapd checks the watermarks and a process gets
 	 * throttled. There is also a potential race if processes get
 	 * throttled, kswapd wakes, a large process exits thereby balancing the
@@ -3128,6 +3136,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 	if (waitqueue_active(&pgdat->pfmemalloc_wait))
 		wake_up_all(&pgdat->pfmemalloc_wait);
 
+	/* Hopeless node, leave it to direct reclaim */
+	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+		return true;
+
 	for (i = 0; i <= classzone_idx; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 
@@ -3214,9 +3226,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 	count_vm_event(PAGEOUTRUN);
 
 	do {
+		unsigned long nr_reclaimed = sc.nr_reclaimed;
 		bool raise_priority = true;
 
-		sc.nr_reclaimed = 0;
 		sc.reclaim_idx = classzone_idx;
 
 		/*
@@ -3295,7 +3307,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		 * able to safely make forward progress. Wake them
 		 */
 		if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
-				pfmemalloc_watermark_ok(pgdat))
+				allow_direct_reclaim(pgdat))
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
 		/* Check if kswapd should be suspending */
@@ -3306,10 +3318,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		 * Raise priority if scanning rate is too low or there was no
 		 * progress in reclaiming pages
 		 */
-		if (raise_priority || !sc.nr_reclaimed)
+		nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
+		if (raise_priority || !nr_reclaimed)
 			sc.priority--;
 	} while (sc.priority >= 1);
 
+	if (!sc.nr_reclaimed)
+		pgdat->kswapd_failures++;
+
 out:
 	/*
 	 * Return the order kswapd stopped reclaiming at as
@@ -3509,6 +3525,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
 
+	/* Hopeless node, leave it to direct reclaim */
+	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+		return;
+
 	/* Only wake kswapd if all zones are unbalanced */
 	for (z = 0; z <= classzone_idx; z++) {
 		zone = pgdat->node_zones + z;
@@ -3779,9 +3799,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
 	    sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
 		return NODE_RECLAIM_FULL;
 
-	if (!pgdat_reclaimable(pgdat))
-		return NODE_RECLAIM_FULL;
-
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */