|
@@ -3402,6 +3402,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
|
|
|
{
|
|
|
struct zone *zone;
|
|
|
struct zoneref *z;
|
|
|
+ pg_data_t *current_pgdat = NULL;
|
|
|
|
|
|
/*
|
|
|
* Make sure we converge to OOM if we cannot make any progress
|
|
@@ -3411,27 +3412,56 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
|
|
|
return false;
|
|
|
|
|
|
/*
|
|
|
- * Keep reclaiming pages while there is a chance this will lead somewhere.
|
|
|
- * If none of the target zones can satisfy our allocation request even
|
|
|
- * if all reclaimable pages are considered then we are screwed and have
|
|
|
- * to go OOM.
|
|
|
+ * Blindly retry lowmem allocation requests that are often ignored by
|
|
|
+ * the OOM killer up to MAX_RECLAIM_RETRIES as we not have a reliable
|
|
|
+ * and fast means of calculating reclaimable, dirty and writeback pages
|
|
|
+ * in eligible zones.
|
|
|
+ */
|
|
|
+ if (ac->high_zoneidx < ZONE_NORMAL)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Keep reclaiming pages while there is a chance this will lead
|
|
|
+ * somewhere. If none of the target zones can satisfy our allocation
|
|
|
+ * request even if all reclaimable pages are considered then we are
|
|
|
+ * screwed and have to go OOM.
|
|
|
*/
|
|
|
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
|
|
|
ac->nodemask) {
|
|
|
unsigned long available;
|
|
|
unsigned long reclaimable;
|
|
|
+ int zid;
|
|
|
|
|
|
- available = reclaimable = zone_reclaimable_pages(zone);
|
|
|
+ if (current_pgdat == zone->zone_pgdat)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ current_pgdat = zone->zone_pgdat;
|
|
|
+ available = reclaimable = pgdat_reclaimable_pages(current_pgdat);
|
|
|
available -= DIV_ROUND_UP(no_progress_loops * available,
|
|
|
MAX_RECLAIM_RETRIES);
|
|
|
- available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
|
|
|
+
|
|
|
+ /* Account for all free pages on eligible zones */
|
|
|
+ for (zid = 0; zid <= zone_idx(zone); zid++) {
|
|
|
+ struct zone *acct_zone = ¤t_pgdat->node_zones[zid];
|
|
|
+
|
|
|
+ available += zone_page_state_snapshot(acct_zone, NR_FREE_PAGES);
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Would the allocation succeed if we reclaimed the whole
|
|
|
- * available?
|
|
|
+ * available? This is approximate because there is no
|
|
|
+ * accurate count of reclaimable pages per zone.
|
|
|
*/
|
|
|
- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
|
|
|
- ac_classzone_idx(ac), alloc_flags, available)) {
|
|
|
+ for (zid = 0; zid <= zone_idx(zone); zid++) {
|
|
|
+ struct zone *check_zone = ¤t_pgdat->node_zones[zid];
|
|
|
+ unsigned long estimate;
|
|
|
+
|
|
|
+ estimate = min(check_zone->managed_pages, available);
|
|
|
+ if (!__zone_watermark_ok(check_zone, order,
|
|
|
+ min_wmark_pages(check_zone), ac_classzone_idx(ac),
|
|
|
+ alloc_flags, estimate))
|
|
|
+ continue;
|
|
|
+
|
|
|
/*
|
|
|
* If we didn't make any progress and have a lot of
|
|
|
* dirty + writeback pages then we should wait for
|
|
@@ -3441,15 +3471,16 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
|
|
|
if (!did_some_progress) {
|
|
|
unsigned long write_pending;
|
|
|
|
|
|
- write_pending = zone_page_state_snapshot(zone,
|
|
|
- NR_ZONE_WRITE_PENDING);
|
|
|
+ write_pending =
|
|
|
+ node_page_state(current_pgdat, NR_WRITEBACK) +
|
|
|
+ node_page_state(current_pgdat, NR_FILE_DIRTY);
|
|
|
|
|
|
if (2 * write_pending > reclaimable) {
|
|
|
congestion_wait(BLK_RW_ASYNC, HZ/10);
|
|
|
return true;
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+out:
|
|
|
/*
|
|
|
* Memory allocation/reclaim might be called from a WQ
|
|
|
* context and the current implementation of the WQ
|