@@ -3049,14 +3049,36 @@ static void age_active_anon(struct pglist_data *pgdat,
         } while (memcg);
 }
 
-static bool zone_balanced(struct zone *zone, int order, int classzone_idx)
+/*
+ * Returns true if there is an eligible zone balanced for the request order
+ * and classzone_idx
+ */
+static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 {
-        unsigned long mark = high_wmark_pages(zone);
+        int i;
+        unsigned long mark = -1;
+        struct zone *zone;
 
-        if (!zone_watermark_ok_safe(zone, order, mark, classzone_idx))
-                return false;
+        for (i = 0; i <= classzone_idx; i++) {
+                zone = pgdat->node_zones + i;
 
-        return true;
+                if (!managed_zone(zone))
+                        continue;
+
+                mark = high_wmark_pages(zone);
+                if (zone_watermark_ok_safe(zone, order, mark, classzone_idx))
+                        return true;
+        }
+
+        /*
+         * If a node has no populated zone within classzone_idx, it does not
+         * need balancing by definition. This can happen if a zone-restricted
+         * allocation tries to wake a remote kswapd.
+         */
+        if (mark == -1)
+                return true;
+
+        return false;
 }
 
 /* Clear pgdat state for congested, dirty or under writeback. */
@@ -3075,8 +3097,6 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
  */
 static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 {
-        int i;
-
         /*
          * The throttled processes are normally woken up in balance_pgdat() as
          * soon as allow_direct_reclaim() is true. But there is a potential
@@ -3097,16 +3117,9 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
         if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
                 return true;
 
-        for (i = 0; i <= classzone_idx; i++) {
-                struct zone *zone = pgdat->node_zones + i;
-
-                if (!managed_zone(zone))
-                        continue;
-
-                if (zone_balanced(zone, order, classzone_idx)) {
-                        clear_pgdat_congested(pgdat);
-                        return true;
-                }
+        if (pgdat_balanced(pgdat, order, classzone_idx)) {
+                clear_pgdat_congested(pgdat);
+                return true;
         }
 
         return false;
@@ -3212,23 +3225,12 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
         }
 
         /*
-         * Only reclaim if there are no eligible zones. Check from
-         * high to low zone as allocations prefer higher zones.
-         * Scanning from low to high zone would allow congestion to be
-         * cleared during a very small window when a small low
-         * zone was balanced even under extreme pressure when the
-         * overall node may be congested. Note that sc.reclaim_idx
-         * is not used as buffer_heads_over_limit may have adjusted
-         * it.
+         * Only reclaim if there are no eligible zones. Note that
+         * sc.reclaim_idx is not used as buffer_heads_over_limit may
+         * have adjusted it.
          */
-        for (i = classzone_idx; i >= 0; i--) {
-                zone = pgdat->node_zones + i;
-                if (!managed_zone(zone))
-                        continue;
-
-                if (zone_balanced(zone, sc.order, classzone_idx))
-                        goto out;
-        }
+        if (pgdat_balanced(pgdat, sc.order, classzone_idx))
+                goto out;
 
         /*
          * Do some background aging of the anon list, to give
@@ -3295,6 +3297,22 @@ out:
         return sc.order;
 }
 
+/*
+ * pgdat->kswapd_classzone_idx is the highest zone index that a recent
+ * allocation request woke kswapd for. When kswapd has not woken recently,
+ * the value is MAX_NR_ZONES which is not a valid index. This compares a
+ * given classzone and returns it or the highest classzone index kswapd
+ * was recently woke for.
+ */
+static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
+                                           enum zone_type classzone_idx)
+{
+        if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
+                return classzone_idx;
+
+        return max(pgdat->kswapd_classzone_idx, classzone_idx);
+}
+
 static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
                                 unsigned int classzone_idx)
 {
@@ -3336,7 +3354,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
          * the previous request that slept prematurely.
          */
         if (remaining) {
-                pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
+                pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
                 pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order);
         }
 
@@ -3390,7 +3408,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
  */
 static int kswapd(void *p)
 {
-        unsigned int alloc_order, reclaim_order, classzone_idx;
+        unsigned int alloc_order, reclaim_order;
+        unsigned int classzone_idx = MAX_NR_ZONES - 1;
         pg_data_t *pgdat = (pg_data_t*)p;
         struct task_struct *tsk = current;
 
@@ -3420,20 +3439,23 @@ static int kswapd(void *p)
         tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
         set_freezable();
 
-        pgdat->kswapd_order = alloc_order = reclaim_order = 0;
-        pgdat->kswapd_classzone_idx = classzone_idx = 0;
+        pgdat->kswapd_order = 0;
+        pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
         for ( ; ; ) {
                 bool ret;
 
+                alloc_order = reclaim_order = pgdat->kswapd_order;
+                classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
+
 kswapd_try_sleep:
                 kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
                                         classzone_idx);
 
                 /* Read the new order and classzone_idx */
                 alloc_order = reclaim_order = pgdat->kswapd_order;
-                classzone_idx = pgdat->kswapd_classzone_idx;
+                classzone_idx = kswapd_classzone_idx(pgdat, 0);
                 pgdat->kswapd_order = 0;
-                pgdat->kswapd_classzone_idx = 0;
+                pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
 
                 ret = try_to_freeze();
                 if (kthread_should_stop())
@@ -3459,9 +3481,6 @@ kswapd_try_sleep:
                 reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
                 if (reclaim_order < alloc_order)
                         goto kswapd_try_sleep;
-
-                alloc_order = reclaim_order = pgdat->kswapd_order;
-                classzone_idx = pgdat->kswapd_classzone_idx;
         }
 
         tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
@@ -3477,7 +3496,6 @@ kswapd_try_sleep:
 void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 {
         pg_data_t *pgdat;
-        int z;
 
         if (!managed_zone(zone))
                 return;
@@ -3485,7 +3503,8 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
         if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
                 return;
         pgdat = zone->zone_pgdat;
-        pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
+        pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat,
+                                                           classzone_idx);
         pgdat->kswapd_order = max(pgdat->kswapd_order, order);
         if (!waitqueue_active(&pgdat->kswapd_wait))
                 return;
@@ -3494,17 +3513,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
         if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
                 return;
 
-        /* Only wake kswapd if all zones are unbalanced */
-        for (z = 0; z <= classzone_idx; z++) {
-                zone = pgdat->node_zones + z;
-                if (!managed_zone(zone))
-                        continue;
-
-                if (zone_balanced(zone, order, classzone_idx))
-                        return;
-        }
+        if (pgdat_balanced(pgdat, order, classzone_idx))
+                return;
 
-        trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
+        trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order);
         wake_up_interruptible(&pgdat->kswapd_wait);
 }
 