@@ -2968,18 +2968,23 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
 	} while (memcg);
 }
 
-static bool zone_balanced(struct zone *zone, int order,
-			  unsigned long balance_gap, int classzone_idx)
+static bool zone_balanced(struct zone *zone, int order, bool highorder,
+			unsigned long balance_gap, int classzone_idx)
 {
-	if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
-				    balance_gap, classzone_idx))
-		return false;
+	unsigned long mark = high_wmark_pages(zone) + balance_gap;
 
-	if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
-				order, 0, classzone_idx) == COMPACT_SKIPPED)
-		return false;
+	/*
+	 * When checking from pgdat_balanced(), kswapd should stop and sleep
+	 * when it reaches the high order-0 watermark and let kcompactd take
+	 * over. Other callers such as wakeup_kswapd() want to determine the
+	 * true high-order watermark.
+	 */
+	if (IS_ENABLED(CONFIG_COMPACTION) && !highorder) {
+		mark += (1UL << order);
+		order = 0;
+	}
 
-	return true;
+	return zone_watermark_ok_safe(zone, order, mark, classzone_idx);
 }
 
 /*
@@ -3029,7 +3034,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 			continue;
 		}
 
-		if (zone_balanced(zone, order, 0, i))
+		if (zone_balanced(zone, order, false, 0, i))
 			balanced_pages += zone->managed_pages;
 		else if (!order)
 			return false;
@@ -3083,27 +3088,14 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
  */
 static bool kswapd_shrink_zone(struct zone *zone,
 			       int classzone_idx,
-			       struct scan_control *sc,
-			       unsigned long *nr_attempted)
+			       struct scan_control *sc)
 {
-	int testorder = sc->order;
 	unsigned long balance_gap;
 	bool lowmem_pressure;
 
 	/* Reclaim above the high watermark. */
 	sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone));
 
-	/*
-	 * Kswapd reclaims only single pages with compaction enabled. Trying
-	 * too hard to reclaim until contiguous free pages have become
-	 * available can hurt performance by evicting too much useful data
-	 * from memory. Do not reclaim more than needed for compaction.
-	 */
-	if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
-			compaction_suitable(zone, sc->order, 0, classzone_idx)
-							!= COMPACT_SKIPPED)
-		testorder = 0;
-
 	/*
 	 * We put equal pressure on every zone, unless one zone has way too
 	 * many pages free already. The "too many pages" is defined as the
@@ -3118,15 +3110,12 @@ static bool kswapd_shrink_zone(struct zone *zone,
 	 * reclaim is necessary
 	 */
 	lowmem_pressure = (buffer_heads_over_limit && is_highmem(zone));
-	if (!lowmem_pressure && zone_balanced(zone, testorder,
+	if (!lowmem_pressure && zone_balanced(zone, sc->order, false,
 						balance_gap, classzone_idx))
 		return true;
 
 	shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
 
-	/* Account for the number of pages attempted to reclaim */
-	*nr_attempted += sc->nr_to_reclaim;
-
 	clear_bit(ZONE_WRITEBACK, &zone->flags);
 
 	/*
@@ -3136,7 +3125,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
 	 * waits.
 	 */
 	if (zone_reclaimable(zone) &&
-	    zone_balanced(zone, testorder, 0, classzone_idx)) {
+	    zone_balanced(zone, sc->order, false, 0, classzone_idx)) {
 		clear_bit(ZONE_CONGESTED, &zone->flags);
 		clear_bit(ZONE_DIRTY, &zone->flags);
 	}
@@ -3148,7 +3137,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at high_wmark_pages(zone).
  *
- * Returns the final order kswapd was reclaiming at
+ * Returns the highest zone idx kswapd was reclaiming at
  *
  * There is special handling here for zones which are full of pinned pages.
  * This can happen if the pages are all mlocked, or if they are all used by
@@ -3165,8 +3154,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
  * interoperates with the page allocator fallback scheme to ensure that aging
  * of pages is balanced across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
-							int *classzone_idx)
+static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 {
 	int i;
 	int end_zone = 0;	/* Inclusive. 0 = ZONE_DMA */
@@ -3183,9 +3171,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 	count_vm_event(PAGEOUTRUN);
 
 	do {
-		unsigned long nr_attempted = 0;
 		bool raise_priority = true;
-		bool pgdat_needs_compaction = (order > 0);
 
 		sc.nr_reclaimed = 0;
 
@@ -3220,7 +3206,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 				break;
 			}
 
-			if (!zone_balanced(zone, order, 0, 0)) {
+			if (!zone_balanced(zone, order, false, 0, 0)) {
 				end_zone = i;
 				break;
 			} else {
@@ -3236,24 +3222,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 		if (i < 0)
 			goto out;
 
-		for (i = 0; i <= end_zone; i++) {
-			struct zone *zone = pgdat->node_zones + i;
-
-			if (!populated_zone(zone))
-				continue;
-
-			/*
-			 * If any zone is currently balanced then kswapd will
-			 * not call compaction as it is expected that the
-			 * necessary pages are already available.
-			 */
-			if (pgdat_needs_compaction &&
-					zone_watermark_ok(zone, order,
-						low_wmark_pages(zone),
-						*classzone_idx, 0))
-				pgdat_needs_compaction = false;
-		}
-
 		/*
 		 * If we're getting trouble reclaiming, start doing writepage
 		 * even in laptop mode.
@@ -3297,8 +3265,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 			 * that that high watermark would be met at 100%
 			 * efficiency.
 			 */
-			if (kswapd_shrink_zone(zone, end_zone,
-					       &sc, &nr_attempted))
+			if (kswapd_shrink_zone(zone, end_zone, &sc))
 				raise_priority = false;
 		}
 
@@ -3311,28 +3278,10 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 				pfmemalloc_watermark_ok(pgdat))
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
-		/*
-		 * Fragmentation may mean that the system cannot be rebalanced
-		 * for high-order allocations in all zones. If twice the
-		 * allocation size has been reclaimed and the zones are still
-		 * not balanced then recheck the watermarks at order-0 to
-		 * prevent kswapd reclaiming excessively. Assume that a
-		 * process requested a high-order can direct reclaim/compact.
-		 */
-		if (order && sc.nr_reclaimed >= 2UL << order)
-			order = sc.order = 0;
-
 		/* Check if kswapd should be suspending */
 		if (try_to_freeze() || kthread_should_stop())
 			break;
 
-		/*
-		 * Compact if necessary and kswapd is reclaiming at least the
-		 * high watermark number of pages as requsted
-		 */
-		if (pgdat_needs_compaction && sc.nr_reclaimed > nr_attempted)
-			compact_pgdat(pgdat, order);
-
 		/*
 		 * Raise priority if scanning rate is too low or there was no
 		 * progress in reclaiming pages
@@ -3340,20 +3289,18 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 		if (raise_priority || !sc.nr_reclaimed)
 			sc.priority--;
 	} while (sc.priority >= 1 &&
-			!pgdat_balanced(pgdat, order, *classzone_idx));
+			!pgdat_balanced(pgdat, order, classzone_idx));
 
 out:
 	/*
-	 * Return the order we were reclaiming at so prepare_kswapd_sleep()
-	 * makes a decision on the order we were last reclaiming at. However,
-	 * if another caller entered the allocator slow path while kswapd
-	 * was awake, order will remain at the higher level
+	 * Return the highest zone idx we were reclaiming at so
+	 * prepare_kswapd_sleep() makes the same decisions as here.
 	 */
-	*classzone_idx = end_zone;
-	return order;
+	return end_zone;
 }
 
-static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
+static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
+				int classzone_idx, int balanced_classzone_idx)
 {
 	long remaining = 0;
 	DEFINE_WAIT(wait);
@@ -3364,7 +3311,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 	prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 
 	/* Try to sleep for a short interval */
-	if (prepare_kswapd_sleep(pgdat, order, remaining, classzone_idx)) {
+	if (prepare_kswapd_sleep(pgdat, order, remaining,
+						balanced_classzone_idx)) {
 		remaining = schedule_timeout(HZ/10);
 		finish_wait(&pgdat->kswapd_wait, &wait);
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -3374,7 +3322,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 	 * After a short sleep, check if it was a premature sleep. If not, then
 	 * go fully to sleep until explicitly woken up.
 	 */
-	if (prepare_kswapd_sleep(pgdat, order, remaining, classzone_idx)) {
+	if (prepare_kswapd_sleep(pgdat, order, remaining,
+						balanced_classzone_idx)) {
 		trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
 
 		/*
@@ -3395,6 +3344,12 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 		 */
 		reset_isolation_suitable(pgdat);
 
+		/*
+		 * We have freed the memory, now we should compact it to make
+		 * allocation of the requested order possible.
+		 */
+		wakeup_kcompactd(pgdat, order, classzone_idx);
+
 		if (!kthread_should_stop())
 			schedule();
 
@@ -3424,7 +3379,6 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 static int kswapd(void *p)
 {
 	unsigned long order, new_order;
-	unsigned balanced_order;
 	int classzone_idx, new_classzone_idx;
 	int balanced_classzone_idx;
 	pg_data_t *pgdat = (pg_data_t*)p;
@@ -3457,23 +3411,19 @@ static int kswapd(void *p)
 	set_freezable();
 
 	order = new_order = 0;
-	balanced_order = 0;
 	classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
 	balanced_classzone_idx = classzone_idx;
 	for ( ; ; ) {
 		bool ret;
 
 		/*
-		 * If the last balance_pgdat was unsuccessful it's unlikely a
-		 * new request of a similar or harder type will succeed soon
-		 * so consider going to sleep on the basis we reclaimed at
+		 * While we were reclaiming, there might have been another
+		 * wakeup, so check the values.
 		 */
-		if (balanced_order == new_order) {
-			new_order = pgdat->kswapd_max_order;
-			new_classzone_idx = pgdat->classzone_idx;
-			pgdat->kswapd_max_order = 0;
-			pgdat->classzone_idx = pgdat->nr_zones - 1;
-		}
+		new_order = pgdat->kswapd_max_order;
+		new_classzone_idx = pgdat->classzone_idx;
+		pgdat->kswapd_max_order = 0;
+		pgdat->classzone_idx = pgdat->nr_zones - 1;
 
 		if (order < new_order || classzone_idx > new_classzone_idx) {
 			/*
@@ -3483,7 +3433,7 @@ static int kswapd(void *p)
 			order = new_order;
 			classzone_idx = new_classzone_idx;
 		} else {
-			kswapd_try_to_sleep(pgdat, balanced_order,
+			kswapd_try_to_sleep(pgdat, order, classzone_idx,
 					balanced_classzone_idx);
 			order = pgdat->kswapd_max_order;
 			classzone_idx = pgdat->classzone_idx;
@@ -3503,9 +3453,8 @@ static int kswapd(void *p)
 		 */
 		if (!ret) {
 			trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-			balanced_classzone_idx = classzone_idx;
-			balanced_order = balance_pgdat(pgdat, order,
-						&balanced_classzone_idx);
+			balanced_classzone_idx = balance_pgdat(pgdat, order,
+								classzone_idx);
 		}
 	}
 
@@ -3535,7 +3484,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	}
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
-	if (zone_balanced(zone, order, 0, 0))
+	if (zone_balanced(zone, order, true, 0, 0))
 		return;
 
 	trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);