|
|
@@ -2762,7 +2762,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
|
|
|
|
|
|
/* kswapd must be awake if processes are being throttled */
|
|
|
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
|
|
|
- pgdat->classzone_idx = min(pgdat->classzone_idx,
|
|
|
+ pgdat->kswapd_classzone_idx = min(pgdat->kswapd_classzone_idx,
|
|
|
(enum zone_type)ZONE_NORMAL);
|
|
|
wake_up_interruptible(&pgdat->kswapd_wait);
|
|
|
}
|
|
|
@@ -3042,11 +3042,11 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
|
|
|
if (!populated_zone(zone))
|
|
|
continue;
|
|
|
|
|
|
- if (zone_balanced(zone, order, classzone_idx))
|
|
|
- return true;
|
|
|
+ if (!zone_balanced(zone, order, classzone_idx))
|
|
|
+ return false;
|
|
|
}
|
|
|
|
|
|
- return false;
|
|
|
+ return true;
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
@@ -3238,8 +3238,8 @@ out:
|
|
|
return sc.order;
|
|
|
}
|
|
|
|
|
|
-static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
|
|
|
- int classzone_idx, int balanced_classzone_idx)
|
|
|
+static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
|
|
|
+ unsigned int classzone_idx)
|
|
|
{
|
|
|
long remaining = 0;
|
|
|
DEFINE_WAIT(wait);
|
|
|
@@ -3250,8 +3250,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
|
|
|
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
|
|
|
|
|
|
/* Try to sleep for a short interval */
|
|
|
- if (prepare_kswapd_sleep(pgdat, order, remaining,
|
|
|
- balanced_classzone_idx)) {
|
|
|
+ if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
|
|
|
/*
|
|
|
* Compaction records what page blocks it recently failed to
|
|
|
* isolate pages from and skips them in the future scanning.
|
|
|
@@ -3264,9 +3263,20 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
|
|
|
* We have freed the memory, now we should compact it to make
|
|
|
* allocation of the requested order possible.
|
|
|
*/
|
|
|
- wakeup_kcompactd(pgdat, order, classzone_idx);
|
|
|
+ wakeup_kcompactd(pgdat, alloc_order, classzone_idx);
|
|
|
|
|
|
remaining = schedule_timeout(HZ/10);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If woken prematurely then reset kswapd_classzone_idx and
|
|
|
+ * order. The values will either be from a wakeup request or
|
|
|
+ * the previous request that slept prematurely.
|
|
|
+ */
|
|
|
+ if (remaining) {
|
|
|
+ pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
|
|
|
+ pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order);
|
|
|
+ }
|
|
|
+
|
|
|
finish_wait(&pgdat->kswapd_wait, &wait);
|
|
|
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
|
|
|
}
|
|
|
@@ -3275,8 +3285,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
|
|
|
* After a short sleep, check if it was a premature sleep. If not, then
|
|
|
* go fully to sleep until explicitly woken up.
|
|
|
*/
|
|
|
- if (prepare_kswapd_sleep(pgdat, order, remaining,
|
|
|
- balanced_classzone_idx)) {
|
|
|
+ if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
|
|
|
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
|
|
|
|
|
|
/*
|
|
|
@@ -3317,9 +3326,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
|
|
|
*/
|
|
|
static int kswapd(void *p)
|
|
|
{
|
|
|
- unsigned long order, new_order;
|
|
|
- int classzone_idx, new_classzone_idx;
|
|
|
- int balanced_classzone_idx;
|
|
|
+ unsigned int alloc_order, reclaim_order, classzone_idx;
|
|
|
pg_data_t *pgdat = (pg_data_t*)p;
|
|
|
struct task_struct *tsk = current;
|
|
|
|
|
|
@@ -3349,38 +3356,20 @@ static int kswapd(void *p)
|
|
|
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
|
|
|
set_freezable();
|
|
|
|
|
|
- order = new_order = 0;
|
|
|
- classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
|
|
|
- balanced_classzone_idx = classzone_idx;
|
|
|
+ pgdat->kswapd_order = alloc_order = reclaim_order = 0;
|
|
|
+ pgdat->kswapd_classzone_idx = classzone_idx = 0;
|
|
|
for ( ; ; ) {
|
|
|
bool ret;
|
|
|
|
|
|
- /*
|
|
|
- * While we were reclaiming, there might have been another
|
|
|
- * wakeup, so check the values.
|
|
|
- */
|
|
|
- new_order = pgdat->kswapd_max_order;
|
|
|
- new_classzone_idx = pgdat->classzone_idx;
|
|
|
- pgdat->kswapd_max_order = 0;
|
|
|
- pgdat->classzone_idx = pgdat->nr_zones - 1;
|
|
|
+kswapd_try_sleep:
|
|
|
+ kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
|
|
|
+ classzone_idx);
|
|
|
|
|
|
- if (order < new_order || classzone_idx > new_classzone_idx) {
|
|
|
- /*
|
|
|
- * Don't sleep if someone wants a larger 'order'
|
|
|
- * allocation or has tigher zone constraints
|
|
|
- */
|
|
|
- order = new_order;
|
|
|
- classzone_idx = new_classzone_idx;
|
|
|
- } else {
|
|
|
- kswapd_try_to_sleep(pgdat, order, classzone_idx,
|
|
|
- balanced_classzone_idx);
|
|
|
- order = pgdat->kswapd_max_order;
|
|
|
- classzone_idx = pgdat->classzone_idx;
|
|
|
- new_order = order;
|
|
|
- new_classzone_idx = classzone_idx;
|
|
|
- pgdat->kswapd_max_order = 0;
|
|
|
- pgdat->classzone_idx = pgdat->nr_zones - 1;
|
|
|
- }
|
|
|
+ /* Read the new order and classzone_idx */
|
|
|
+ alloc_order = reclaim_order = pgdat->kswapd_order;
|
|
|
+ classzone_idx = pgdat->kswapd_classzone_idx;
|
|
|
+ pgdat->kswapd_order = 0;
|
|
|
+ pgdat->kswapd_classzone_idx = 0;
|
|
|
|
|
|
ret = try_to_freeze();
|
|
|
if (kthread_should_stop())
|
|
|
@@ -3390,12 +3379,24 @@ static int kswapd(void *p)
|
|
|
* We can speed up thawing tasks if we don't call balance_pgdat
|
|
|
* after returning from the refrigerator
|
|
|
*/
|
|
|
- if (!ret) {
|
|
|
- trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
|
|
|
+ if (ret)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Reclaim begins at the requested order but if a high-order
|
|
|
+ * reclaim fails then kswapd falls back to reclaiming for
|
|
|
+ * order-0. If that happens, kswapd will consider sleeping
|
|
|
+ * for the order it finished reclaiming at (reclaim_order)
|
|
|
+ * but kcompactd is woken to compact for the original
|
|
|
+ * request (alloc_order).
|
|
|
+ */
|
|
|
+ trace_mm_vmscan_kswapd_wake(pgdat->node_id, alloc_order);
|
|
|
+ reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
|
|
|
+ if (reclaim_order < alloc_order)
|
|
|
+ goto kswapd_try_sleep;
|
|
|
|
|
|
- /* return value ignored until next patch */
|
|
|
- balance_pgdat(pgdat, order, classzone_idx);
|
|
|
- }
|
|
|
+ alloc_order = reclaim_order = pgdat->kswapd_order;
|
|
|
+ classzone_idx = pgdat->kswapd_classzone_idx;
|
|
|
}
|
|
|
|
|
|
tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
|
|
|
@@ -3418,10 +3419,8 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
|
|
|
if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
|
|
|
return;
|
|
|
pgdat = zone->zone_pgdat;
|
|
|
- if (pgdat->kswapd_max_order < order) {
|
|
|
- pgdat->kswapd_max_order = order;
|
|
|
- pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx);
|
|
|
- }
|
|
|
+ pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
|
|
|
+ pgdat->kswapd_order = max(pgdat->kswapd_order, order);
|
|
|
if (!waitqueue_active(&pgdat->kswapd_wait))
|
|
|
return;
|
|
|
if (zone_balanced(zone, order, 0))
|