|
@@ -200,6 +200,29 @@ static bool sane_reclaim(struct scan_control *sc)
|
|
|
#endif
|
|
|
return false;
|
|
|
}
|
|
|
+
|
|
|
+static void set_memcg_congestion(pg_data_t *pgdat,
|
|
|
+ struct mem_cgroup *memcg,
|
|
|
+ bool congested)
|
|
|
+{
|
|
|
+ struct mem_cgroup_per_node *mn;
|
|
|
+
|
|
|
+ if (!memcg)
|
|
|
+ return;
|
|
|
+
|
|
|
+ mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
|
|
|
+ WRITE_ONCE(mn->congested, congested);
|
|
|
+}
|
|
|
+
|
|
|
+static bool memcg_congested(pg_data_t *pgdat,
|
|
|
+ struct mem_cgroup *memcg)
|
|
|
+{
|
|
|
+ struct mem_cgroup_per_node *mn;
|
|
|
+
|
|
|
+ mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
|
|
|
+ return READ_ONCE(mn->congested);
|
|
|
+
|
|
|
+}
|
|
|
#else
|
|
|
static bool global_reclaim(struct scan_control *sc)
|
|
|
{
|
|
@@ -210,6 +233,18 @@ static bool sane_reclaim(struct scan_control *sc)
|
|
|
{
|
|
|
return true;
|
|
|
}
|
|
|
+
|
|
|
+static inline void set_memcg_congestion(struct pglist_data *pgdat,
|
|
|
+ struct mem_cgroup *memcg, bool congested)
|
|
|
+{
|
|
|
+}
|
|
|
+
|
|
|
+static inline bool memcg_congested(struct pglist_data *pgdat,
|
|
|
+ struct mem_cgroup *memcg)
|
|
|
+{
|
|
|
+ return false;
|
|
|
+
|
|
|
+}
|
|
|
#endif
|
|
|
|
|
|
/*
|
|
@@ -2474,6 +2509,12 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
+static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
|
|
|
+{
|
|
|
+ return test_bit(PGDAT_CONGESTED, &pgdat->flags) ||
|
|
|
+ (memcg && memcg_congested(pgdat, memcg));
|
|
|
+}
|
|
|
+
|
|
|
static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
|
|
{
|
|
|
struct reclaim_state *reclaim_state = current->reclaim_state;
|
|
@@ -2556,29 +2597,27 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
|
|
if (sc->nr_reclaimed - nr_reclaimed)
|
|
|
reclaimable = true;
|
|
|
|
|
|
- /*
|
|
|
- * If reclaim is isolating dirty pages under writeback, it
|
|
|
- * implies that the long-lived page allocation rate is exceeding
|
|
|
- * the page laundering rate. Either the global limits are not
|
|
|
- * being effective at throttling processes due to the page
|
|
|
- * distribution throughout zones or there is heavy usage of a
|
|
|
- * slow backing device. The only option is to throttle from
|
|
|
- * reclaim context which is not ideal as there is no guarantee
|
|
|
- * the dirtying process is throttled in the same way
|
|
|
- * balance_dirty_pages() manages.
|
|
|
- *
|
|
|
- * Once a node is flagged PGDAT_WRITEBACK, kswapd will count the
|
|
|
- * number of pages under pages flagged for immediate reclaim and
|
|
|
- * stall if any are encountered in the nr_immediate check below.
|
|
|
- */
|
|
|
- if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
|
|
|
- set_bit(PGDAT_WRITEBACK, &pgdat->flags);
|
|
|
+ if (current_is_kswapd()) {
|
|
|
+ /*
|
|
|
+ * If reclaim is isolating dirty pages under writeback,
|
|
|
+ * it implies that the long-lived page allocation rate
|
|
|
+ * is exceeding the page laundering rate. Either the
|
|
|
+ * global limits are not being effective at throttling
|
|
|
+ * processes due to the page distribution throughout
|
|
|
+ * zones or there is heavy usage of a slow backing
|
|
|
+ * device. The only option is to throttle from reclaim
|
|
|
+ * context which is not ideal as there is no guarantee
|
|
|
+ * the dirtying process is throttled in the same way
|
|
|
+ * balance_dirty_pages() manages.
|
|
|
+ *
|
|
|
+ * Once a node is flagged PGDAT_WRITEBACK, kswapd will
|
|
|
+ * count the number of pages under pages flagged for
|
|
|
+ * immediate reclaim and stall if any are encountered
|
|
|
+ * in the nr_immediate check below.
|
|
|
+ */
|
|
|
+ if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
|
|
|
+ set_bit(PGDAT_WRITEBACK, &pgdat->flags);
|
|
|
|
|
|
- /*
|
|
|
- * Legacy memcg will stall in page writeback so avoid forcibly
|
|
|
- * stalling here.
|
|
|
- */
|
|
|
- if (sane_reclaim(sc)) {
|
|
|
/*
|
|
|
* Tag a node as congested if all the dirty pages
|
|
|
* scanned were backed by a congested BDI and
|
|
@@ -2601,6 +2640,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
|
|
congestion_wait(BLK_RW_ASYNC, HZ/10);
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * Legacy memcg will stall in page writeback so avoid forcibly
|
|
|
+ * stalling in wait_iff_congested().
|
|
|
+ */
|
|
|
+ if (!global_reclaim(sc) && sane_reclaim(sc) &&
|
|
|
+ sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
|
|
|
+ set_memcg_congestion(pgdat, root, true);
|
|
|
+
|
|
|
/*
|
|
|
* Stall direct reclaim for IO completions if underlying BDIs
|
|
|
* and node is congested. Allow kswapd to continue until it
|
|
@@ -2608,8 +2655,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
|
|
* the LRU too quickly.
|
|
|
*/
|
|
|
if (!sc->hibernation_mode && !current_is_kswapd() &&
|
|
|
- current_may_throttle())
|
|
|
- wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10);
|
|
|
+ current_may_throttle() && pgdat_memcg_congested(pgdat, root))
|
|
|
+ wait_iff_congested(BLK_RW_ASYNC, HZ/10);
|
|
|
|
|
|
} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
|
|
|
sc->nr_scanned - nr_scanned, sc));
|
|
@@ -2826,6 +2873,7 @@ retry:
|
|
|
continue;
|
|
|
last_pgdat = zone->zone_pgdat;
|
|
|
snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
|
|
|
+ set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
|
|
|
}
|
|
|
|
|
|
delayacct_freepages_end();
|