@@ -229,9 +229,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long
-shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
-                 unsigned long nr_pages_scanned, unsigned long lru_pages)
+static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
+                                  struct shrinker *shrinker,
+                                  unsigned long nr_scanned,
+                                  unsigned long nr_eligible)
 {
         unsigned long freed = 0;
         unsigned long long delta;
@@ -255,9 +256,9 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
 
         total_scan = nr;
-        delta = (4 * nr_pages_scanned) / shrinker->seeks;
+        delta = (4 * nr_scanned) / shrinker->seeks;
         delta *= freeable;
-        do_div(delta, lru_pages + 1);
+        do_div(delta, nr_eligible + 1);
         total_scan += delta;
         if (total_scan < 0) {
                 pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
@@ -289,8 +290,8 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
                 total_scan = freeable * 2;
 
         trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                                   nr_pages_scanned, lru_pages,
-                                   freeable, delta, total_scan);
+                                   nr_scanned, nr_eligible,
+                                   freeable, delta, total_scan);
 
         /*
          * Normally, we should not scan less than batch_size objects in one
@@ -339,34 +340,37 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         return freed;
 }
 
-/*
- * Call the shrink functions to age shrinkable caches
- *
- * Here we assume it costs one seek to replace a lru page and that it also
- * takes a seek to recreate a cache object. With this in mind we age equal
- * percentages of the lru and ageable caches. This should balance the seeks
- * generated by these structures.
+/**
+ * shrink_node_slabs - shrink slab caches of a given node
+ * @gfp_mask: allocation context
+ * @nid: node whose slab caches to target
+ * @nr_scanned: pressure numerator
+ * @nr_eligible: pressure denominator
  *
- * If the vm encountered mapped pages on the LRU it increase the pressure on
- * slab to avoid swapping.
+ * Call the shrink functions to age shrinkable caches.
  *
- * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
+ * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set;
+ * unaware shrinkers will receive a node id of 0 instead.
  *
- * `lru_pages' represents the number of on-LRU pages in all the zones which
- * are eligible for the caller's allocation attempt. It is used for balancing
- * slab reclaim versus page reclaim.
+ * @nr_scanned and @nr_eligible form a ratio that indicates how much of
+ * the available objects should be scanned. Page reclaim, for example,
+ * passes the number of pages scanned and the number of pages on the
+ * LRU lists that it considered on @nid, plus a bias in @nr_scanned
+ * when it encountered mapped pages. The ratio is further biased by
+ * the ->seeks setting of the shrink function, which indicates the
+ * cost to recreate an object relative to that of an LRU page.
  *
- * Returns the number of slab objects which we shrunk.
+ * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_slab(struct shrink_control *shrinkctl,
-                          unsigned long nr_pages_scanned,
-                          unsigned long lru_pages)
+unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
+                                unsigned long nr_scanned,
+                                unsigned long nr_eligible)
 {
         struct shrinker *shrinker;
         unsigned long freed = 0;
 
-        if (nr_pages_scanned == 0)
-                nr_pages_scanned = SWAP_CLUSTER_MAX;
+        if (nr_scanned == 0)
+                nr_scanned = SWAP_CLUSTER_MAX;
 
         if (!down_read_trylock(&shrinker_rwsem)) {
                 /*
@@ -380,20 +384,17 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
         }
 
         list_for_each_entry(shrinker, &shrinker_list, list) {
-                if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) {
-                        shrinkctl->nid = 0;
-                        freed += shrink_slab_node(shrinkctl, shrinker,
-                                        nr_pages_scanned, lru_pages);
-                        continue;
-                }
+                struct shrink_control sc = {
+                        .gfp_mask = gfp_mask,
+                        .nid = nid,
+                };
 
-                for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
-                        if (node_online(shrinkctl->nid))
-                                freed += shrink_slab_node(shrinkctl, shrinker,
-                                                nr_pages_scanned, lru_pages);
+                if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
+                        sc.nid = 0;
 
-                }
+                freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
         }
+
         up_read(&shrinker_rwsem);
 out:
         cond_resched();
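
[Editorial note] The ratio described in the new shrink_node_slabs() kernel-doc is what the delta computation in shrink_slabs() above applies per shrinker. As a rough userspace illustration only (scan_target() is a name made up for this sketch, and it ignores the nr_deferred carry-over and batching logic), the scan target works out roughly like this:

/* Illustration of the pressure ratio only; not kernel code. */
#include <stdio.h>

static unsigned long long scan_target(unsigned long nr_scanned,
                                      unsigned long nr_eligible,
                                      unsigned long freeable,
                                      unsigned int seeks)
{
        unsigned long long delta;

        delta = (4ULL * nr_scanned) / seeks;    /* bias by object recreation cost */
        delta *= freeable;                      /* scale to this cache's size */
        delta /= nr_eligible + 1;               /* apply the LRU scan ratio */
        return delta;
}

int main(void)
{
        /*
         * 1000 of 100000 eligible LRU pages scanned, 50000 freeable
         * objects, seeks of 2 (DEFAULT_SEEKS): the cache is asked to
         * scan ~1000 objects, i.e. twice the 1% LRU scan ratio.
         */
        printf("%llu\n", scan_target(1000, 100000, 50000, 2));
        return 0;
}
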
@@ -1876,7 +1877,8 @@ enum scan_balance {
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
 static void get_scan_count(struct lruvec *lruvec, int swappiness,
-                           struct scan_control *sc, unsigned long *nr)
+                           struct scan_control *sc, unsigned long *nr,
+                           unsigned long *lru_pages)
 {
         struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
         u64 fraction[2];
@@ -2022,6 +2024,7 @@ out:
         some_scanned = false;
         /* Only use force_scan on second pass. */
         for (pass = 0; !some_scanned && pass < 2; pass++) {
+                *lru_pages = 0;
                 for_each_evictable_lru(lru) {
                         int file = is_file_lru(lru);
                         unsigned long size;
@@ -2048,14 +2051,19 @@ out:
                         case SCAN_FILE:
                         case SCAN_ANON:
                                 /* Scan one type exclusively */
-                                if ((scan_balance == SCAN_FILE) != file)
+                                if ((scan_balance == SCAN_FILE) != file) {
+                                        size = 0;
                                         scan = 0;
+                                }
                                 break;
                         default:
                                 /* Look ma, no brain */
                                 BUG();
                         }
+
+                        *lru_pages += size;
                         nr[lru] = scan;
+
                         /*
                          * Skip the second pass and don't force_scan,
                          * if we found something to scan.
@@ -2069,7 +2077,7 @@ out:
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
 static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
-                          struct scan_control *sc)
+                          struct scan_control *sc, unsigned long *lru_pages)
 {
         unsigned long nr[NR_LRU_LISTS];
         unsigned long targets[NR_LRU_LISTS];
@@ -2080,7 +2088,7 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
         struct blk_plug plug;
         bool scan_adjusted;
 
-        get_scan_count(lruvec, swappiness, sc, nr);
+        get_scan_count(lruvec, swappiness, sc, nr, lru_pages);
 
         /* Record the original scan target for proportional adjustments later */
         memcpy(targets, nr, sizeof(nr));
@@ -2258,7 +2266,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
                 }
 }
 
-static bool shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc,
+                        bool is_classzone)
 {
         unsigned long nr_reclaimed, nr_scanned;
         bool reclaimable = false;
@@ -2269,6 +2278,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
                         .zone = zone,
                         .priority = sc->priority,
                 };
+                unsigned long zone_lru_pages = 0;
                 struct mem_cgroup *memcg;
 
                 nr_reclaimed = sc->nr_reclaimed;
@@ -2276,13 +2286,15 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
 
                 memcg = mem_cgroup_iter(root, NULL, &reclaim);
                 do {
+                        unsigned long lru_pages;
                         struct lruvec *lruvec;
                         int swappiness;
 
                         lruvec = mem_cgroup_zone_lruvec(zone, memcg);
                         swappiness = mem_cgroup_swappiness(memcg);
 
-                        shrink_lruvec(lruvec, swappiness, sc);
+                        shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
+                        zone_lru_pages += lru_pages;
 
                         /*
                          * Direct reclaim and kswapd have to scan all memory
@@ -2302,6 +2314,25 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
                         memcg = mem_cgroup_iter(root, memcg, &reclaim);
                 } while (memcg);
 
+                /*
+                 * Shrink the slab caches in the same proportion that
+                 * the eligible LRU pages were scanned.
+                 */
+                if (global_reclaim(sc) && is_classzone) {
+                        struct reclaim_state *reclaim_state;
+
+                        shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
+                                          sc->nr_scanned - nr_scanned,
+                                          zone_lru_pages);
+
+                        reclaim_state = current->reclaim_state;
+                        if (reclaim_state) {
+                                sc->nr_reclaimed +=
+                                        reclaim_state->reclaimed_slab;
+                                reclaim_state->reclaimed_slab = 0;
+                        }
+                }
+
                 vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
                            sc->nr_scanned - nr_scanned,
                            sc->nr_reclaimed - nr_reclaimed);
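
[Editorial note] The per-node pressure computed here ends up as sc->nr_to_scan requests against each registered shrinker. For reference only, a minimal shrinker as this reworked path sees it could look like the sketch below; the demo_* names and the constant object count are made up, and a real cache would count and free its own objects on sc->nid:

/* Illustrative shrinker module sketch; not part of this patch. */
#include <linux/module.h>
#include <linux/shrinker.h>

static unsigned long demo_count(struct shrinker *s, struct shrink_control *sc)
{
        /* freeable objects on node sc->nid; 0 means nothing to do */
        return 1024;
}

static unsigned long demo_scan(struct shrinker *s, struct shrink_control *sc)
{
        /* asked to free up to sc->nr_to_scan objects from node sc->nid */
        return sc->nr_to_scan;  /* pretend everything was freed */
}

static struct shrinker demo_shrinker = {
        .count_objects  = demo_count,
        .scan_objects   = demo_scan,
        .seeks          = DEFAULT_SEEKS,        /* cost relative to an LRU page */
        .flags          = SHRINKER_NUMA_AWARE,  /* otherwise sc->nid is forced to 0 */
};

static int __init demo_init(void)
{
        return register_shrinker(&demo_shrinker);
}
module_init(demo_init);

static void __exit demo_exit(void)
{
        unregister_shrinker(&demo_shrinker);
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");
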
@@ -2376,12 +2407,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         struct zone *zone;
         unsigned long nr_soft_reclaimed;
         unsigned long nr_soft_scanned;
-        unsigned long lru_pages = 0;
-        struct reclaim_state *reclaim_state = current->reclaim_state;
         gfp_t orig_mask;
-        struct shrink_control shrink = {
-                .gfp_mask = sc->gfp_mask,
-        };
         enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
         bool reclaimable = false;
 
@@ -2394,12 +2420,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         if (buffer_heads_over_limit)
                 sc->gfp_mask |= __GFP_HIGHMEM;
 
-        nodes_clear(shrink.nodes_to_scan);
-
         for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
+                                        requested_highidx, sc->nodemask) {
+                enum zone_type classzone_idx;
+
                 if (!populated_zone(zone))
                         continue;
+
+                classzone_idx = requested_highidx;
+                while (!populated_zone(zone->zone_pgdat->node_zones +
+                                       classzone_idx))
+                        classzone_idx--;
+
                 /*
                  * Take care memory controller reclaiming has small influence
                  * to global LRU.
@@ -2409,9 +2441,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                                 GFP_KERNEL | __GFP_HARDWALL))
                                 continue;
 
-                        lru_pages += zone_reclaimable_pages(zone);
-                        node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-
                         if (sc->priority != DEF_PRIORITY &&
                             !zone_reclaimable(zone))
                                 continue; /* Let kswapd poll it */
@@ -2450,7 +2479,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                         /* need some check for avoid more shrink_zone() */
                 }
 
-                if (shrink_zone(zone, sc))
+                if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
                         reclaimable = true;
 
                 if (global_reclaim(sc) &&
@@ -2458,20 +2487,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                         reclaimable = true;
         }
 
-        /*
-         * Don't shrink slabs when reclaiming memory from over limit cgroups
-         * but do shrink slab at least once when aborting reclaim for
-         * compaction to avoid unevenly scanning file/anon LRU pages over slab
-         * pages.
-         */
-        if (global_reclaim(sc)) {
-                shrink_slab(&shrink, sc->nr_scanned, lru_pages);
-                if (reclaim_state) {
-                        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                        reclaim_state->reclaimed_slab = 0;
-                }
-        }
-
         /*
          * Restore to original mask to avoid the impact on the caller if we
          * promoted it to __GFP_HIGHMEM.
@@ -2736,6 +2751,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
         };
         struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
         int swappiness = mem_cgroup_swappiness(memcg);
+        unsigned long lru_pages;
 
         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2751,7 +2767,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
          * will pick up pages from other mem cgroup's as well. We hack
          * the priority and make it zero.
          */
-        shrink_lruvec(lruvec, swappiness, &sc);
+        shrink_lruvec(lruvec, swappiness, &sc, &lru_pages);
 
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2932,15 +2948,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 static bool kswapd_shrink_zone(struct zone *zone,
                                int classzone_idx,
                                struct scan_control *sc,
-                               unsigned long lru_pages,
                                unsigned long *nr_attempted)
 {
         int testorder = sc->order;
         unsigned long balance_gap;
-        struct reclaim_state *reclaim_state = current->reclaim_state;
-        struct shrink_control shrink = {
-                .gfp_mask = sc->gfp_mask,
-        };
         bool lowmem_pressure;
 
         /* Reclaim above the high watermark. */
@@ -2975,13 +2986,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
                                                 balance_gap, classzone_idx))
                 return true;
 
-        shrink_zone(zone, sc);
-        nodes_clear(shrink.nodes_to_scan);
-        node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-
-        reclaim_state->reclaimed_slab = 0;
-        shrink_slab(&shrink, sc->nr_scanned, lru_pages);
-        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+        shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
 
         /* Account for the number of pages attempted to reclaim */
         *nr_attempted += sc->nr_to_reclaim;
@@ -3042,7 +3047,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
         count_vm_event(PAGEOUTRUN);
 
         do {
-                unsigned long lru_pages = 0;
                 unsigned long nr_attempted = 0;
                 bool raise_priority = true;
                 bool pgdat_needs_compaction = (order > 0);
@@ -3102,8 +3106,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                         if (!populated_zone(zone))
                                 continue;
 
-                        lru_pages += zone_reclaimable_pages(zone);
-
                         /*
                          * If any zone is currently balanced then kswapd will
                          * not call compaction as it is expected that the
@@ -3159,8 +3161,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                          * that that high watermark would be met at 100%
                          * efficiency.
                          */
-                        if (kswapd_shrink_zone(zone, end_zone, &sc,
-                                               lru_pages, &nr_attempted))
+                        if (kswapd_shrink_zone(zone, end_zone,
+                                               &sc, &nr_attempted))
                                 raise_priority = false;
                 }
 
@@ -3612,10 +3614,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
                 .may_swap = 1,
         };
-        struct shrink_control shrink = {
-                .gfp_mask = sc.gfp_mask,
-        };
-        unsigned long nr_slab_pages0, nr_slab_pages1;
 
         cond_resched();
         /*
@@ -3634,44 +3632,10 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                  * priorities until we have enough memory freed.
                  */
                 do {
-                        shrink_zone(zone, &sc);
+                        shrink_zone(zone, &sc, true);
                 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
         }
 
-        nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-        if (nr_slab_pages0 > zone->min_slab_pages) {
-                /*
-                 * shrink_slab() does not currently allow us to determine how
-                 * many pages were freed in this zone. So we take the current
-                 * number of slab pages and shake the slab until it is reduced
-                 * by the same nr_pages that we used for reclaiming unmapped
-                 * pages.
-                 */
-                nodes_clear(shrink.nodes_to_scan);
-                node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-                for (;;) {
-                        unsigned long lru_pages = zone_reclaimable_pages(zone);
-
-                        /* No reclaimable slab or very low memory pressure */
-                        if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages))
-                                break;
-
-                        /* Freed enough memory */
-                        nr_slab_pages1 = zone_page_state(zone,
-                                                         NR_SLAB_RECLAIMABLE);
-                        if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
-                                break;
-                }
-
-                /*
-                 * Update nr_reclaimed by the number of slab pages we
-                 * reclaimed from this zone.
-                 */
-                nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-                if (nr_slab_pages1 < nr_slab_pages0)
-                        sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
-        }
-
         p->reclaim_state = NULL;
         current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
         lockdep_clear_current_reclaim_state();