@@ -153,7 +153,8 @@
  */
 #define EVICTION_SHIFT  (RADIX_TREE_EXCEPTIONAL_ENTRY + \
-                         ZONES_SHIFT + NODES_SHIFT)
+                         ZONES_SHIFT + NODES_SHIFT + \
+                         MEM_CGROUP_ID_SHIFT)
 #define EVICTION_MASK   (~0UL >> EVICTION_SHIFT)
 
 /*
@@ -166,9 +167,10 @@
  */
 static unsigned int bucket_order __read_mostly;
 
-static void *pack_shadow(unsigned long eviction, struct zone *zone)
+static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction)
 {
         eviction >>= bucket_order;
+        eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
         eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone);
         eviction = (eviction << ZONES_SHIFT) | zone_idx(zone);
         eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
@@ -176,18 +178,21 @@ static void *pack_shadow(unsigned long eviction, struct zone *zone)
         return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
 }
 
-static void unpack_shadow(void *shadow, struct zone **zonep,
+static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep,
                           unsigned long *evictionp)
 {
         unsigned long entry = (unsigned long)shadow;
-        int zid, nid;
+        int memcgid, nid, zid;
 
         entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
         zid = entry & ((1UL << ZONES_SHIFT) - 1);
         entry >>= ZONES_SHIFT;
         nid = entry & ((1UL << NODES_SHIFT) - 1);
         entry >>= NODES_SHIFT;
+        memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
+        entry >>= MEM_CGROUP_ID_SHIFT;
 
+        *memcgidp = memcgid;
         *zonep = NODE_DATA(nid)->node_zones + zid;
         *evictionp = entry << bucket_order;
 }
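Taken together, the hunks above define the shadow entry layout: pack_shadow() pushes the fields onto the eviction timestamp from most to least significant (memcg ID, then node, then zone, then the radix tree exceptional tag), and unpack_shadow() pops them back off in reverse order. Below is a standalone, userspace-compilable sketch of that round trip, for illustration only: the shift widths are assumptions (2 exceptional bits, ZONES_SHIFT 2, NODES_SHIFT 10, MEM_CGROUP_ID_SHIFT 16 are plausible x86_64 values, but the kernel derives them from its configuration), and bucket_order is treated as zero.

#include <assert.h>

/* Assumed widths; a real kernel computes these from its config. */
#define EXCEPTIONAL_SHIFT 2     /* stands in for RADIX_TREE_EXCEPTIONAL_SHIFT */
#define ZSHIFT  2               /* stands in for ZONES_SHIFT */
#define NSHIFT  10              /* stands in for NODES_SHIFT */
#define MSHIFT  16              /* stands in for MEM_CGROUP_ID_SHIFT */

static unsigned long pack(int memcgid, int nid, int zid, unsigned long eviction)
{
        eviction = (eviction << MSHIFT) | memcgid;
        eviction = (eviction << NSHIFT) | nid;
        eviction = (eviction << ZSHIFT) | zid;
        eviction <<= EXCEPTIONAL_SHIFT;
        return eviction | 2;    /* tag bit, like RADIX_TREE_EXCEPTIONAL_ENTRY */
}

static void unpack(unsigned long entry, int *memcgid, int *nid, int *zid,
                   unsigned long *eviction)
{
        entry >>= EXCEPTIONAL_SHIFT;
        *zid = entry & ((1UL << ZSHIFT) - 1);
        entry >>= ZSHIFT;
        *nid = entry & ((1UL << NSHIFT) - 1);
        entry >>= NSHIFT;
        *memcgid = entry & ((1UL << MSHIFT) - 1);
        entry >>= MSHIFT;
        *eviction = entry;
}

int main(void)
{
        int memcgid, nid, zid;
        unsigned long ev;

        unpack(pack(1234, 3, 1, 567890UL), &memcgid, &nid, &zid, &ev);
        assert(memcgid == 1234 && nid == 3 && zid == 1 && ev == 567890UL);
        return 0;
}

Under these assumed widths, EVICTION_SHIFT comes to 30, so a 64-bit shadow entry keeps 34 bits of eviction timestamp before bucket_order trades further bits away for radix tree packing density.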
@@ -202,11 +207,20 @@ static void unpack_shadow(void *shadow, struct zone **zonep,
  */
 void *workingset_eviction(struct address_space *mapping, struct page *page)
 {
+        struct mem_cgroup *memcg = page_memcg(page);
         struct zone *zone = page_zone(page);
+        int memcgid = mem_cgroup_id(memcg);
         unsigned long eviction;
+        struct lruvec *lruvec;
 
-        eviction = atomic_long_inc_return(&zone->inactive_age);
-        return pack_shadow(eviction, zone);
+        /* Page is fully exclusive and pins page->mem_cgroup */
+        VM_BUG_ON_PAGE(PageLRU(page), page);
+        VM_BUG_ON_PAGE(page_count(page), page);
+        VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+        eviction = atomic_long_inc_return(&lruvec->inactive_age);
+        return pack_shadow(memcgid, zone, eviction);
 }
 
 /**
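The reason the counter moves from the zone to the lruvec is isolation: with a single zone-wide inactive_age, page cache churn in one cgroup inflates every other cgroup's refault distances. A toy model of the effect (plain C for illustration, not kernel code; the figures are arbitrary):

#include <stdio.h>

int main(void)
{
        unsigned long zone_age = 0;     /* old scheme: shared by all cgroups */
        unsigned long a_age = 0;        /* new scheme: cgroup A's lruvec counter */
        unsigned long b_age = 0;        /* new scheme: cgroup B's lruvec counter */
        unsigned long snap_zone, snap_a;
        int i;

        /* Cgroup A evicts one page and snapshots the counters... */
        snap_zone = ++zone_age;
        snap_a = ++a_age;

        /* ...then unrelated cgroup B evicts 10000 pages before A refaults. */
        for (i = 0; i < 10000; i++) {
                ++zone_age;
                ++b_age;
        }

        printf("shared counter distance: %lu\n", zone_age - snap_zone); /* 10000 */
        printf("per-lruvec distance:     %lu\n", a_age - snap_a);       /* 0 */
        return 0;
}

With the shared counter, A's page looks cold even though A had no intervening cache activity of its own; with per-lruvec aging, its refault distance is zero and it remains eligible for activation.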
@@ -221,13 +235,42 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
 bool workingset_refault(void *shadow)
 {
         unsigned long refault_distance;
+        unsigned long active_file;
+        struct mem_cgroup *memcg;
         unsigned long eviction;
+        struct lruvec *lruvec;
         unsigned long refault;
         struct zone *zone;
+        int memcgid;
 
-        unpack_shadow(shadow, &zone, &eviction);
+        unpack_shadow(shadow, &memcgid, &zone, &eviction);
 
-        refault = atomic_long_read(&zone->inactive_age);
+        rcu_read_lock();
+        /*
+         * Look up the memcg associated with the stored ID. It might
+         * have been deleted since the page's eviction.
+         *
+         * Note that in rare events the ID could have been recycled
+         * for a new cgroup that refaults a shared page. This is
+         * impossible to tell from the available data. However, this
+         * should be a rare and limited disturbance, and activations
+         * are always speculative anyway. Ultimately, it's the aging
+         * algorithm's job to shake out the minimum access frequency
+         * for the active cache.
+         *
+         * XXX: On !CONFIG_MEMCG, this will always return NULL; it
+         * would be better if the root_mem_cgroup existed in all
+         * configurations instead.
+         */
+        memcg = mem_cgroup_from_id(memcgid);
+        if (!mem_cgroup_disabled() && !memcg) {
+                rcu_read_unlock();
+                return false;
+        }
+        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+        refault = atomic_long_read(&lruvec->inactive_age);
+        active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
+        rcu_read_unlock();
 
         /*
          * The unsigned subtraction here gives an accurate distance
@@ -249,7 +292,7 @@ bool workingset_refault(void *shadow)
 
         inc_zone_state(zone, WORKINGSET_REFAULT);
 
-        if (refault_distance <= zone_page_state(zone, NR_ACTIVE_FILE)) {
+        if (refault_distance <= active_file) {
                 inc_zone_state(zone, WORKINGSET_ACTIVATE);
                 return true;
         }
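The refault_distance tested here is computed just above this hunk (context not shown) as the unsigned subtraction the truncated comment refers to, masked to the timestamp width; in the file it has the shape (refault - eviction) & EVICTION_MASK. Unsigned wraparound is what keeps that accurate after the truncated timestamp overflows. A toy-width sketch, runnable as-is:

#include <assert.h>

int main(void)
{
        unsigned long mask = 0xff;      /* toy stand-in for EVICTION_MASK */
        unsigned long eviction = 250;   /* timestamp taken near the top of the range */
        unsigned long refault = 4;      /* counter has wrapped: 260 % 256 */

        /* -246 modulo 256 is 10: the true distance survives the wrap. */
        assert(((refault - eviction) & mask) == 10);
        return 0;
}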
@@ -262,7 +305,23 @@ bool workingset_refault(void *shadow)
  */
 void workingset_activation(struct page *page)
 {
-        atomic_long_inc(&page_zone(page)->inactive_age);
+        struct mem_cgroup *memcg;
+        struct lruvec *lruvec;
+
+        memcg = lock_page_memcg(page);
+        /*
+         * Filter non-memcg pages here, e.g. unmap can call
+         * mark_page_accessed() on VDSO pages.
+         *
+         * XXX: See workingset_refault() - this should return
+         * root_mem_cgroup even for !CONFIG_MEMCG.
+         */
+        if (!mem_cgroup_disabled() && !memcg)
+                goto out;
+        lruvec = mem_cgroup_zone_lruvec(page_zone(page), memcg);
+        atomic_long_inc(&lruvec->inactive_age);
+out:
+        unlock_page_memcg(memcg);
 }
 
 /*
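For orientation, this is roughly how the three hooks fit together from the page cache's point of view in this kernel generation. The sketch below is paraphrased, not verbatim, so treat the exact call sites and conditions as assumptions:

/*
 * Lifecycle of the three hooks (paraphrased):
 *
 *   reclaim:     __remove_mapping() calls
 *                shadow = workingset_eviction(mapping, page);
 *                and stores the shadow entry in the emptied
 *                radix tree slot.
 *
 *   refault:     the fault path finds the shadow entry and does
 *                if (shadow && workingset_refault(shadow)) {
 *                        SetPageActive(page);
 *                        workingset_activation(page);
 *                }
 *
 *   activation:  mark_page_accessed() on an already-resident page
 *                ends up in workingset_activation(), bumping the
 *                owning lruvec's inactive_age.
 */

All three paths are now keyed to the same per-lruvec counter, closing the loop opened in workingset_eviction() above.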