@@ -55,6 +55,7 @@
#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
+#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
@@ -714,7 +715,7 @@ static inline void rmv_page_order(struct page *page)
/*
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
- * (a) the buddy is not in a hole &&
+ * (a) the buddy is not in a hole (check before calling!) &&
* (b) the buddy is in the buddy system &&
* (c) a page and its buddy have the same order &&
* (d) a page and its buddy are in the same zone.
@@ -729,9 +730,6 @@ static inline void rmv_page_order(struct page *page)
static inline int page_is_buddy(struct page *page, struct page *buddy,
unsigned int order)
{
- if (!pfn_valid_within(page_to_pfn(buddy)))
- return 0;
-
if (page_is_guard(buddy) && page_order(buddy) == order) {
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
@@ -787,9 +785,8 @@ static inline void __free_one_page(struct page *page,
struct zone *zone, unsigned int order,
int migratetype)
{
- unsigned long page_idx;
- unsigned long combined_idx;
- unsigned long uninitialized_var(buddy_idx);
+ unsigned long combined_pfn;
+ unsigned long uninitialized_var(buddy_pfn);
struct page *buddy;
unsigned int max_order;

@@ -802,15 +799,16 @@ static inline void __free_one_page(struct page *page,
if (likely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);

- page_idx = pfn & ((1 << MAX_ORDER) - 1);
-
- VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
+ VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);

continue_merging:
while (order < max_order - 1) {
- buddy_idx = __find_buddy_index(page_idx, order);
- buddy = page + (buddy_idx - page_idx);
+ buddy_pfn = __find_buddy_pfn(pfn, order);
+ buddy = page + (buddy_pfn - pfn);
+
+ if (!pfn_valid_within(buddy_pfn))
+ goto done_merging;
if (!page_is_buddy(page, buddy, order))
goto done_merging;
/*
@@ -824,9 +822,9 @@ continue_merging:
zone->free_area[order].nr_free--;
rmv_page_order(buddy);
}
- combined_idx = buddy_idx & page_idx;
- page = page + (combined_idx - page_idx);
- page_idx = combined_idx;
+ combined_pfn = buddy_pfn & pfn;
+ page = page + (combined_pfn - pfn);
+ pfn = combined_pfn;
order++;
}
if (max_order < MAX_ORDER) {
@@ -841,8 +839,8 @@ continue_merging:
if (unlikely(has_isolate_pageblock(zone))) {
int buddy_mt;

- buddy_idx = __find_buddy_index(page_idx, order);
- buddy = page + (buddy_idx - page_idx);
+ buddy_pfn = __find_buddy_pfn(pfn, order);
+ buddy = page + (buddy_pfn - pfn);
buddy_mt = get_pageblock_migratetype(buddy);

if (migratetype != buddy_mt
@@ -865,12 +863,12 @@ done_merging:
* so it's less likely to be used soon and more likely to be merged
* as a higher order page
*/
- if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
+ if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) {
struct page *higher_page, *higher_buddy;
- combined_idx = buddy_idx & page_idx;
- higher_page = page + (combined_idx - page_idx);
- buddy_idx = __find_buddy_index(combined_idx, order + 1);
- higher_buddy = higher_page + (buddy_idx - combined_idx);
+ combined_pfn = buddy_pfn & pfn;
+ higher_page = page + (combined_pfn - pfn);
+ buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
+ higher_buddy = higher_page + (buddy_pfn - combined_pfn);
if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
list_add_tail(&page->lru,
&zone->free_area[order].free_list[migratetype]);
@@ -3007,18 +3005,12 @@ static inline bool should_suppress_show_mem(void)
return ret;
}

-static DEFINE_RATELIMIT_STATE(nopage_rs,
- DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
-
-void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
+static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
- struct va_format vaf;
- va_list args;
+ static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1);

- if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
- debug_guardpage_minorder() > 0)
+ if (should_suppress_show_mem() || !__ratelimit(&show_mem_rs))
return;

/*
@@ -3033,6 +3025,20 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;

+ show_mem(filter, nodemask);
+}
+
+void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
+ debug_guardpage_minorder() > 0)
+ return;
+
pr_warn("%s: ", current->comm);

va_start(args, fmt);
@@ -3041,11 +3047,36 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
pr_cont("%pV", &vaf);
va_end(args);

- pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask);
+ pr_cont(", mode:%#x(%pGg), nodemask=", gfp_mask, &gfp_mask);
+ if (nodemask)
+ pr_cont("%*pbl\n", nodemask_pr_args(nodemask));
+ else
+ pr_cont("(null)\n");
+
+ cpuset_print_current_mems_allowed();

dump_stack();
- if (!should_suppress_show_mem())
- show_mem(filter);
+ warn_alloc_show_mem(gfp_mask, nodemask);
+}
+
+static inline struct page *
+__alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
+ unsigned int alloc_flags,
+ const struct alloc_context *ac)
+{
+ struct page *page;
+
+ page = get_page_from_freelist(gfp_mask, order,
+ alloc_flags|ALLOC_CPUSET, ac);
+ /*
+ * fallback to ignore cpuset restriction if our nodes
+ * are depleted
+ */
+ if (!page)
+ page = get_page_from_freelist(gfp_mask, order,
+ alloc_flags, ac);
+
+ return page;
}

static inline struct page *
@@ -3083,47 +3114,42 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
if (page)
goto out;

- if (!(gfp_mask & __GFP_NOFAIL)) {
- /* Coredumps can quickly deplete all memory reserves */
- if (current->flags & PF_DUMPCORE)
- goto out;
- /* The OOM killer will not help higher order allocs */
- if (order > PAGE_ALLOC_COSTLY_ORDER)
- goto out;
- /* The OOM killer does not needlessly kill tasks for lowmem */
- if (ac->high_zoneidx < ZONE_NORMAL)
- goto out;
- if (pm_suspended_storage())
- goto out;
- /*
- * XXX: GFP_NOFS allocations should rather fail than rely on
- * other request to make a forward progress.
- * We are in an unfortunate situation where out_of_memory cannot
- * do much for this context but let's try it to at least get
- * access to memory reserved if the current task is killed (see
- * out_of_memory). Once filesystems are ready to handle allocation
- * failures more gracefully we should just bail out here.
- */
+ /* Coredumps can quickly deplete all memory reserves */
+ if (current->flags & PF_DUMPCORE)
+ goto out;
+ /* The OOM killer will not help higher order allocs */
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
+ goto out;
+ /* The OOM killer does not needlessly kill tasks for lowmem */
+ if (ac->high_zoneidx < ZONE_NORMAL)
+ goto out;
+ if (pm_suspended_storage())
+ goto out;
+ /*
+ * XXX: GFP_NOFS allocations should rather fail than rely on
+ * other request to make a forward progress.
+ * We are in an unfortunate situation where out_of_memory cannot
+ * do much for this context but let's try it to at least get
+ * access to memory reserved if the current task is killed (see
+ * out_of_memory). Once filesystems are ready to handle allocation
+ * failures more gracefully we should just bail out here.
+ */
+
+ /* The OOM killer may not free memory on a specific node */
+ if (gfp_mask & __GFP_THISNODE)
+ goto out;

- /* The OOM killer may not free memory on a specific node */
- if (gfp_mask & __GFP_THISNODE)
- goto out;
- }
/* Exhausted what can be done so it's blamo time */
if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
*did_some_progress = 1;

- if (gfp_mask & __GFP_NOFAIL) {
- page = get_page_from_freelist(gfp_mask, order,
- ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac);
- /*
- * fallback to ignore cpuset restriction if our nodes
- * are depleted
- */
- if (!page)
- page = get_page_from_freelist(gfp_mask, order,
+ /*
+ * Help non-failing allocations by giving them access to memory
+ * reserves
+ */
+ if (gfp_mask & __GFP_NOFAIL)
+ page = __alloc_pages_cpuset_fallback(gfp_mask, order,
ALLOC_NO_WATERMARKS, ac);
- }
}
out:
mutex_unlock(&oom_lock);
@@ -3192,6 +3218,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
{
int max_retries = MAX_COMPACT_RETRIES;
int min_priority;
+ bool ret = false;
+ int retries = *compaction_retries;
+ enum compact_priority priority = *compact_priority;

if (!order)
return false;
@@ -3213,8 +3242,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
* But do not retry if the given zonelist is not suitable for
* compaction.
*/
- if (compaction_withdrawn(compact_result))
- return compaction_zonelist_suitable(ac, order, alloc_flags);
+ if (compaction_withdrawn(compact_result)) {
+ ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+ goto out;
+ }

/*
* !costly requests are much more important than __GFP_REPEAT
@@ -3226,8 +3257,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
*/
if (order > PAGE_ALLOC_COSTLY_ORDER)
max_retries /= 4;
- if (*compaction_retries <= max_retries)
- return true;
+ if (*compaction_retries <= max_retries) {
+ ret = true;
+ goto out;
+ }

/*
* Make sure there are attempts at the highest priority if we exhausted
@@ -3236,12 +3269,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
check_priority:
min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+
if (*compact_priority > min_priority) {
(*compact_priority)--;
*compaction_retries = 0;
- return true;
+ ret = true;
}
- return false;
+out:
+ trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
+ return ret;
}
#else
static inline struct page *
@@ -3464,6 +3500,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
ac->nodemask) {
unsigned long available;
unsigned long reclaimable;
+ unsigned long min_wmark = min_wmark_pages(zone);
+ bool wmark;

available = reclaimable = zone_reclaimable_pages(zone);
available -= DIV_ROUND_UP((*no_progress_loops) * available,
@@ -3474,8 +3512,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Would the allocation succeed if we reclaimed the whole
* available?
*/
- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
- ac_classzone_idx(ac), alloc_flags, available)) {
+ wmark = __zone_watermark_ok(zone, order, min_wmark,
+ ac_classzone_idx(ac), alloc_flags, available);
+ trace_reclaim_retry_zone(z, order, reclaimable,
+ available, min_wmark, *no_progress_loops, wmark);
+ if (wmark) {
/*
* If we didn't make any progress and have a lot of
* dirty + writeback pages then we should wait for
@@ -3555,6 +3596,14 @@ retry_cpuset:
no_progress_loops = 0;
compact_priority = DEF_COMPACT_PRIORITY;
cpuset_mems_cookie = read_mems_allowed_begin();
+
+ /*
+ * The fast path uses conservative alloc_flags to succeed only until
+ * kswapd needs to be woken up, and to avoid the cost of setting up
+ * alloc_flags precisely. So we do that now.
+ */
+ alloc_flags = gfp_to_alloc_flags(gfp_mask);
+
/*
* We need to recalculate the starting point for the zonelist iterator
* because we might have used different nodemask in the fast path, or
@@ -3566,14 +3615,6 @@ retry_cpuset:
if (!ac->preferred_zoneref->zone)
goto nopage;

-
- /*
- * The fast path uses conservative alloc_flags to succeed only until
- * kswapd needs to be woken up, and to avoid the cost of setting up
- * alloc_flags precisely. So we do that now.
- */
- alloc_flags = gfp_to_alloc_flags(gfp_mask);
-
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
wake_all_kswapds(order, ac);

@@ -3650,35 +3691,21 @@ retry:
goto got_pg;

/* Caller is not willing to reclaim, we can't balance anything */
- if (!can_direct_reclaim) {
- /*
- * All existing users of the __GFP_NOFAIL are blockable, so warn
- * of any new users that actually allow this type of allocation
- * to fail.
- */
- WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL);
+ if (!can_direct_reclaim)
goto nopage;
- }

- /* Avoid recursion of direct reclaim */
- if (current->flags & PF_MEMALLOC) {
- /*
- * __GFP_NOFAIL request from this context is rather bizarre
- * because we cannot reclaim anything and only can loop waiting
- * for somebody to do a work for us.
- */
- if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
- cond_resched();
- goto retry;
- }
- goto nopage;
+ /* Make sure we know about allocations which stall for too long */
+ if (time_after(jiffies, alloc_start + stall_timeout)) {
+ warn_alloc(gfp_mask, ac->nodemask,
+ "page allocation stalls for %ums, order:%u",
+ jiffies_to_msecs(jiffies-alloc_start), order);
+ stall_timeout += 10 * HZ;
}

- /* Avoid allocations with no watermarks from looping endlessly */
- if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
+ /* Avoid recursion of direct reclaim */
+ if (current->flags & PF_MEMALLOC)
goto nopage;

-
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
&did_some_progress);
@@ -3702,14 +3729,6 @@ retry:
if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
goto nopage;

- /* Make sure we know about allocations which stall for too long */
- if (time_after(jiffies, alloc_start + stall_timeout)) {
- warn_alloc(gfp_mask,
- "page allocation stalls for %ums, order:%u",
- jiffies_to_msecs(jiffies-alloc_start), order);
- stall_timeout += 10 * HZ;
- }
-
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
did_some_progress > 0, &no_progress_loops))
goto retry;
@@ -3738,6 +3757,10 @@ retry:
if (page)
goto got_pg;

+ /* Avoid allocations with no watermarks from looping endlessly */
+ if (test_thread_flag(TIF_MEMDIE))
+ goto nopage;
+
/* Retry as long as the OOM killer is making progress */
if (did_some_progress) {
no_progress_loops = 0;
@@ -3755,7 +3778,48 @@ nopage:
if (read_mems_allowed_retry(cpuset_mems_cookie))
goto retry_cpuset;

- warn_alloc(gfp_mask,
+ /*
+ * Make sure that __GFP_NOFAIL request doesn't leak out and make sure
+ * we always retry
+ */
+ if (gfp_mask & __GFP_NOFAIL) {
+ /*
+ * All existing users of the __GFP_NOFAIL are blockable, so warn
+ * of any new users that actually require GFP_NOWAIT
+ */
+ if (WARN_ON_ONCE(!can_direct_reclaim))
+ goto fail;
+
+ /*
+ * PF_MEMALLOC request from this context is rather bizarre
+ * because we cannot reclaim anything and only can loop waiting
+ * for somebody to do a work for us
+ */
+ WARN_ON_ONCE(current->flags & PF_MEMALLOC);
+
+ /*
+ * non failing costly orders are a hard requirement which we
+ * are not prepared for much so let's warn about these users
+ * so that we can identify them and convert them to something
+ * else.
+ */
+ WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
+
+ /*
+ * Help non-failing allocations by giving them access to memory
+ * reserves but do not use ALLOC_NO_WATERMARKS because this
+ * could deplete whole memory reserves which would just make
+ * the situation worse
+ */
+ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
+ if (page)
+ goto got_pg;
+
+ cond_resched();
+ goto retry;
+ }
+fail:
+ warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
return page;
@@ -4252,20 +4316,20 @@ void si_meminfo_node(struct sysinfo *val, int nid)
* Determine whether the node should be displayed or not, depending on whether
* SHOW_MEM_FILTER_NODES was passed to show_free_areas().
*/
-bool skip_free_areas_node(unsigned int flags, int nid)
+static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
{
- bool ret = false;
- unsigned int cpuset_mems_cookie;
-
if (!(flags & SHOW_MEM_FILTER_NODES))
- goto out;
+ return false;

- do {
- cpuset_mems_cookie = read_mems_allowed_begin();
- ret = !node_isset(nid, cpuset_current_mems_allowed);
- } while (read_mems_allowed_retry(cpuset_mems_cookie));
-out:
- return ret;
+ /*
+ * no node mask - aka implicit memory numa policy. Do not bother with
+ * the synchronization - read_mems_allowed_begin - because we do not
+ * have to be precise here.
+ */
+ if (!nodemask)
+ nodemask = &cpuset_current_mems_allowed;
+
+ return !node_isset(nid, *nodemask);
}

#define K(x) ((x) << (PAGE_SHIFT-10))
@@ -4306,7 +4370,7 @@ static void show_migration_types(unsigned char type)
* SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
* cpuset.
*/
-void show_free_areas(unsigned int filter)
+void show_free_areas(unsigned int filter, nodemask_t *nodemask)
{
unsigned long free_pcp = 0;
int cpu;
@@ -4314,7 +4378,7 @@ void show_free_areas(unsigned int filter)
pg_data_t *pgdat;

for_each_populated_zone(zone) {
- if (skip_free_areas_node(filter, zone_to_nid(zone)))
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;

for_each_online_cpu(cpu)
@@ -4348,6 +4412,9 @@ void show_free_areas(unsigned int filter)
global_page_state(NR_FREE_CMA_PAGES));

for_each_online_pgdat(pgdat) {
+ if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
+ continue;
+
printk("Node %d"
" active_anon:%lukB"
" inactive_anon:%lukB"
@@ -4397,7 +4464,7 @@ void show_free_areas(unsigned int filter)
for_each_populated_zone(zone) {
int i;

- if (skip_free_areas_node(filter, zone_to_nid(zone)))
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;

free_pcp = 0;
@@ -4462,7 +4529,7 @@ void show_free_areas(unsigned int filter)
unsigned long nr[MAX_ORDER], flags, total = 0;
unsigned char types[MAX_ORDER];

- if (skip_free_areas_node(filter, zone_to_nid(zone)))
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
show_node(zone);
printk(KERN_CONT "%s: ", zone->name);
@@ -5083,8 +5150,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context != MEMMAP_EARLY)
goto not_early;

- if (!early_pfn_valid(pfn))
+ if (!early_pfn_valid(pfn)) {
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+ /*
+ * Skip to the pfn preceding the next valid one (or
+ * end_pfn), such that we hit a valid pfn (or end_pfn)
+ * on our next iteration of the loop.
+ */
+ pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
+#endif
continue;
+ }
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
|