@@ -1615,6 +1615,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
 	return -1;
 }
 
+/*
+ * Reserve a pageblock for exclusive use of high-order atomic allocations if
+ * there are no empty page blocks that contain a page with a suitable order
+ */
+static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
+				unsigned int alloc_order)
+{
+	int mt;
+	unsigned long max_managed, flags;
+
+	/*
+	 * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
+	 * Check is race-prone but harmless.
+	 */
+	max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+	if (zone->nr_reserved_highatomic >= max_managed)
+		return;
+
+	spin_lock_irqsave(&zone->lock, flags);
+
+	/* Recheck the nr_reserved_highatomic limit under the lock */
+	if (zone->nr_reserved_highatomic >= max_managed)
+		goto out_unlock;
+
+	/* Yoink! */
+	mt = get_pageblock_migratetype(page);
+	if (mt != MIGRATE_HIGHATOMIC &&
+			!is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+		zone->nr_reserved_highatomic += pageblock_nr_pages;
+		set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
+		move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Used when an allocation is about to fail under memory pressure. This
+ * potentially hurts the reliability of high-order allocations when under
+ * intense memory pressure but failed atomic allocations should be easier
+ * to recover from than an OOM.
+ */
+static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
+{
+	struct zonelist *zonelist = ac->zonelist;
+	unsigned long flags;
+	struct zoneref *z;
+	struct zone *zone;
+	struct page *page;
+	int order;
+
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
+								ac->nodemask) {
+		/* Preserve at least one pageblock */
+		if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
+			continue;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		for (order = 0; order < MAX_ORDER; order++) {
+			struct free_area *area = &(zone->free_area[order]);
+
+			if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+				continue;
+
+			page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
+						struct page, lru);
+
+			/*
+			 * It should never happen but changes to locking could
+			 * inadvertently allow a per-cpu drain to add pages
+			 * to MIGRATE_HIGHATOMIC while unreserving so be safe
+			 * and watch for underflows.
+			 */
+			zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
+				zone->nr_reserved_highatomic);
+
+			/*
+			 * Convert to ac->migratetype and avoid the normal
+			 * pageblock stealing heuristics. Minimally, the caller
+			 * is doing the work and needs the pages. More
+			 * importantly, if the block was always converted to
+			 * MIGRATE_UNMOVABLE or another type then the number
+			 * of pageblocks that cannot be completely freed
+			 * may increase.
+			 */
+			set_pageblock_migratetype(page, ac->migratetype);
+			move_freepages_block(zone, page, ac->migratetype);
+			spin_unlock_irqrestore(&zone->lock, flags);
+			return;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
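
The cap enforced by reserve_highatomic_pageblock() above works out to one pageblock plus roughly 1% of the zone's managed pages. The following standalone userspace sketch works that arithmetic through with assumed values (a ~4GB zone of 4KB pages with 2MB pageblocks; these numbers are illustrative, not taken from the patch):

	#include <stdio.h>

	int main(void)
	{
		/* Assumed example values; not taken from the patch. */
		unsigned long managed_pages = 1UL << 20;	/* ~4GB zone with 4KB pages */
		unsigned long pageblock_nr_pages = 512;		/* 2MB pageblocks */

		/* Same formula as reserve_highatomic_pageblock(): ~1% plus one block */
		unsigned long max_managed = managed_pages / 100 + pageblock_nr_pages;

		printf("reserve cap: %lu pages (~%lu pageblocks)\n",
		       max_managed, max_managed / pageblock_nr_pages);
		return 0;
	}
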
@@ -1670,7 +1765,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
  * Call me with the zone->lock already held.
  */
 static struct page *__rmqueue(struct zone *zone, unsigned int order,
-						int migratetype)
+				int migratetype, gfp_t gfp_flags)
 {
 	struct page *page;
 
@@ -1700,7 +1795,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		struct page *page = __rmqueue(zone, order, migratetype);
+		struct page *page = __rmqueue(zone, order, migratetype, 0);
 		if (unlikely(page == NULL))
 			break;
 
@@ -2072,7 +2167,7 @@ int split_free_page(struct page *page)
 static inline
 struct page *buffered_rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
-			gfp_t gfp_flags, int migratetype)
+			gfp_t gfp_flags, int alloc_flags, int migratetype)
 {
 	unsigned long flags;
 	struct page *page;
@@ -2115,7 +2210,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 			WARN_ON_ONCE(order > 1);
 		}
 		spin_lock_irqsave(&zone->lock, flags);
-		page = __rmqueue(zone, order, migratetype);
+
+		page = NULL;
+		if (alloc_flags & ALLOC_HARDER) {
+			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+			if (page)
+				trace_mm_page_alloc_zone_locked(page, order, migratetype);
+		}
+		if (!page)
+			page = __rmqueue(zone, order, migratetype, gfp_flags);
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
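
In the buffered_rmqueue() hunk above, only callers that carry ALLOC_HARDER ever probe the MIGRATE_HIGHATOMIC free list; all other callers go straight to __rmqueue() and never consume the reserve. A minimal userspace sketch of that gating (the flag value and the toy free-list counters are assumptions for illustration only):

	#include <stdbool.h>
	#include <stdio.h>

	#define ALLOC_HARDER 0x10	/* illustrative value only */

	/* Toy counters standing in for the zone's per-migratetype free lists. */
	static int highatomic_free = 1;	/* one page left on the reserved list */
	static int regular_free;	/* the regular lists are exhausted */

	static bool rmqueue_sketch(int alloc_flags)
	{
		/* Atomic, high-priority callers may dip into the reserve first. */
		if ((alloc_flags & ALLOC_HARDER) && highatomic_free > 0) {
			highatomic_free--;
			return true;
		}
		/* Everyone else only ever sees the regular free lists. */
		if (regular_free > 0) {
			regular_free--;
			return true;
		}
		return false;
	}

	int main(void)
	{
		printf("normal caller:       %s\n", rmqueue_sketch(0) ? "page" : "fail");
		printf("ALLOC_HARDER caller: %s\n", rmqueue_sketch(ALLOC_HARDER) ? "page" : "fail");
		return 0;
	}
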
@@ -2226,15 +2329,24 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 			unsigned long mark, int classzone_idx, int alloc_flags,
 			long free_pages)
 {
-	/* free_pages may go negative - that's OK */
 	long min = mark;
 	int o;
 	long free_cma = 0;
 
+	/* free_pages may go negative - that's OK */
 	free_pages -= (1 << order) - 1;
+
 	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
-	if (alloc_flags & ALLOC_HARDER)
+
+	/*
+	 * If the caller does not have rights to ALLOC_HARDER then subtract
+	 * the high-atomic reserves. This will over-estimate the size of the
+	 * atomic reserve but it avoids a search.
+	 */
+	if (likely(!(alloc_flags & ALLOC_HARDER)))
+		free_pages -= z->nr_reserved_highatomic;
+	else
 		min -= min / 4;
 
 #ifdef CONFIG_CMA
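
The effect of the watermark change above is easiest to see with numbers: callers without ALLOC_HARDER have the high-atomic reserve subtracted from their view of free pages, while ALLOC_HARDER callers keep the usual min/4 discount instead. A simplified standalone sketch with assumed flag values and zone numbers (the CMA and per-order checks of __zone_watermark_ok() are omitted):

	#include <stdbool.h>
	#include <stdio.h>

	#define ALLOC_HIGH	0x20	/* illustrative values only */
	#define ALLOC_HARDER	0x10

	static bool watermark_ok(long free_pages, long mark, unsigned int order,
				 long nr_reserved_highatomic, int alloc_flags)
	{
		long min = mark;

		/* free_pages may go negative - that's OK */
		free_pages -= (1 << order) - 1;

		if (alloc_flags & ALLOC_HIGH)
			min -= min / 2;

		/*
		 * Callers without ALLOC_HARDER cannot dip into the high-atomic
		 * reserve, so it is subtracted from their view of free memory.
		 */
		if (!(alloc_flags & ALLOC_HARDER))
			free_pages -= nr_reserved_highatomic;
		else
			min -= min / 4;

		/* The CMA and per-order free_area checks are omitted here. */
		return free_pages > min;
	}

	int main(void)
	{
		/* Assumed zone state: 2000 free pages, min mark 1500, 512 reserved. */
		printf("caller without ALLOC_HARDER: %d\n",
		       watermark_ok(2000, 1500, 0, 512, 0));		/* 2000 - 512 = 1488 <= 1500 -> fails */
		printf("caller with ALLOC_HARDER:    %d\n",
		       watermark_ok(2000, 1500, 0, 512, ALLOC_HARDER));	/* min drops to 1125 -> passes */
		return 0;
	}
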
@@ -2419,10 +2531,18 @@ zonelist_scan:
 
 try_this_zone:
 		page = buffered_rmqueue(ac->preferred_zone, zone, order,
-				gfp_mask, ac->migratetype);
+				gfp_mask, alloc_flags, ac->migratetype);
 		if (page) {
 			if (prep_new_page(page, order, gfp_mask, alloc_flags))
 				goto try_this_zone;
+
+			/*
+			 * If this is a high-order atomic allocation then check
+			 * if the pageblock should be reserved for the future
+			 */
+			if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
+				reserve_highatomic_pageblock(page, zone, order);
+
 			return page;
 		}
 	}
@@ -2695,9 +2815,11 @@ retry:
 
 	/*
 	 * If an allocation failed after direct reclaim, it could be because
-	 * pages are pinned on the per-cpu lists. Drain them and try again
+	 * pages are pinned on the per-cpu lists or in high alloc reserves.
+	 * Shrink them and try again
 	 */
 	if (!page && !drained) {
+		unreserve_highatomic_pageblock(ac);
 		drain_all_pages(NULL);
 		drained = true;
 		goto retry;
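
The unreserve call above slots into an existing drain-once-then-retry sequence: the high-atomic reserve is released and the per-cpu lists are drained at most once before the direct-reclaim path gives up. A toy standalone sketch of that control flow (the helpers are placeholders, not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	static int attempts;

	/* Placeholder stand-ins for the allocator, reserve release and drain. */
	static bool try_alloc(void)	{ return ++attempts > 1; }	/* succeeds only on the retry */
	static void unreserve(void)	{ puts("unreserve high-atomic pageblocks"); }
	static void drain(void)		{ puts("drain per-cpu pages"); }

	int main(void)
	{
		bool drained = false;
		bool ok;

	retry:
		ok = try_alloc();
		if (!ok && !drained) {
			unreserve();
			drain();
			drained = true;
			goto retry;
		}
		printf("allocation %s after %d attempt(s)\n",
		       ok ? "succeeded" : "failed", attempts);
		return 0;
	}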