@@ -34,10 +34,9 @@
 #include <linux/hugetlb_cgroup.h>
 #include <linux/node.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/page_owner.h>
 #include "internal.h"
 
-int hugepages_treat_as_movable;
-
 int hugetlb_max_hstate __read_mostly;
 unsigned int default_hstate_idx;
 struct hstate hstates[HUGE_MAX_HSTATE];
@@ -926,7 +925,7 @@ retry_cpuset:
 /* Movability of hugepages depends on migration support. */
 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 {
-	if (hugepages_treat_as_movable || hugepage_migration_supported(h))
+	if (hugepage_migration_supported(h))
 		return GFP_HIGHUSER_MOVABLE;
 	else
 		return GFP_HIGHUSER;
@@ -1108,7 +1107,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
 	return zone_spans_pfn(zone, last_pfn);
 }
 
-static struct page *alloc_gigantic_page(int nid, struct hstate *h)
+static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
 {
 	unsigned int order = huge_page_order(h);
 	unsigned long nr_pages = 1 << order;
@@ -1116,11 +1116,9 @@ static struct page *alloc_gigantic_page(int nid, struct hstate *h)
 	struct zonelist *zonelist;
 	struct zone *zone;
 	struct zoneref *z;
-	gfp_t gfp_mask;
 
-	gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 	zonelist = node_zonelist(nid, gfp_mask);
-	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), NULL) {
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nodemask) {
 		spin_lock_irqsave(&zone->lock, flags);
 
 		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
@@ -1151,41 +1149,13 @@ static struct page *alloc_gigantic_page(int nid, struct hstate *h)
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
 static void prep_compound_gigantic_page(struct page *page, unsigned int order);
 
-static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
-{
-	struct page *page;
-
-	page = alloc_gigantic_page(nid, h);
-	if (page) {
-		prep_compound_gigantic_page(page, huge_page_order(h));
-		prep_new_huge_page(h, page, nid);
-	}
-
-	return page;
-}
-
-static int alloc_fresh_gigantic_page(struct hstate *h,
-				nodemask_t *nodes_allowed)
-{
-	struct page *page = NULL;
-	int nr_nodes, node;
-
-	for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-		page = alloc_fresh_gigantic_page_node(h, node);
-		if (page)
-			return 1;
-	}
-
-	return 0;
-}
-
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
 static inline bool gigantic_page_supported(void) { return false; }
+static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask) { return NULL; }
 static inline void free_gigantic_page(struct page *page, unsigned int order) { }
 static inline void destroy_compound_gigantic_page(struct page *page,
 						unsigned int order) { }
-static inline int alloc_fresh_gigantic_page(struct hstate *h,
-					nodemask_t *nodes_allowed) { return 0; }
 #endif
 
 static void update_and_free_page(struct hstate *h, struct page *page)
@@ -1250,6 +1220,28 @@ static void clear_page_huge_active(struct page *page)
 	ClearPagePrivate(&page[1]);
 }
 
+/*
+ * Internal hugetlb-specific page flag. Do not use outside of the hugetlb
+ * code.
+ */
+static inline bool PageHugeTemporary(struct page *page)
+{
+	if (!PageHuge(page))
+		return false;
+
+	return (unsigned long)page[2].mapping == -1U;
+}
+
+static inline void SetPageHugeTemporary(struct page *page)
+{
+	page[2].mapping = (void *)-1U;
+}
+
+static inline void ClearPageHugeTemporary(struct page *page)
+{
+	page[2].mapping = NULL;
+}
+
 void free_huge_page(struct page *page)
 {
 	/*
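
For readers following the new flag: it is not a real page flag bit. Hugetlb pages are compound pages, and the mapping field of the third constituent page is otherwise unused, so a -1 sentinel stored there can double as an extra, hugetlb-private flag. A minimal userspace model of that trick (a mock struct page and purely illustrative helper names, not kernel API, and not part of this patch) looks like this:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* stand-in for struct page; only the field the trick relies on */
struct mock_page {
	void *mapping;
};

static bool mock_PageHugeTemporary(const struct mock_page *page)
{
	/* the flag lives in the third (tail) page of the compound page */
	return (unsigned long)page[2].mapping == -1UL;
}

static void mock_SetPageHugeTemporary(struct mock_page *page)
{
	page[2].mapping = (void *)-1UL;
}

static void mock_ClearPageHugeTemporary(struct mock_page *page)
{
	page[2].mapping = NULL;
}

int main(void)
{
	struct mock_page huge[8] = { { NULL } };	/* a pretend compound page */

	assert(!mock_PageHugeTemporary(huge));
	mock_SetPageHugeTemporary(huge);
	assert(mock_PageHugeTemporary(huge));
	mock_ClearPageHugeTemporary(huge);
	assert(!mock_PageHugeTemporary(huge));
	printf("temporary flag round-trips through page[2].mapping\n");
	return 0;
}
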
@@ -1284,7 +1276,11 @@ void free_huge_page(struct page *page)
 	if (restore_reserve)
 		h->resv_huge_pages++;
 
-	if (h->surplus_huge_pages_node[nid]) {
+	if (PageHugeTemporary(page)) {
+		list_del(&page->lru);
+		ClearPageHugeTemporary(page);
+		update_and_free_page(h, page);
+	} else if (h->surplus_huge_pages_node[nid]) {
 		/* remove the page from active list */
 		list_del(&page->lru);
 		update_and_free_page(h, page);
@@ -1306,7 +1302,6 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 	h->nr_huge_pages++;
 	h->nr_huge_pages_node[nid]++;
 	spin_unlock(&hugetlb_lock);
-	put_page(page); /* free it into the hugepage allocator */
 }
 
 static void prep_compound_gigantic_page(struct page *page, unsigned int order)
@@ -1383,41 +1378,70 @@ pgoff_t __basepage_index(struct page *page)
 	return (index << compound_order(page_head)) + compound_idx;
 }
 
-static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
+static struct page *alloc_buddy_huge_page(struct hstate *h,
+		gfp_t gfp_mask, int nid, nodemask_t *nmask)
 {
+	int order = huge_page_order(h);
 	struct page *page;
 
-	page = __alloc_pages_node(nid,
-		htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
-					__GFP_RETRY_MAYFAIL|__GFP_NOWARN,
-		huge_page_order(h));
-	if (page) {
-		prep_new_huge_page(h, page, nid);
-	}
+	gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+	page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
+	if (page)
+		__count_vm_event(HTLB_BUDDY_PGALLOC);
+	else
+		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+
+	return page;
+}
+
+/*
+ * Common helper to allocate a fresh hugetlb page. All specific allocators
+ * should use this function to get new hugetlb pages.
+ */
+static struct page *alloc_fresh_huge_page(struct hstate *h,
+		gfp_t gfp_mask, int nid, nodemask_t *nmask)
+{
+	struct page *page;
+
+	if (hstate_is_gigantic(h))
+		page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
+	else
+		page = alloc_buddy_huge_page(h, gfp_mask,
+				nid, nmask);
+	if (!page)
+		return NULL;
+
+	if (hstate_is_gigantic(h))
+		prep_compound_gigantic_page(page, huge_page_order(h));
+	prep_new_huge_page(h, page, page_to_nid(page));
 
 	return page;
 }
 
-static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+/*
+ * Allocates a fresh page to the hugetlb allocator pool in a node-interleaved
+ * manner.
+ */
+static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 {
 	struct page *page;
 	int nr_nodes, node;
-	int ret = 0;
+	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
 	for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-		page = alloc_fresh_huge_page_node(h, node);
-		if (page) {
-			ret = 1;
+		page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed);
+		if (page)
 			break;
-		}
 	}
 
-	if (ret)
-		count_vm_event(HTLB_BUDDY_PGALLOC);
-	else
-		count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+	if (!page)
+		return 0;
 
-	return ret;
+	put_page(page); /* free it into the hugepage allocator */
+
+	return 1;
 }
 
 /*
@@ -1525,79 +1549,66 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 	return rc;
 }
 
-static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
-		gfp_t gfp_mask, int nid, nodemask_t *nmask)
-{
-	int order = huge_page_order(h);
-
-	gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
-	if (nid == NUMA_NO_NODE)
-		nid = numa_mem_id();
-	return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
-}
-
-static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
+/*
+ * Allocates a fresh surplus page from the page allocator.
+ */
+static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		int nid, nodemask_t *nmask)
 {
-	struct page *page;
-	unsigned int r_nid;
+	struct page *page = NULL;
 
 	if (hstate_is_gigantic(h))
 		return NULL;
 
+	spin_lock(&hugetlb_lock);
+	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
+		goto out_unlock;
+	spin_unlock(&hugetlb_lock);
+
+	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+	if (!page)
+		return NULL;
+
+	spin_lock(&hugetlb_lock);
 	/*
-	 * Assume we will successfully allocate the surplus page to
-	 * prevent racing processes from causing the surplus to exceed
-	 * overcommit
-	 *
-	 * This however introduces a different race, where a process B
-	 * tries to grow the static hugepage pool while alloc_pages() is
-	 * called by process A. B will only examine the per-node
-	 * counters in determining if surplus huge pages can be
-	 * converted to normal huge pages in adjust_pool_surplus(). A
-	 * won't be able to increment the per-node counter, until the
-	 * lock is dropped by B, but B doesn't drop hugetlb_lock until
-	 * no more huge pages can be converted from surplus to normal
-	 * state (and doesn't try to convert again). Thus, we have a
-	 * case where a surplus huge page exists, the pool is grown, and
-	 * the surplus huge page still exists after, even though it
-	 * should just have been converted to a normal huge page. This
-	 * does not leak memory, though, as the hugepage will be freed
-	 * once it is out of use. It also does not allow the counters to
-	 * go out of whack in adjust_pool_surplus() as we don't modify
-	 * the node values until we've gotten the hugepage and only the
-	 * per-node value is checked there.
+	 * We could have raced with the pool size change.
+	 * Double check that and simply deallocate the new page
+	 * if we would end up overcommitting the surpluses. Abuse
+	 * the temporary page to work around the nasty free_huge_page
+	 * code flow.
 	 */
-	spin_lock(&hugetlb_lock);
 	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-		spin_unlock(&hugetlb_lock);
-		return NULL;
+		SetPageHugeTemporary(page);
+		put_page(page);
+		page = NULL;
 	} else {
-		h->nr_huge_pages++;
 		h->surplus_huge_pages++;
+		h->nr_huge_pages_node[page_to_nid(page)]++;
 	}
+
+out_unlock:
 	spin_unlock(&hugetlb_lock);
 
-	page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
+	return page;
+}
 
-	spin_lock(&hugetlb_lock);
-	if (page) {
-		INIT_LIST_HEAD(&page->lru);
-		r_nid = page_to_nid(page);
-		set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-		set_hugetlb_cgroup(page, NULL);
-		/*
-		 * We incremented the global counters already
-		 */
-		h->nr_huge_pages_node[r_nid]++;
-		h->surplus_huge_pages_node[r_nid]++;
-		__count_vm_event(HTLB_BUDDY_PGALLOC);
-	} else {
-		h->nr_huge_pages--;
-		h->surplus_huge_pages--;
-		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-	}
-	spin_unlock(&hugetlb_lock);
+static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask)
+{
+	struct page *page;
+
+	if (hstate_is_gigantic(h))
+		return NULL;
+
+	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+	if (!page)
+		return NULL;
+
+	/*
+	 * We do not account these pages as surplus because they are only
+	 * temporary and will be released properly on the last reference.
+	 */
+	SetPageHugeTemporary(page);
 
 	return page;
 }
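
The locking dance in alloc_surplus_huge_page() above is a generic pattern worth calling out: check the limit under the lock, drop the lock across a potentially sleeping allocation, then retake it and re-check before committing, undoing the work if another thread raced in. A hedged userspace sketch of the same pattern (plain pthreads and malloc, nothing hugetlb-specific, all names invented for illustration):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long surplus, overcommit_limit = 4;

static void *alloc_surplus_object(void)
{
	void *obj;

	pthread_mutex_lock(&pool_lock);
	if (surplus >= overcommit_limit) {	/* optimistic early check */
		pthread_mutex_unlock(&pool_lock);
		return NULL;
	}
	pthread_mutex_unlock(&pool_lock);

	obj = malloc(64);			/* the "allocation", done without the lock held */
	if (!obj)
		return NULL;

	pthread_mutex_lock(&pool_lock);
	if (surplus >= overcommit_limit) {	/* somebody raced with us; undo */
		free(obj);
		obj = NULL;
	} else {
		surplus++;			/* commit the new object to the pool */
	}
	pthread_mutex_unlock(&pool_lock);

	return obj;
}

int main(void)
{
	/* objects stay "in the pool" until process exit; stops at the limit */
	while (alloc_surplus_object())
		;
	return 0;
}
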
@@ -1606,7 +1617,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
  * Use the VMA's mpolicy to allocate a huge page from the buddy.
  */
 static
-struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
+struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 		struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
@@ -1616,17 +1627,13 @@ struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
 	nodemask_t *nodemask;
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-	page = __alloc_buddy_huge_page(h, gfp_mask, nid, nodemask);
+	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
 	mpol_cond_put(mpol);
 
 	return page;
 }
 
-/*
- * This allocation function is useful in the context where vma is irrelevant.
- * E.g. soft-offlining uses this function because it only cares physical
- * address of error page.
- */
+/* page migration callback function */
 struct page *alloc_huge_page_node(struct hstate *h, int nid)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h);
@@ -1641,12 +1648,12 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
-		page = __alloc_buddy_huge_page(h, gfp_mask, nid, NULL);
+		page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
 
 	return page;
 }
 
-
+/* page migration callback function */
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
 		nodemask_t *nmask)
 {
@@ -1664,9 +1671,25 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	/* No reservations, try to overcommit */
+	return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
+}
+
+/* mempolicy-aware migration callback */
+struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
+		unsigned long address)
+{
+	struct mempolicy *mpol;
+	nodemask_t *nodemask;
+	struct page *page;
+	gfp_t gfp_mask;
+	int node;
+
+	gfp_mask = htlb_alloc_mask(h);
+	node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
+	page = alloc_huge_page_nodemask(h, node, nodemask);
+	mpol_cond_put(mpol);
 
-	return __alloc_buddy_huge_page(h, gfp_mask, preferred_nid, nmask);
+	return page;
 }
 
 /*
@@ -1694,7 +1717,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = __alloc_buddy_huge_page(h, htlb_alloc_mask(h),
+		page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
 				NUMA_NO_NODE, NULL);
 		if (!page) {
 			alloc_ok = false;
@@ -2031,7 +2054,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
 	if (!page) {
 		spin_unlock(&hugetlb_lock);
-		page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
+		page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
 		if (!page)
 			goto out_uncharge_cgroup;
 		if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
@@ -2074,20 +2097,6 @@ out_subpool_put:
 	return ERR_PTR(-ENOSPC);
 }
 
-/*
- * alloc_huge_page()'s wrapper which simply returns the page if allocation
- * succeeds, otherwise NULL. This function is called from new_vma_page(),
- * where no ERR_VALUE is expected to be returned.
- */
-struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
-				unsigned long addr, int avoid_reserve)
-{
-	struct page *page = alloc_huge_page(vma, addr, avoid_reserve);
-	if (IS_ERR(page))
-		page = NULL;
-	return page;
-}
-
 int alloc_bootmem_huge_page(struct hstate *h)
 	__attribute__ ((weak, alias("__alloc_bootmem_huge_page")));
 int __alloc_bootmem_huge_page(struct hstate *h)
@@ -2150,6 +2159,8 @@ static void __init gather_bootmem_prealloc(void)
 		prep_compound_huge_page(page, h->order);
 		WARN_ON(PageReserved(page));
 		prep_new_huge_page(h, page, page_to_nid(page));
+		put_page(page); /* free it into the hugepage allocator */
+
 		/*
 		 * If we had gigantic hugepages allocated at boot time, we need
 		 * to restore the 'stolen' pages to totalram_pages in order to
@@ -2169,7 +2180,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 		if (hstate_is_gigantic(h)) {
 			if (!alloc_bootmem_huge_page(h))
 				break;
-		} else if (!alloc_fresh_huge_page(h,
+		} else if (!alloc_pool_huge_page(h,
 					 &node_states[N_MEMORY]))
 			break;
 		cond_resched();
@@ -2289,7 +2300,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 	 * First take pages out of surplus state. Then make up the
 	 * remaining difference by allocating fresh huge pages.
 	 *
-	 * We might race with __alloc_buddy_huge_page() here and be unable
+	 * We might race with alloc_surplus_huge_page() here and be unable
 	 * to convert a surplus huge page to a normal huge page. That is
 	 * not critical, though, it just means the overall size of the
 	 * pool might be one hugepage larger than it needs to be, but
@@ -2312,10 +2323,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 		/* yield cpu to avoid soft lockup */
 		cond_resched();
 
-		if (hstate_is_gigantic(h))
-			ret = alloc_fresh_gigantic_page(h, nodes_allowed);
-		else
-			ret = alloc_fresh_huge_page(h, nodes_allowed);
+		ret = alloc_pool_huge_page(h, nodes_allowed);
 		spin_lock(&hugetlb_lock);
 		if (!ret)
 			goto out;
@@ -2335,7 +2343,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 	 * By placing pages into the surplus state independent of the
 	 * overcommit value, we are allowing the surplus pool size to
 	 * exceed overcommit. There are few sane options here. Since
-	 * __alloc_buddy_huge_page() is checking the global counter,
+	 * alloc_surplus_huge_page() is checking the global counter,
 	 * though, we'll note that we're not allowed to exceed surplus
 	 * and won't grow the pool anywhere else. Not until one of the
 	 * sysctls are changed, or the surplus pages go out of use.
@@ -2975,20 +2983,32 @@ out:
 
 void hugetlb_report_meminfo(struct seq_file *m)
 {
-	struct hstate *h = &default_hstate;
+	struct hstate *h;
+	unsigned long total = 0;
+
 	if (!hugepages_supported())
 		return;
-	seq_printf(m,
-			"HugePages_Total:   %5lu\n"
-			"HugePages_Free:    %5lu\n"
-			"HugePages_Rsvd:    %5lu\n"
-			"HugePages_Surp:    %5lu\n"
-			"Hugepagesize:   %8lu kB\n",
-			h->nr_huge_pages,
-			h->free_huge_pages,
-			h->resv_huge_pages,
-			h->surplus_huge_pages,
-			1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
+
+	for_each_hstate(h) {
+		unsigned long count = h->nr_huge_pages;
+
+		total += (PAGE_SIZE << huge_page_order(h)) * count;
+
+		if (h == &default_hstate)
+			seq_printf(m,
+				"HugePages_Total:   %5lu\n"
+				"HugePages_Free:    %5lu\n"
+				"HugePages_Rsvd:    %5lu\n"
+				"HugePages_Surp:    %5lu\n"
+				"Hugepagesize:   %8lu kB\n",
+				count,
+				h->free_huge_pages,
+				h->resv_huge_pages,
+				h->surplus_huge_pages,
+				(PAGE_SIZE << huge_page_order(h)) / 1024);
+	}
+
+	seq_printf(m, "Hugetlb:        %8lu kB\n", total / 1024);
 }
 
 int hugetlb_report_node_meminfo(int nid, char *buf)
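
The new "Hugetlb:" line is simple arithmetic over every hstate: total bytes = sum of nr_huge_pages * (PAGE_SIZE << order), printed in kB. A tiny standalone illustration with made-up pool sizes (4 KB base pages, sixteen 2 MB pages and one 1 GB page), not derived from this patch:

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;
	const struct { unsigned int order; unsigned long nr_huge_pages; } hstates[] = {
		{ 9,  16 },	/* 2 MB pages: order 9  */
		{ 18,  1 },	/* 1 GB pages: order 18 */
	};
	unsigned long total = 0;

	for (unsigned int i = 0; i < 2; i++)
		total += (page_size << hstates[i].order) * hstates[i].nr_huge_pages;

	/* 16 * 2 MB + 1 * 1 GB = 32768 kB + 1048576 kB = 1081344 kB */
	printf("Hugetlb:        %8lu kB\n", total / 1024);
	return 0;
}
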
@@ -4799,3 +4819,36 @@ void putback_active_hugepage(struct page *page)
 	spin_unlock(&hugetlb_lock);
 	put_page(page);
 }
+
+void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+{
+	struct hstate *h = page_hstate(oldpage);
+
+	hugetlb_cgroup_migrate(oldpage, newpage);
+	set_page_owner_migrate_reason(newpage, reason);
+
+	/*
+	 * Transfer the temporary state of the new huge page. This is
+	 * the reverse of other transitions because the new page is going
+	 * to be final while the old one will be freed, so the old page
+	 * takes over the temporary status.
+	 *
+	 * Also note that we have to transfer the per-node surplus state
+	 * here as well otherwise the global surplus count will not match
+	 * the per-node counters.
+	 */
+	if (PageHugeTemporary(newpage)) {
+		int old_nid = page_to_nid(oldpage);
+		int new_nid = page_to_nid(newpage);
+
+		SetPageHugeTemporary(oldpage);
+		ClearPageHugeTemporary(newpage);
+
+		spin_lock(&hugetlb_lock);
+		if (h->surplus_huge_pages_node[old_nid]) {
+			h->surplus_huge_pages_node[old_nid]--;
+			h->surplus_huge_pages_node[new_nid]++;
+		}
+		spin_unlock(&hugetlb_lock);
+	}
+}