@@ -1437,7 +1437,76 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 		dissolve_free_huge_page(pfn_to_page(pfn));
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
+/*
+ * There are 3 ways this can get called:
+ * 1. With vma+addr: we use the VMA's memory policy
+ * 2. With !vma, but nid=NUMA_NO_NODE: We try to allocate a huge
+ *    page from any node, and let the buddy allocator itself figure
+ *    it out.
+ * 3. With !vma, but nid!=NUMA_NO_NODE. We allocate a huge page
+ *    strictly from 'nid'
+ */
+static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
+		struct vm_area_struct *vma, unsigned long addr, int nid)
+{
+	int order = huge_page_order(h);
+	gfp_t gfp = htlb_alloc_mask(h)|__GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+	unsigned int cpuset_mems_cookie;
+
+	/*
+	 * We need a VMA to get a memory policy. If we do not
+	 * have one, we use the 'nid' argument
+	 */
+	if (!vma) {
+		/*
+		 * If a specific node is requested, make sure to
+		 * get memory from there, but only when a node
+		 * is explicitly specified.
+		 */
+		if (nid != NUMA_NO_NODE)
+			gfp |= __GFP_THISNODE;
+		/*
+		 * Make sure to call something that can handle
+		 * nid=NUMA_NO_NODE
+		 */
+		return alloc_pages_node(nid, gfp, order);
+	}
+
+	/*
+	 * OK, so we have a VMA. Fetch the mempolicy and try to
+	 * allocate a huge page with it.
+	 */
+	do {
+		struct page *page;
+		struct mempolicy *mpol;
+		struct zonelist *zl;
+		nodemask_t *nodemask;
+
+		cpuset_mems_cookie = read_mems_allowed_begin();
+		zl = huge_zonelist(vma, addr, gfp, &mpol, &nodemask);
+		mpol_cond_put(mpol);
+		page = __alloc_pages_nodemask(gfp, order, zl, nodemask);
+		if (page)
+			return page;
+	} while (read_mems_allowed_retry(cpuset_mems_cookie));
+
+	return NULL;
+}
+
+/*
+ * There are two ways to allocate a huge page:
+ * 1. When you have a VMA and an address (like a fault)
+ * 2. When you have no VMA (like when setting /proc/.../nr_hugepages)
+ *
+ * 'vma' and 'addr' are only for (1). 'nid' is always NUMA_NO_NODE in
+ * this case, which signifies that the allocation should be done with
+ * respect for the VMA's memory policy.
+ *
+ * For (2), we ignore 'vma' and 'addr' and use 'nid' exclusively. This
+ * implies that memory policies will not be taken into account.
+ */
+static struct page *__alloc_buddy_huge_page(struct hstate *h,
+		struct vm_area_struct *vma, unsigned long addr, int nid)
 {
 	struct page *page;
 	unsigned int r_nid;
@@ -1445,6 +1514,15 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	if (hstate_is_gigantic(h))
 		return NULL;
 
+	/*
+	 * Make sure that anyone specifying 'nid' is not also specifying a VMA.
+	 * This makes sure the caller is picking _one_ of the modes with which
+	 * we can call this function, not both.
+	 */
+	if (vma || (addr != -1)) {
+		WARN_ON_ONCE(addr == -1);
+		WARN_ON_ONCE(nid != NUMA_NO_NODE);
+	}
 	/*
 	 * Assume we will successfully allocate the surplus page to
 	 * prevent racing processes from causing the surplus to exceed
@@ -1478,14 +1556,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	}
 	spin_unlock(&hugetlb_lock);
 
-	if (nid == NUMA_NO_NODE)
-		page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
-				__GFP_REPEAT|__GFP_NOWARN,
-				huge_page_order(h));
-	else
-		page = __alloc_pages_node(nid,
-			htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
-			__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
+	page = __hugetlb_alloc_buddy_huge_page(h, vma, addr, nid);
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
@@ -1509,6 +1580,27 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	return page;
 }
 
+/*
+ * Allocate a huge page from 'nid'. Note, 'nid' may be
+ * NUMA_NO_NODE, which means that it may be allocated
+ * anywhere.
+ */
+struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
+{
+	unsigned long addr = -1;
+
+	return __alloc_buddy_huge_page(h, NULL, addr, nid);
+}
+
+/*
+ * Use the VMA's mpolicy to allocate a huge page from the buddy.
+ */
+struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
+		struct vm_area_struct *vma, unsigned long addr)
+{
+	return __alloc_buddy_huge_page(h, vma, addr, NUMA_NO_NODE);
+}
+
 /*
  * This allocation function is useful in the context where vma is irrelevant.
  * E.g. soft-offlining uses this function because it only cares physical
@@ -1524,7 +1616,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
-		page = alloc_buddy_huge_page(h, nid);
+		page = __alloc_buddy_huge_page_no_mpol(h, nid);
 
 	return page;
 }
@@ -1554,7 +1646,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+		page = __alloc_buddy_huge_page_no_mpol(h, NUMA_NO_NODE);
 		if (!page) {
 			alloc_ok = false;
 			break;
@@ -1787,7 +1879,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
 	if (!page) {
 		spin_unlock(&hugetlb_lock);
-		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+		page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
 		if (!page)
 			goto out_uncharge_cgroup;