@@ -1540,62 +1540,46 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 static struct page *__alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		int nid, nodemask_t *nmask)
 {
-	struct page *page;
-	unsigned int r_nid;
+	struct page *page = NULL;
 
 	if (hstate_is_gigantic(h))
 		return NULL;
 
-	/*
-	 * Assume we will successfully allocate the surplus page to
-	 * prevent racing processes from causing the surplus to exceed
-	 * overcommit
-	 *
-	 * This however introduces a different race, where a process B
-	 * tries to grow the static hugepage pool while alloc_pages() is
-	 * called by process A. B will only examine the per-node
-	 * counters in determining if surplus huge pages can be
-	 * converted to normal huge pages in adjust_pool_surplus(). A
-	 * won't be able to increment the per-node counter, until the
-	 * lock is dropped by B, but B doesn't drop hugetlb_lock until
-	 * no more huge pages can be converted from surplus to normal
-	 * state (and doesn't try to convert again). Thus, we have a
-	 * case where a surplus huge page exists, the pool is grown, and
-	 * the surplus huge page still exists after, even though it
-	 * should just have been converted to a normal huge page. This
-	 * does not leak memory, though, as the hugepage will be freed
-	 * once it is out of use. It also does not allow the counters to
-	 * go out of whack in adjust_pool_surplus() as we don't modify
-	 * the node values until we've gotten the hugepage and only the
-	 * per-node value is checked there.
-	 */
 	spin_lock(&hugetlb_lock);
-	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-		spin_unlock(&hugetlb_lock);
-		return NULL;
-	} else {
-		h->nr_huge_pages++;
-		h->surplus_huge_pages++;
-	}
+	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
+		goto out_unlock;
 	spin_unlock(&hugetlb_lock);
 
 	page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
+	if (!page)
+		goto out_unlock;
 
 	spin_lock(&hugetlb_lock);
-	if (page) {
+	/*
+	 * We could have raced with the pool size change.
+	 * Double check that and simply deallocate the new page
+	 * if we would end up overcommiting the surpluses. Abuse
+	 * temporary page to workaround the nasty free_huge_page
+	 * codeflow
+	 */
+	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
+		SetPageHugeTemporary(page);
+		put_page(page);
+		page = NULL;
+	} else {
+		int r_nid;
+
+		h->surplus_huge_pages++;
+		h->nr_huge_pages++;
 		INIT_LIST_HEAD(&page->lru);
 		r_nid = page_to_nid(page);
 		set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
 		set_hugetlb_cgroup(page, NULL);
-		/*
-		 * We incremented the global counters already
-		 */
 		h->nr_huge_pages_node[r_nid]++;
 		h->surplus_huge_pages_node[r_nid]++;
-	} else {
-		h->nr_huge_pages--;
-		h->surplus_huge_pages--;
 	}
+
+out_unlock:
 	spin_unlock(&hugetlb_lock);
 
 	return page;