@@ -34,6 +34,7 @@
 #include <linux/hugetlb_cgroup.h>
 #include <linux/node.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/page_owner.h>
 #include "internal.h"
 
 int hugetlb_max_hstate __read_mostly;
@@ -1219,6 +1220,28 @@ static void clear_page_huge_active(struct page *page)
 	ClearPagePrivate(&page[1]);
 }
 
+/*
+ * Internal hugetlb specific page flag. Do not use outside of the hugetlb
+ * code
+ */
+static inline bool PageHugeTemporary(struct page *page)
+{
+	if (!PageHuge(page))
+		return false;
+
+	return (unsigned long)page[2].mapping == -1U;
+}
+
+static inline void SetPageHugeTemporary(struct page *page)
+{
+	page[2].mapping = (void *)-1U;
+}
+
+static inline void ClearPageHugeTemporary(struct page *page)
+{
+	page[2].mapping = NULL;
+}
+
 void free_huge_page(struct page *page)
 {
 	/*
@@ -1253,7 +1276,11 @@ void free_huge_page(struct page *page)
 	if (restore_reserve)
 		h->resv_huge_pages++;
 
-	if (h->surplus_huge_pages_node[nid]) {
+	if (PageHugeTemporary(page)) {
+		list_del(&page->lru);
+		ClearPageHugeTemporary(page);
+		update_and_free_page(h, page);
+	} else if (h->surplus_huge_pages_node[nid]) {
 		/* remove the page from active list */
 		list_del(&page->lru);
 		update_and_free_page(h, page);
@@ -1507,7 +1534,10 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 	return rc;
 }
 
-static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
+/*
+ * Allocates a fresh surplus page from the page allocator.
+ */
+static struct page *__alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		int nid, nodemask_t *nmask)
 {
 	struct page *page;
@@ -1571,6 +1601,28 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
 	return page;
 }
 
+static struct page *__alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask)
+{
+	struct page *page;
+
+	if (hstate_is_gigantic(h))
+		return NULL;
+
+	page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
+	if (!page)
+		return NULL;
+
+	/*
+	 * We do not account these pages as surplus because they are only
+	 * temporary and will be released properly on the last reference
+	 */
+	prep_new_huge_page(h, page, page_to_nid(page));
+	SetPageHugeTemporary(page);
+
+	return page;
+}
+
 /*
  * Use the VMA's mpolicy to allocate a huge page from the buddy.
  */
@@ -1585,17 +1637,13 @@ struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
 	nodemask_t *nodemask;
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-	page = __alloc_buddy_huge_page(h, gfp_mask, nid, nodemask);
+	page = __alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
 	mpol_cond_put(mpol);
 
 	return page;
 }
 
-/*
- * This allocation function is useful in the context where vma is irrelevant.
- * E.g. soft-offlining uses this function because it only cares physical
- * address of error page.
- */
+/* page migration callback function */
 struct page *alloc_huge_page_node(struct hstate *h, int nid)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h);
@@ -1610,12 +1658,12 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
-		page = __alloc_buddy_huge_page(h, gfp_mask, nid, NULL);
+		page = __alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
 
 	return page;
 }
 
-
+/* page migration callback function */
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
 		nodemask_t *nmask)
 {
@@ -1633,9 +1681,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	/* No reservations, try to overcommit */
-
-	return __alloc_buddy_huge_page(h, gfp_mask, preferred_nid, nmask);
+	return __alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
 }
 
 /*
@@ -1663,7 +1709,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = __alloc_buddy_huge_page(h, htlb_alloc_mask(h),
+		page = __alloc_surplus_huge_page(h, htlb_alloc_mask(h),
 				NUMA_NO_NODE, NULL);
 		if (!page) {
 			alloc_ok = false;
@@ -2260,7 +2306,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 	 * First take pages out of surplus state. Then make up the
 	 * remaining difference by allocating fresh huge pages.
 	 *
-	 * We might race with __alloc_buddy_huge_page() here and be unable
+	 * We might race with __alloc_surplus_huge_page() here and be unable
 	 * to convert a surplus huge page to a normal huge page. That is
 	 * not critical, though, it just means the overall size of the
 	 * pool might be one hugepage larger than it needs to be, but
@@ -2303,7 +2349,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 	 * By placing pages into the surplus state independent of the
 	 * overcommit value, we are allowing the surplus pool size to
 	 * exceed overcommit. There are few sane options here. Since
-	 * __alloc_buddy_huge_page() is checking the global counter,
+	 * __alloc_surplus_huge_page() is checking the global counter,
 	 * though, we'll note that we're not allowed to exceed surplus
 	 * and won't grow the pool anywhere else. Not until one of the
 	 * sysctls are changed, or the surplus pages go out of use.
@@ -4779,3 +4825,36 @@ void putback_active_hugepage(struct page *page)
 	spin_unlock(&hugetlb_lock);
 	put_page(page);
 }
+
+void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+{
+	struct hstate *h = page_hstate(oldpage);
+
+	hugetlb_cgroup_migrate(oldpage, newpage);
+	set_page_owner_migrate_reason(newpage, reason);
+
+	/*
+	 * transfer temporary state of the new huge page. This is
+	 * reverse to other transitions because the newpage is going to
+	 * be final while the old one will be freed so it takes over
+	 * the temporary status.
+	 *
+	 * Also note that we have to transfer the per-node surplus state
+	 * here as well otherwise the global surplus count will not match
+	 * the per-node's.
+	 */
+	if (PageHugeTemporary(newpage)) {
+		int old_nid = page_to_nid(oldpage);
+		int new_nid = page_to_nid(newpage);
+
+		SetPageHugeTemporary(oldpage);
+		ClearPageHugeTemporary(newpage);
+
+		spin_lock(&hugetlb_lock);
+		if (h->surplus_huge_pages_node[old_nid]) {
+			h->surplus_huge_pages_node[old_nid]--;
+			h->surplus_huge_pages_node[new_nid]++;
+		}
+		spin_unlock(&hugetlb_lock);
+	}
+}
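
The PageHugeTemporary() helpers added above keep the flag in the mapping field of the second tail page (page[2]) of the compound huge page, using -1 as a sentinel. The userspace sketch below only illustrates that encoding; its stripped-down struct page is a hypothetical stand-in for the kernel's, and it omits the PageHuge() check the kernel helper performs first.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for struct page: only the field used by the
 * temporary-flag encoding is modelled here. */
struct page {
	void *mapping;
};

/* Same expressions as in the patch; the kernel version additionally
 * checks PageHuge() before looking at the tail page. */
static int page_huge_temporary(struct page *page)
{
	return (unsigned long)page[2].mapping == -1U;
}

static void set_page_huge_temporary(struct page *page)
{
	page[2].mapping = (void *)-1U;
}

static void clear_page_huge_temporary(struct page *page)
{
	page[2].mapping = NULL;
}

int main(void)
{
	/* Model a 2MB huge page as 512 page structs; page[0] is the head,
	 * the rest are tail pages whose mapping field is otherwise unused. */
	struct page hugepage[512] = { { NULL } };

	assert(!page_huge_temporary(hugepage));
	set_page_huge_temporary(hugepage);
	assert(page_huge_temporary(hugepage));
	clear_page_huge_temporary(hugepage);
	assert(!page_huge_temporary(hugepage));

	printf("PageHugeTemporary round-trip ok\n");
	return 0;
}

Because only tail-page state is touched, free_huge_page() can test the flag and, for temporary pages, hand the page straight back to the buddy allocator instead of the surplus pool, as the hunk at -1253 above does.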