@@ -1437,7 +1437,76 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 		dissolve_free_huge_page(pfn_to_page(pfn));
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
+/*
+ * There are 3 ways this can get called:
+ * 1. With vma+addr: we use the VMA's memory policy
+ * 2. With !vma, but nid=NUMA_NO_NODE: We try to allocate a huge
+ *    page from any node, and let the buddy allocator itself figure
+ *    it out.
+ * 3. With !vma, but nid!=NUMA_NO_NODE. We allocate a huge page
+ *    strictly from 'nid'
+ */
+static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
+		struct vm_area_struct *vma, unsigned long addr, int nid)
+{
+	int order = huge_page_order(h);
+	gfp_t gfp = htlb_alloc_mask(h)|__GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+	unsigned int cpuset_mems_cookie;
+
+	/*
+	 * We need a VMA to get a memory policy. If we do not
+	 * have one, we use the 'nid' argument
+	 */
+	if (!vma) {
+		/*
+		 * If a specific node is requested, make sure to
+		 * get memory from there, but only when a node
+		 * is explicitly specified.
+		 */
+		if (nid != NUMA_NO_NODE)
+			gfp |= __GFP_THISNODE;
+		/*
+		 * Make sure to call something that can handle
+		 * nid=NUMA_NO_NODE
+		 */
+		return alloc_pages_node(nid, gfp, order);
+	}
+
+	/*
+	 * OK, so we have a VMA. Fetch the mempolicy and try to
+	 * allocate a huge page with it.
+	 */
+	do {
+		struct page *page;
+		struct mempolicy *mpol;
+		struct zonelist *zl;
+		nodemask_t *nodemask;
+
+		cpuset_mems_cookie = read_mems_allowed_begin();
+		zl = huge_zonelist(vma, addr, gfp, &mpol, &nodemask);
+		mpol_cond_put(mpol);
+		page = __alloc_pages_nodemask(gfp, order, zl, nodemask);
+		if (page)
+			return page;
+	} while (read_mems_allowed_retry(cpuset_mems_cookie));
+
+	return NULL;
+}
+
+/*
+ * There are two ways to allocate a huge page:
+ * 1. When you have a VMA and an address (like a fault)
+ * 2. When you have no VMA (like when setting /proc/.../nr_hugepages)
+ *
+ * 'vma' and 'addr' are only for (1). 'nid' is always NUMA_NO_NODE in
+ * this case, which signifies that the allocation should be done with
+ * respect for the VMA's memory policy.
+ *
+ * For (2), we ignore 'vma' and 'addr' and use 'nid' exclusively. This
+ * implies that memory policies will not be taken into account.
+ */
+static struct page *__alloc_buddy_huge_page(struct hstate *h,
+		struct vm_area_struct *vma, unsigned long addr, int nid)
 {
 	struct page *page;
 	unsigned int r_nid;
@@ -1445,6 +1514,15 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	if (hstate_is_gigantic(h))
 		return NULL;
 
+	/*
+	 * Make sure that anyone specifying 'nid' is not also specifying a VMA.
+	 * This makes sure the caller is picking _one_ of the modes with which
+	 * we can call this function, not both.
+	 */
+	if (vma || (addr != -1)) {
+		WARN_ON_ONCE(addr == -1);
+		WARN_ON_ONCE(nid != NUMA_NO_NODE);
+	}
 	/*
 	 * Assume we will successfully allocate the surplus page to
 	 * prevent racing processes from causing the surplus to exceed
@@ -1478,14 +1556,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	}
 	spin_unlock(&hugetlb_lock);
 
-	if (nid == NUMA_NO_NODE)
-		page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
-				__GFP_REPEAT|__GFP_NOWARN,
-				huge_page_order(h));
-	else
-		page = __alloc_pages_node(nid,
-			htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
-			__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
+	page = __hugetlb_alloc_buddy_huge_page(h, vma, addr, nid);
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
@@ -1509,6 +1580,27 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	return page;
 }
 
+/*
+ * Allocate a huge page from 'nid'. Note, 'nid' may be
+ * NUMA_NO_NODE, which means that it may be allocated
+ * anywhere.
+ */
+struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
+{
+	unsigned long addr = -1;
+
+	return __alloc_buddy_huge_page(h, NULL, addr, nid);
+}
+
+/*
+ * Use the VMA's mpolicy to allocate a huge page from the buddy.
+ */
+struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
+		struct vm_area_struct *vma, unsigned long addr)
+{
+	return __alloc_buddy_huge_page(h, vma, addr, NUMA_NO_NODE);
+}
+
 /*
  * This allocation function is useful in the context where vma is irrelevant.
  * E.g. soft-offlining uses this function because it only cares physical
@@ -1524,7 +1616,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
-		page = alloc_buddy_huge_page(h, nid);
+		page = __alloc_buddy_huge_page_no_mpol(h, nid);
 
 	return page;
 }
@@ -1554,7 +1646,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+		page = __alloc_buddy_huge_page_no_mpol(h, NUMA_NO_NODE);
 		if (!page) {
 			alloc_ok = false;
 			break;
@@ -1787,7 +1879,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
 	if (!page) {
 		spin_unlock(&hugetlb_lock);
-		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+		page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
 		if (!page)
 			goto out_uncharge_cgroup;