|
@@ -1988,120 +1988,68 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
|
|
|
* @order:Order of the GFP allocation.
|
|
|
* @vma: Pointer to VMA or NULL if not available.
|
|
|
* @addr: Virtual Address of the allocation. Must be inside the VMA.
|
|
|
+ * @node: Which node to prefer for allocation (modulo policy).
|
|
|
+ * @hugepage: for hugepages try only the preferred node if possible
|
|
|
*
|
|
|
* This function allocates a page from the kernel page pool and applies
|
|
|
* a NUMA policy associated with the VMA or the current process.
|
|
|
* When VMA is not NULL caller must hold down_read on the mmap_sem of the
|
|
|
* mm_struct of the VMA to prevent it from going away. Should be used for
|
|
|
- * all allocations for pages that will be mapped into
|
|
|
- * user space. Returns NULL when no page can be allocated.
|
|
|
- *
|
|
|
- * Should be called with the mm_sem of the vma hold.
|
|
|
+ * all allocations for pages that will be mapped into user space. Returns
|
|
|
+ * NULL when no page can be allocated.
|
|
|
*/
|
|
|
struct page *
|
|
|
alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
|
|
|
- unsigned long addr, int node)
|
|
|
+ unsigned long addr, int node, bool hugepage)
|
|
|
{
|
|
|
struct mempolicy *pol;
|
|
|
struct page *page;
|
|
|
unsigned int cpuset_mems_cookie;
|
|
|
+ struct zonelist *zl;
|
|
|
+ nodemask_t *nmask;
|
|
|
|
|
|
retry_cpuset:
|
|
|
pol = get_vma_policy(vma, addr);
|
|
|
cpuset_mems_cookie = read_mems_allowed_begin();
|
|
|
|
|
|
- if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
|
|
|
+ if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
|
|
|
+ pol->mode != MPOL_INTERLEAVE)) {
|
|
|
+ /*
|
|
|
+ * For hugepage allocation and non-interleave policy which
|
|
|
+ * allows the current node, we only try to allocate from the
|
|
|
+ * current node and don't fall back to other nodes, as the
|
|
|
+ * cost of remote accesses would likely offset THP benefits.
|
|
|
+ *
|
|
|
+ * If the policy is interleave, or does not allow the current
|
|
|
+ * node in its nodemask, we allocate the standard way.
|
|
|
+ */
|
|
|
+ nmask = policy_nodemask(gfp, pol);
|
|
|
+ if (!nmask || node_isset(node, *nmask)) {
|
|
|
+ mpol_cond_put(pol);
|
|
|
+ page = alloc_pages_exact_node(node, gfp, order);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (pol->mode == MPOL_INTERLEAVE) {
|
|
|
unsigned nid;
|
|
|
|
|
|
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
|
|
|
mpol_cond_put(pol);
|
|
|
page = alloc_page_interleave(gfp, order, nid);
|
|
|
- if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
|
|
|
- goto retry_cpuset;
|
|
|
-
|
|
|
- return page;
|
|
|
+ goto out;
|
|
|
}
|
|
|
- page = __alloc_pages_nodemask(gfp, order,
|
|
|
- policy_zonelist(gfp, pol, node),
|
|
|
- policy_nodemask(gfp, pol));
|
|
|
+
|
|
|
+ nmask = policy_nodemask(gfp, pol);
|
|
|
+ zl = policy_zonelist(gfp, pol, node);
|
|
|
mpol_cond_put(pol);
|
|
|
+ page = __alloc_pages_nodemask(gfp, order, zl, nmask);
|
|
|
+out:
|
|
|
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
|
|
|
goto retry_cpuset;
|
|
|
return page;
|
|
|
}
|
|
|
|
|
|
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
-/**
|
|
|
- * alloc_hugepage_vma: Allocate a hugepage for a VMA
|
|
|
- * @gfp:
|
|
|
- * %GFP_USER user allocation.
|
|
|
- * %GFP_KERNEL kernel allocations,
|
|
|
- * %GFP_HIGHMEM highmem/user allocations,
|
|
|
- * %GFP_FS allocation should not call back into a file system.
|
|
|
- * %GFP_ATOMIC don't sleep.
|
|
|
- *
|
|
|
- * @vma: Pointer to VMA or NULL if not available.
|
|
|
- * @addr: Virtual Address of the allocation. Must be inside the VMA.
|
|
|
- * @order: Order of the hugepage for gfp allocation.
|
|
|
- *
|
|
|
- * This functions allocate a huge page from the kernel page pool and applies
|
|
|
- * a NUMA policy associated with the VMA or the current process.
|
|
|
- * For policy other than %MPOL_INTERLEAVE, we make sure we allocate hugepage
|
|
|
- * only from the current node if the current node is part of the node mask.
|
|
|
- * If we can't allocate a hugepage we fail the allocation and don' try to fallback
|
|
|
- * to other nodes in the node mask. If the current node is not part of node mask
|
|
|
- * or if the NUMA policy is MPOL_INTERLEAVE we use the allocator that can
|
|
|
- * fallback to nodes in the policy node mask.
|
|
|
- *
|
|
|
- * When VMA is not NULL caller must hold down_read on the mmap_sem of the
|
|
|
- * mm_struct of the VMA to prevent it from going away. Should be used for
|
|
|
- * all allocations for pages that will be mapped into
|
|
|
- * user space. Returns NULL when no page can be allocated.
|
|
|
- *
|
|
|
- * Should be called with vma->vm_mm->mmap_sem held.
|
|
|
- *
|
|
|
- */
|
|
|
-struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
|
|
|
- unsigned long addr, int order)
|
|
|
-{
|
|
|
- struct page *page;
|
|
|
- nodemask_t *nmask;
|
|
|
- struct mempolicy *pol;
|
|
|
- int node = numa_node_id();
|
|
|
- unsigned int cpuset_mems_cookie;
|
|
|
-
|
|
|
-retry_cpuset:
|
|
|
- pol = get_vma_policy(vma, addr);
|
|
|
- cpuset_mems_cookie = read_mems_allowed_begin();
|
|
|
- /*
|
|
|
- * For interleave policy, we don't worry about
|
|
|
- * current node. Otherwise if current node is
|
|
|
- * in nodemask, try to allocate hugepage from
|
|
|
- * the current node. Don't fall back to other nodes
|
|
|
- * for THP.
|
|
|
- */
|
|
|
- if (unlikely(pol->mode == MPOL_INTERLEAVE))
|
|
|
- goto alloc_with_fallback;
|
|
|
- nmask = policy_nodemask(gfp, pol);
|
|
|
- if (!nmask || node_isset(node, *nmask)) {
|
|
|
- mpol_cond_put(pol);
|
|
|
- page = alloc_pages_exact_node(node, gfp, order);
|
|
|
- if (unlikely(!page &&
|
|
|
- read_mems_allowed_retry(cpuset_mems_cookie)))
|
|
|
- goto retry_cpuset;
|
|
|
- return page;
|
|
|
- }
|
|
|
-alloc_with_fallback:
|
|
|
- mpol_cond_put(pol);
|
|
|
- /*
|
|
|
- * if current node is not part of node mask, try
|
|
|
- * the allocation from any node, and we can do retry
|
|
|
- * in that case.
|
|
|
- */
|
|
|
- return alloc_pages_vma(gfp, order, vma, addr, node);
|
|
|
-}
|
|
|
-#endif
|
|
|
-
|
|
|
/**
|
|
|
* alloc_pages_current - Allocate pages.
|
|
|
*
|