|
@@ -3029,6 +3029,17 @@ static int __do_fault(struct vm_fault *vmf)
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
|
|
|
+ * If we check pmd_trans_unstable() first we will trip the pmd_bad() check
|
|
|
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
|
|
|
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
|
|
|
+ */
|
|
|
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
|
|
|
+{
|
|
|
+ return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
|
|
|
+}
|
|
|
+
|
|
|
static int pte_alloc_one_map(struct vm_fault *vmf)
|
|
|
{
|
|
|
struct vm_area_struct *vma = vmf->vma;
|
|
@@ -3052,18 +3063,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
|
|
|
map_pte:
|
|
|
/*
|
|
|
* If a huge pmd materialized under us just retry later. Use
|
|
|
- * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
|
|
|
- * didn't become pmd_trans_huge under us and then back to pmd_none, as
|
|
|
- * a result of MADV_DONTNEED running immediately after a huge pmd fault
|
|
|
- * in a different thread of this mm, in turn leading to a misleading
|
|
|
- * pmd_trans_huge() retval. All we have to ensure is that it is a
|
|
|
- * regular pmd that we can walk with pte_offset_map() and we can do that
|
|
|
- * through an atomic read in C, which is what pmd_trans_unstable()
|
|
|
- * provides.
|
|
|
+ * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
|
|
|
+ * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
|
|
|
+ * under us and then back to pmd_none, as a result of MADV_DONTNEED
|
|
|
+ * running immediately after a huge pmd fault in a different thread of
|
|
|
+ * this mm, in turn leading to a misleading pmd_trans_huge() retval.
|
|
|
+ * All we have to ensure is that it is a regular pmd that we can walk
|
|
|
+ * with pte_offset_map() and we can do that through an atomic read in
|
|
|
+ * C, which is what pmd_trans_unstable() provides.
|
|
|
*/
|
|
|
- if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
|
|
|
+ if (pmd_devmap_trans_unstable(vmf->pmd))
|
|
|
return VM_FAULT_NOPAGE;
|
|
|
|
|
|
+ /*
|
|
|
+ * At this point we know that our vmf->pmd points to a page of ptes
|
|
|
+ * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
|
|
|
+ * for the duration of the fault. If a racing MADV_DONTNEED runs and
|
|
|
+ * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
|
|
|
+ * be valid and we will re-check to make sure the vmf->pte isn't
|
|
|
+ * pte_none() under vmf->ptl protection when we return to
|
|
|
+ * alloc_set_pte().
|
|
|
+ */
|
|
|
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
|
|
|
&vmf->ptl);
|
|
|
return 0;
|
|
@@ -3690,7 +3710,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
|
|
|
vmf->pte = NULL;
|
|
|
} else {
|
|
|
/* See comment in pte_alloc_one_map() */
|
|
|
- if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
|
|
|
+ if (pmd_devmap_trans_unstable(vmf->pmd))
|
|
|
return 0;
|
|
|
/*
|
|
|
* A regular pmd is established and it can't morph into a huge
|