|
@@ -560,41 +560,47 @@ pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
|
|
|
pmd_t pmd;
|
|
|
|
|
|
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
|
|
- if (pmd_trans_huge(*pmdp)) {
|
|
|
- pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
|
|
|
- } else {
|
|
|
- /*
|
|
|
- * khugepaged calls this for normal pmd
|
|
|
- */
|
|
|
- pmd = *pmdp;
|
|
|
- pmd_clear(pmdp);
|
|
|
- /*
|
|
|
- * Wait for all pending hash_page to finish. This is needed
|
|
|
- * in case of subpage collapse. When we collapse normal pages
|
|
|
- * to hugepage, we first clear the pmd, then invalidate all
|
|
|
- * the PTE entries. The assumption here is that any low level
|
|
|
- * page fault will see a none pmd and take the slow path that
|
|
|
- * will wait on mmap_sem. But we could very well be in a
|
|
|
- * hash_page with local ptep pointer value. Such a hash page
|
|
|
- * can result in adding new HPTE entries for normal subpages.
|
|
|
- * That means we could be modifying the page content as we
|
|
|
- * copy them to a huge page. So wait for parallel hash_page
|
|
|
- * to finish before invalidating HPTE entries. We can do this
|
|
|
- * by sending an IPI to all the cpus and executing a dummy
|
|
|
- * function there.
|
|
|
- */
|
|
|
- kick_all_cpus_sync();
|
|
|
- /*
|
|
|
- * Now invalidate the hpte entries in the range
|
|
|
- * covered by pmd. This make sure we take a
|
|
|
- * fault and will find the pmd as none, which will
|
|
|
- * result in a major fault which takes mmap_sem and
|
|
|
- * hence wait for collapse to complete. Without this
|
|
|
- * the __collapse_huge_page_copy can result in copying
|
|
|
- * the old content.
|
|
|
- */
|
|
|
- flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
|
|
|
- }
|
|
|
+ VM_BUG_ON(!pmd_trans_huge(*pmdp));
|
|
|
+ pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
|
|
|
+ return pmd;
|
|
|
+}
|
|
|
+
|
|
|
+pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
|
|
|
+ pmd_t *pmdp)
|
|
|
+{
|
|
|
+ pmd_t pmd;
|
|
|
+
|
|
|
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK); /* no bits outside the mask */
|
|
|
+ VM_BUG_ON(pmd_trans_huge(*pmdp)); /* only valid for a non-huge pmd */
|
|
|
+
|
|
|
+ pmd = *pmdp;
|
|
|
+ pmd_clear(pmdp);
|
|
|
+ /*
|
|
|
+ * Wait for all pending hash_page to finish. This is needed
|
|
|
+ * in case of subpage collapse. When we collapse normal pages
|
|
|
+ * to hugepage, we first clear the pmd, then invalidate all
|
|
|
+ * the PTE entries. The assumption here is that any low level
|
|
|
+ * page fault will see a none pmd and take the slow path that
|
|
|
+ * will wait on mmap_sem. But we could very well be in a
|
|
|
+ * hash_page with local ptep pointer value. Such a hash_page
|
|
|
+ * can result in adding new HPTE entries for normal subpages.
|
|
|
+ * That means we could be modifying the page content as we
|
|
|
+ * copy them to a huge page. So wait for parallel hash_page
|
|
|
+ * to finish before invalidating HPTE entries. We can do this
|
|
|
+ * by sending an IPI to all the cpus and executing a dummy
|
|
|
+ * function there.
|
|
|
+ */
|
|
|
+ kick_all_cpus_sync();
|
|
|
+ /*
|
|
|
+ * Now invalidate the hpte entries in the range
|
|
|
+ * covered by pmd. This makes sure we take a
|
|
|
+ * fault and will find the pmd as none, which will
|
|
|
+ * result in a major fault which takes mmap_sem and
|
|
|
+ * hence wait for collapse to complete. Without this
|
|
|
+ * the __collapse_huge_page_copy can result in copying
|
|
|
+ * the old content.
|
|
|
+ */
|
|
|
+ flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
|
|
|
return pmd;
|
|
|
}
|
|
|
|