@@ -2977,7 +2977,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 }
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address)
+		unsigned long address, bool freeze)
 {
 	spinlock_t *ptl;
 	struct mm_struct *mm = vma->vm_mm;
@@ -2994,7 +2994,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			page = NULL;
 	} else if (!pmd_devmap(*pmd))
 		goto out;
-	__split_huge_pmd_locked(vma, pmd, haddr, false);
+	__split_huge_pmd_locked(vma, pmd, haddr, freeze);
 out:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
@@ -3006,7 +3006,8 @@ out:
 	}
 }
 
-void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address)
+void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
+		bool freeze, struct page *page)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -3023,11 +3024,20 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address)
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)))
 		return;
+
+	/*
+	 * If the caller asks to set up migration entries, we need a page to
+	 * check the pmd against. Otherwise we can end up replacing the wrong page.
+	 */
+	VM_BUG_ON(freeze && !page);
+	if (page && page != pmd_page(*pmd))
+		return;
+
 	/*
 	 * Caller holds the mmap_sem write mode, so a huge pmd cannot
 	 * materialize from under us.
 	 */
-	split_huge_pmd(vma, pmd, address);
+	__split_huge_pmd(vma, pmd, address, freeze);
 }
 
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -3043,7 +3053,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (start & ~HPAGE_PMD_MASK &&
 	    (start & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, start);
+		split_huge_pmd_address(vma, start, false, NULL);
 
 	/*
 	 * If the new end address isn't hpage aligned and it could
@@ -3053,7 +3063,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (end & ~HPAGE_PMD_MASK &&
 	    (end & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, end);
+		split_huge_pmd_address(vma, end, false, NULL);
 
 	/*
 	 * If we're also updating the vma->vm_next->vm_start, if the new
@@ -3067,184 +3077,36 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 		if (nstart & ~HPAGE_PMD_MASK &&
 		    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
 		    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
-			split_huge_pmd_address(next, nstart);
+			split_huge_pmd_address(next, nstart, false, NULL);
 	}
 }
 
-static void freeze_page_vma(struct vm_area_struct *vma, struct page *page,
-		unsigned long address)
+static void freeze_page(struct page *page)
 {
-	unsigned long haddr = address & HPAGE_PMD_MASK;
-	spinlock_t *ptl;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	int i, nr = HPAGE_PMD_NR;
-
-	/* Skip pages which doesn't belong to the VMA */
-	if (address < vma->vm_start) {
-		int off = (vma->vm_start - address) >> PAGE_SHIFT;
-		page += off;
-		nr -= off;
-		address = vma->vm_start;
-	}
-
-	pgd = pgd_offset(vma->vm_mm, address);
-	if (!pgd_present(*pgd))
-		return;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return;
-	pmd = pmd_offset(pud, address);
-	ptl = pmd_lock(vma->vm_mm, pmd);
-	if (!pmd_present(*pmd)) {
-		spin_unlock(ptl);
-		return;
-	}
-	if (pmd_trans_huge(*pmd)) {
-		if (page == pmd_page(*pmd))
-			__split_huge_pmd_locked(vma, pmd, haddr, true);
-		spin_unlock(ptl);
-		return;
-	}
-	spin_unlock(ptl);
-
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
-	for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) {
-		pte_t entry, swp_pte;
-		swp_entry_t swp_entry;
-
-		/*
-		 * We've just crossed page table boundary: need to map next one.
-		 * It can happen if THP was mremaped to non PMD-aligned address.
-		 */
-		if (unlikely(address == haddr + HPAGE_PMD_SIZE)) {
-			pte_unmap_unlock(pte - 1, ptl);
-			pmd = mm_find_pmd(vma->vm_mm, address);
-			if (!pmd)
-				return;
-			pte = pte_offset_map_lock(vma->vm_mm, pmd,
-					address, &ptl);
-		}
-
-		if (!pte_present(*pte))
-			continue;
-		if (page_to_pfn(page) != pte_pfn(*pte))
-			continue;
-		flush_cache_page(vma, address, page_to_pfn(page));
-		entry = ptep_clear_flush(vma, address, pte);
-		if (pte_dirty(entry))
-			SetPageDirty(page);
-		swp_entry = make_migration_entry(page, pte_write(entry));
-		swp_pte = swp_entry_to_pte(swp_entry);
-		if (pte_soft_dirty(entry))
-			swp_pte = pte_swp_mksoft_dirty(swp_pte);
-		set_pte_at(vma->vm_mm, address, pte, swp_pte);
-		page_remove_rmap(page, false);
-		put_page(page);
-	}
-	pte_unmap_unlock(pte - 1, ptl);
-}
-
-static void freeze_page(struct anon_vma *anon_vma, struct page *page)
-{
-	struct anon_vma_chain *avc;
-	pgoff_t pgoff = page_to_pgoff(page);
+	enum ttu_flags ttu_flags = TTU_MIGRATION | TTU_IGNORE_MLOCK |
+		TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED;
+	int i, ret;
 
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
-	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff,
-			pgoff + HPAGE_PMD_NR - 1) {
-		unsigned long address = __vma_address(page, avc->vma);
-
-		mmu_notifier_invalidate_range_start(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-		freeze_page_vma(avc->vma, page, address);
-		mmu_notifier_invalidate_range_end(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-	}
-}
-
-static void unfreeze_page_vma(struct vm_area_struct *vma, struct page *page,
-		unsigned long address)
-{
-	spinlock_t *ptl;
-	pmd_t *pmd;
-	pte_t *pte, entry;
-	swp_entry_t swp_entry;
-	unsigned long haddr = address & HPAGE_PMD_MASK;
-	int i, nr = HPAGE_PMD_NR;
-
-	/* Skip pages which doesn't belong to the VMA */
-	if (address < vma->vm_start) {
-		int off = (vma->vm_start - address) >> PAGE_SHIFT;
-		page += off;
-		nr -= off;
-		address = vma->vm_start;
-	}
-
-	pmd = mm_find_pmd(vma->vm_mm, address);
-	if (!pmd)
-		return;
-
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
-	for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) {
-		/*
-		 * We've just crossed page table boundary: need to map next one.
-		 * It can happen if THP was mremaped to non-PMD aligned address.
-		 */
-		if (unlikely(address == haddr + HPAGE_PMD_SIZE)) {
-			pte_unmap_unlock(pte - 1, ptl);
-			pmd = mm_find_pmd(vma->vm_mm, address);
-			if (!pmd)
-				return;
-			pte = pte_offset_map_lock(vma->vm_mm, pmd,
-					address, &ptl);
-		}
-
-		if (!is_swap_pte(*pte))
-			continue;
-
-		swp_entry = pte_to_swp_entry(*pte);
-		if (!is_migration_entry(swp_entry))
-			continue;
-		if (migration_entry_to_page(swp_entry) != page)
-			continue;
-
-		get_page(page);
-		page_add_anon_rmap(page, vma, address, false);
-
-		entry = pte_mkold(mk_pte(page, vma->vm_page_prot));
-		if (PageDirty(page))
-			entry = pte_mkdirty(entry);
-		if (is_write_migration_entry(swp_entry))
-			entry = maybe_mkwrite(entry, vma);
-
-		flush_dcache_page(page);
-		set_pte_at(vma->vm_mm, address, pte, entry);
+	/* We only need TTU_SPLIT_HUGE_PMD once */
+	ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
+	for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
+		/* Cut short if the page is unmapped */
+		if (page_count(page) == 1)
+			return;
 
-		/* No need to invalidate - it was non-present before */
-		update_mmu_cache(vma, address, pte);
+		ret = try_to_unmap(page + i, ttu_flags);
 	}
-	pte_unmap_unlock(pte - 1, ptl);
+	VM_BUG_ON(ret);
 }
 
-static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
+static void unfreeze_page(struct page *page)
 {
-	struct anon_vma_chain *avc;
-	pgoff_t pgoff = page_to_pgoff(page);
-
-	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
-			pgoff, pgoff + HPAGE_PMD_NR - 1) {
-		unsigned long address = __vma_address(page, avc->vma);
+	int i;
 
-		mmu_notifier_invalidate_range_start(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-		unfreeze_page_vma(avc->vma, page, address);
-		mmu_notifier_invalidate_range_end(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-	}
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		remove_migration_ptes(page + i, page + i, true);
 }
 
 static void __split_huge_page_tail(struct page *head, int tail,
@@ -3322,7 +3184,7 @@ static void __split_huge_page(struct page *page, struct list_head *list)
 	ClearPageCompound(head);
 	spin_unlock_irq(&zone->lru_lock);
 
-	unfreeze_page(page_anon_vma(head), head);
+	unfreeze_page(head);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		struct page *subpage = head + i;
@@ -3418,7 +3280,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	mlocked = PageMlocked(page);
-	freeze_page(anon_vma, head);
+	freeze_page(head);
 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
 	/* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -3447,7 +3309,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			BUG();
 	} else {
 		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
-		unfreeze_page(anon_vma, head);
+		unfreeze_page(head);
 		ret = -EBUSY;
 	}
 
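The hunks above only cover the mm/huge_memory.c side of the interface change: callers that do not care about freezing keep their old behaviour by passing freeze=false and page=NULL, as vma_adjust_trans_huge() now does, while rmap walkers that install migration entries are the only callers expected to pass freeze=true together with the page they are unmapping, so split_huge_pmd_address() can check it against pmd_page(*pmd). As a hedged illustration (the generic split_huge_pmd() wrapper lives in include/linux/huge_mm.h, is not part of the hunks shown, and its exact body may differ), such a wrapper would simply forward a constant false for the new argument:

	/*
	 * Illustrative sketch only, not taken from the patch above: preserve
	 * the old "split without freezing" semantics for generic callers by
	 * forwarding freeze == false to the extended helper.
	 */
	#define split_huge_pmd(__vma, __pmd, __address)				\
		do {								\
			pmd_t *____pmd = (__pmd);				\
			if (pmd_trans_huge(*____pmd) || pmd_devmap(*____pmd))	\
				__split_huge_pmd(__vma, __pmd, __address,	\
							false);			\
		} while (0)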