@@ -20,6 +20,9 @@
 #include <linux/backing-dev.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
+
+#include <asm/tlb.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -32,6 +35,7 @@ static int madvise_need_mmap_write(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_FREE:
 		return 0;
 	default:
 		/* be safe, default to 1. list exceptions explicitly */
@@ -256,6 +260,163 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	return 0;
 }
 
+static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
+
+{
+	struct mmu_gather *tlb = walk->private;
+	struct mm_struct *mm = tlb->mm;
+	struct vm_area_struct *vma = walk->vma;
+	spinlock_t *ptl;
+	pte_t *orig_pte, *pte, ptent;
+	struct page *page;
+
+	split_huge_pmd(vma, pmd, addr);
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		ptent = *pte;
+
+		if (!pte_present(ptent))
+			continue;
+
+		page = vm_normal_page(vma, addr, ptent);
+		if (!page)
+			continue;
+
+		/*
+		 * If pmd isn't transhuge but the page is THP and
+		 * is owned by only this process, split it and
+		 * deactivate all pages.
+		 */
+		if (PageTransCompound(page)) {
+			if (page_mapcount(page) != 1)
+				goto out;
+			get_page(page);
+			if (!trylock_page(page)) {
+				put_page(page);
+				goto out;
+			}
+			pte_unmap_unlock(orig_pte, ptl);
+			if (split_huge_page(page)) {
+				unlock_page(page);
+				put_page(page);
+				pte_offset_map_lock(mm, pmd, addr, &ptl);
+				goto out;
+			}
+			put_page(page);
+			unlock_page(page);
+			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+			pte--;
+			addr -= PAGE_SIZE;
+			continue;
+		}
+
+		VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+		if (PageSwapCache(page) || PageDirty(page)) {
+			if (!trylock_page(page))
+				continue;
+			/*
+			 * If page is shared with others, we couldn't clear
+			 * PG_dirty of the page.
+			 */
+			if (page_mapcount(page) != 1) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (PageSwapCache(page) && !try_to_free_swap(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			ClearPageDirty(page);
+			unlock_page(page);
+		}
+
+		if (pte_young(ptent) || pte_dirty(ptent)) {
+			/*
+			 * Some of architecture(ex, PPC) don't update TLB
+			 * with set_pte_at and tlb_remove_tlb_entry so for
+			 * the portability, remap the pte with old|clean
+			 * after pte clearing.
+			 */
+			ptent = ptep_get_and_clear_full(mm, addr, pte,
+							tlb->fullmm);
+
+			ptent = pte_mkold(ptent);
+			ptent = pte_mkclean(ptent);
+			set_pte_at(mm, addr, pte, ptent);
+			tlb_remove_tlb_entry(tlb, pte, addr);
+		}
+	}
+out:
+	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(orig_pte, ptl);
+	cond_resched();
+	return 0;
+}
+
+static void madvise_free_page_range(struct mmu_gather *tlb,
+			     struct vm_area_struct *vma,
+			     unsigned long addr, unsigned long end)
+{
+	struct mm_walk free_walk = {
+		.pmd_entry = madvise_free_pte_range,
+		.mm = vma->vm_mm,
+		.private = tlb,
+	};
+
+	tlb_start_vma(tlb, vma);
+	walk_page_range(addr, end, &free_walk);
+	tlb_end_vma(tlb, vma);
+}
+
+static int madvise_free_single_vma(struct vm_area_struct *vma,
+			unsigned long start_addr, unsigned long end_addr)
+{
+	unsigned long start, end;
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
+
+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+		return -EINVAL;
+
+	/* MADV_FREE works for only anon vma at the moment */
+	if (!vma_is_anonymous(vma))
+		return -EINVAL;
+
+	start = max(vma->vm_start, start_addr);
+	if (start >= vma->vm_end)
+		return -EINVAL;
+	end = min(vma->vm_end, end_addr);
+	if (end <= vma->vm_start)
+		return -EINVAL;
+
+	lru_add_drain();
+	tlb_gather_mmu(&tlb, mm, start, end);
+	update_hiwater_rss(mm);
+
+	mmu_notifier_invalidate_range_start(mm, start, end);
+	madvise_free_page_range(&tlb, vma, start, end);
+	mmu_notifier_invalidate_range_end(mm, start, end);
+	tlb_finish_mmu(&tlb, start, end);
+
+	return 0;
+}
+
+static long madvise_free(struct vm_area_struct *vma,
+			     struct vm_area_struct **prev,
+			     unsigned long start, unsigned long end)
+{
+	*prev = vma;
+	return madvise_free_single_vma(vma, start, end);
+}
+
 /*
  * Application no longer needs these pages. If the pages are dirty,
  * it's OK to just throw them away. The app will be more careful about
@@ -379,6 +540,14 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		return madvise_remove(vma, prev, start, end);
 	case MADV_WILLNEED:
 		return madvise_willneed(vma, prev, start, end);
+	case MADV_FREE:
+		/*
+		 * XXX: In this implementation, MADV_FREE works like
+		 * MADV_DONTNEED on swapless system or full swap.
+		 */
+		if (get_nr_swap_pages() > 0)
+			return madvise_free(vma, prev, start, end);
+		/* passthrough */
 	case MADV_DONTNEED:
 		return madvise_dontneed(vma, prev, start, end);
 	default:
@@ -398,6 +567,7 @@ madvise_behavior_valid(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_FREE:
 #ifdef CONFIG_KSM
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE: