@@ -324,11 +324,48 @@ static void remove_huge_page(struct page *page)
 	delete_from_page_cache(page);
 }
 
+static void
+hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+{
+	struct vm_area_struct *vma;
+
+	/*
+	 * end == 0 indicates that the entire range after
+	 * start should be unmapped.
+	 */
+	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+		unsigned long v_offset;
+		unsigned long v_end;
+
+		/*
+		 * Can the expression below overflow on 32-bit arches?
+		 * No, because the interval tree returns us only those vmas
+		 * which overlap the truncated area starting at pgoff,
+		 * and no vma on a 32-bit arch can span beyond the 4GB.
+		 */
+		if (vma->vm_pgoff < start)
+			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
+		else
+			v_offset = 0;
+
+		if (!end)
+			v_end = vma->vm_end;
+		else {
+			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
+				+ vma->vm_start;
+			if (v_end > vma->vm_end)
+				v_end = vma->vm_end;
+		}
+
+		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
+			NULL);
+	}
+}
+
 /*
  * remove_inode_hugepages handles two distinct cases: truncation and hole
  * punch. There are subtle differences in operation for each case.
-
+ *
  * truncation is indicated by end of range being LLONG_MAX
  *	In this case, we first scan the range and release found pages.
  *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
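
The pgoff-to-virtual-address arithmetic in the new hugetlb_vmdelete_list()
above is easy to get wrong, so here is a minimal user-space sketch of it
(not part of the patch). The vma_stub struct and every value in it are
hypothetical stand-ins for a real vm_area_struct, and PAGE_SHIFT is assumed
to be 12 (4K base pages):

/* Standalone sketch of the offset math in hugetlb_vmdelete_list(). */
#include <stdio.h>

#define PAGE_SHIFT	12	/* assumed 4K base page size */

struct vma_stub {
	unsigned long vm_start;	/* first virtual address of the mapping */
	unsigned long vm_end;	/* one past the last mapped address */
	unsigned long vm_pgoff;	/* file offset of vm_start, in pages */
};

int main(void)
{
	/* hypothetical vma mapping file pages [512, 1024) */
	struct vma_stub vma = {
		.vm_start = 0x700000000000UL,
		.vm_end   = 0x700000200000UL,
		.vm_pgoff = 512,
	};
	unsigned long start = 600, end = 700;	/* punched range, in pages */
	unsigned long v_offset, v_end;

	/* same logic as the loop body above */
	if (vma.vm_pgoff < start)
		v_offset = (start - vma.vm_pgoff) << PAGE_SHIFT;
	else
		v_offset = 0;

	if (!end)	/* end == 0 means "unmap everything after start" */
		v_end = vma.vm_end;
	else {
		v_end = ((end - vma.vm_pgoff) << PAGE_SHIFT) + vma.vm_start;
		if (v_end > vma.vm_end)
			v_end = vma.vm_end;
	}

	printf("unmap [%#lx, %#lx)\n", vma.vm_start + v_offset, v_end);
	return 0;
}

This prints "unmap [0x700000058000, 0x7000000bc000)": file pages [600, 700)
sit 88 base pages past vm_pgoff, so the unmapped span begins
88 << PAGE_SHIFT bytes past vm_start.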

@@ -379,6 +416,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 		for (i = 0; i < pagevec_count(&pvec); ++i) {
 			struct page *page = pvec.pages[i];
+			bool rsv_on_error;
 			u32 hash;
 
 			/*
@@ -395,37 +433,43 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 					mapping, next, 0);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
-			lock_page(page);
-			if (likely(!page_mapped(page))) {
-				bool rsv_on_error = !PagePrivate(page);
-				/*
-				 * We must free the huge page and remove
-				 * from page cache (remove_huge_page) BEFORE
-				 * removing the region/reserve map
-				 * (hugetlb_unreserve_pages). In rare out
-				 * of memory conditions, removal of the
-				 * region/reserve map could fail. Before
-				 * free'ing the page, note PagePrivate which
-				 * is used in case of error.
-				 */
-				remove_huge_page(page);
-				freed++;
-				if (!truncate_op) {
-					if (unlikely(hugetlb_unreserve_pages(
-							inode, next,
-							next + 1, 1)))
-						hugetlb_fix_reserve_counts(
-							inode, rsv_on_error);
-				}
-			} else {
-				/*
-				 * If page is mapped, it was faulted in after
-				 * being unmapped. It indicates a race between
-				 * hole punch and page fault. Do nothing in
-				 * this case. Getting here in a truncate
-				 * operation is a bug.
-				 */
+			/*
+			 * If page is mapped, it was faulted in after being
+			 * unmapped in caller. Unmap (again) now after taking
+			 * the fault mutex. The mutex will prevent faults
+			 * until we finish removing the page.
+			 *
+			 * This race can only happen in the hole punch case.
+			 * Getting here in a truncate operation is a bug.
+			 */
+			if (unlikely(page_mapped(page))) {
 				BUG_ON(truncate_op);
+
+				i_mmap_lock_write(mapping);
+				hugetlb_vmdelete_list(&mapping->i_mmap,
+					next * pages_per_huge_page(h),
+					(next + 1) * pages_per_huge_page(h));
+				i_mmap_unlock_write(mapping);
+			}
+
+			lock_page(page);
+			/*
+			 * We must free the huge page and remove from page
+			 * cache (remove_huge_page) BEFORE removing the
+			 * region/reserve map (hugetlb_unreserve_pages). In
+			 * rare out of memory conditions, removal of the
+			 * region/reserve map could fail. Before free'ing
+			 * the page, note PagePrivate which is used in case
+			 * of error.
+			 */
+			rsv_on_error = !PagePrivate(page);
+			remove_huge_page(page);
+			freed++;
+			if (!truncate_op) {
+				if (unlikely(hugetlb_unreserve_pages(inode,
+							next, next + 1, 1)))
+					hugetlb_fix_reserve_counts(inode,
+							rsv_on_error);
+			}
 
 			unlock_page(page);
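
For context on the race the rewritten block above closes: user space can
punch a hole in a mapped hugetlbfs file while another thread faults the
same range back in. A minimal sketch of the punch side follows (not part
of the patch); the mount point, file name, and 2MB huge page size are
assumptions, and a kernel with hugetlbfs hole punch support is required:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE	(2UL * 1024 * 1024)	/* assumed huge page size */

int main(void)
{
	/* hypothetical hugetlbfs mount point and file */
	int fd = open("/mnt/hugetlbfs/demo", O_CREAT | O_RDWR, 0600);
	char *p;

	if (fd < 0 || ftruncate(fd, HPAGE_SIZE))
		return 1;
	p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		 fd, 0);
	if (p == MAP_FAILED)
		return 1;

	memset(p, 1, HPAGE_SIZE);	/* fault the huge page in */

	/*
	 * Punch the page out. A fault from a concurrent thread can
	 * re-instantiate it between the caller's unmap and the page
	 * cache removal; remove_inode_hugepages() now re-checks
	 * page_mapped() and unmaps again under the fault mutex so the
	 * punched range really ends up empty.
	 */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      0, HPAGE_SIZE))
		perror("fallocate");

	munmap(p, HPAGE_SIZE);
	close(fd);
	return 0;
}

Taking hugetlb_fault_mutex_table[hash] before the page_mapped() check is
what makes the second unmap sufficient: further faults on this page are
held off until it is gone from the page cache.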

@@ -452,44 +496,6 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
-static inline void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
-{
-	struct vm_area_struct *vma;
-
-	/*
-	 * end == 0 indicates that the entire range after
-	 * start should be unmapped.
-	 */
-	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
-		unsigned long v_offset;
-		unsigned long v_end;
-
-		/*
-		 * Can the expression below overflow on 32-bit arches?
-		 * No, because the interval tree returns us only those vmas
-		 * which overlap the truncated area starting at pgoff,
-		 * and no vma on a 32-bit arch can span beyond the 4GB.
-		 */
-		if (vma->vm_pgoff < start)
-			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
-		else
-			v_offset = 0;
-
-		if (!end)
-			v_end = vma->vm_end;
-		else {
-			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
-				+ vma->vm_start;
-			if (v_end > vma->vm_end)
-				v_end = vma->vm_end;
-		}
-
-		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
-			NULL);
-	}
-}
-
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	pgoff_t pgoff;