|
@@ -601,14 +601,24 @@ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
|
|
|
mm->map_count++;
|
|
|
}
|
|
|
|
|
|
-static inline void
|
|
|
-__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
|
- struct vm_area_struct *prev)
|
|
|
+static __always_inline void __vma_unlink_common(struct mm_struct *mm,
|
|
|
+ struct vm_area_struct *vma,
|
|
|
+ struct vm_area_struct *prev,
|
|
|
+ bool has_prev)
|
|
|
{
|
|
|
struct vm_area_struct *next;
|
|
|
|
|
|
vma_rb_erase(vma, &mm->mm_rb);
|
|
|
- prev->vm_next = next = vma->vm_next;
|
|
|
+ next = vma->vm_next;
|
|
|
+ if (has_prev)
|
|
|
+ prev->vm_next = next;
|
|
|
+ else {
|
|
|
+ prev = vma->vm_prev;
|
|
|
+ if (prev)
|
|
|
+ prev->vm_next = next;
|
|
|
+ else
|
|
|
+ mm->mmap = next;
|
|
|
+ }
|
|
|
if (next)
|
|
|
next->vm_prev = prev;
|
|
|
|
|
@@ -616,6 +626,19 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
|
vmacache_invalidate(mm);
|
|
|
}
|
|
|
|
|
|
+static inline void __vma_unlink_prev(struct mm_struct *mm,
|
|
|
+ struct vm_area_struct *vma,
|
|
|
+ struct vm_area_struct *prev)
|
|
|
+{
|
|
|
+ __vma_unlink_common(mm, vma, prev, true);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void __vma_unlink(struct mm_struct *mm,
|
|
|
+ struct vm_area_struct *vma)
|
|
|
+{
|
|
|
+ __vma_unlink_common(mm, vma, NULL, false);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
|
|
|
* is already present in an i_mmap tree without adjusting the tree.
|
|
@@ -623,11 +646,12 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
|
* are necessary. The "insert" vma (if any) is to be inserted
|
|
|
* before we drop the necessary locks.
|
|
|
*/
|
|
|
-int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
|
|
- unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
|
|
|
+int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
|
|
+ unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
|
|
|
+ struct vm_area_struct *expand)
|
|
|
{
|
|
|
struct mm_struct *mm = vma->vm_mm;
|
|
|
- struct vm_area_struct *next = vma->vm_next;
|
|
|
+ struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
|
|
|
struct address_space *mapping = NULL;
|
|
|
struct rb_root *root = NULL;
|
|
|
struct anon_vma *anon_vma = NULL;
|
|
@@ -643,9 +667,38 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
|
|
/*
|
|
|
* vma expands, overlapping all the next, and
|
|
|
* perhaps the one after too (mprotect case 6).
|
|
|
+ * The only other cases that get here are
|
|
|
+ * case 1, case 7 and case 8.
|
|
|
*/
|
|
|
- remove_next = 1 + (end > next->vm_end);
|
|
|
- end = next->vm_end;
|
|
|
+ if (next == expand) {
|
|
|
+ /*
|
|
|
+ * The only case where we don't expand "vma"
|
|
|
+ * and we expand "next" instead is case 8.
|
|
|
+ */
|
|
|
+ VM_WARN_ON(end != next->vm_end);
|
|
|
+ /*
|
|
|
+ * remove_next == 3 means we're
|
|
|
+ * removing "vma" and that to do so we
|
|
|
+ * swapped "vma" and "next".
|
|
|
+ */
|
|
|
+ remove_next = 3;
|
|
|
+ VM_WARN_ON(file != next->vm_file);
|
|
|
+ swap(vma, next);
|
|
|
+ } else {
|
|
|
+ VM_WARN_ON(expand != vma);
|
|
|
+ /*
|
|
|
+ * case 1, 6, 7, remove_next == 2 is case 6,
|
|
|
+ * remove_next == 1 is case 1 or 7.
|
|
|
+ */
|
|
|
+ remove_next = 1 + (end > next->vm_end);
|
|
|
+ VM_WARN_ON(remove_next == 2 &&
|
|
|
+ end != next->vm_next->vm_end);
|
|
|
+ VM_WARN_ON(remove_next == 1 &&
|
|
|
+ end != next->vm_end);
|
|
|
+ /* trim end to next, for case 6 first pass */
|
|
|
+ end = next->vm_end;
|
|
|
+ }
|
|
|
+
|
|
|
exporter = next;
|
|
|
importer = vma;
|
|
|
|
|
@@ -664,6 +717,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
|
|
adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
|
|
|
exporter = next;
|
|
|
importer = vma;
|
|
|
+ VM_WARN_ON(expand != importer);
|
|
|
} else if (end < vma->vm_end) {
|
|
|
/*
|
|
|
* vma shrinks, and !insert tells it's not
|
|
@@ -673,6 +727,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
|
|
adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
|
|
|
exporter = vma;
|
|
|
importer = next;
|
|
|
+ VM_WARN_ON(expand != importer);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -690,7 +745,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
|
|
}
|
|
|
}
|
|
|
again:
|
|
|
- vma_adjust_trans_huge(vma, start, end, adjust_next);
|
|
|
+ vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
|
|
|
|
|
|
if (file) {
|
|
|
mapping = file->f_mapping;
|
|
@@ -716,8 +771,8 @@ again:
|
|
|
if (!anon_vma && adjust_next)
|
|
|
anon_vma = next->anon_vma;
|
|
|
if (anon_vma) {
|
|
|
- VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
|
|
|
- anon_vma != next->anon_vma, next);
|
|
|
+ VM_WARN_ON(adjust_next && next->anon_vma &&
|
|
|
+ anon_vma != next->anon_vma);
|
|
|
anon_vma_lock_write(anon_vma);
|
|
|
anon_vma_interval_tree_pre_update_vma(vma);
|
|
|
if (adjust_next)
|
|
@@ -757,7 +812,11 @@ again:
|
|
|
* vma_merge has merged next into vma, and needs
|
|
|
* us to remove next before dropping the locks.
|
|
|
*/
|
|
|
- __vma_unlink(mm, next, vma);
|
|
|
+ if (remove_next != 3)
|
|
|
+ __vma_unlink_prev(mm, next, vma);
|
|
|
+ else
|
|
|
+ /* vma is not before next if they've been swapped */
|
|
|
+ __vma_unlink(mm, next);
|
|
|
if (file)
|
|
|
__remove_shared_vm_struct(next, file, mapping);
|
|
|
} else if (insert) {
|
|
@@ -809,7 +868,27 @@ again:
|
|
|
* we must remove another next too. It would clutter
|
|
|
* up the code too much to do both in one go.
|
|
|
*/
|
|
|
- next = vma->vm_next;
|
|
|
+ if (remove_next != 3) {
|
|
|
+ /*
|
|
|
+ * If "next" was removed and vma->vm_end was
|
|
|
+ * expanded (up) over it, in turn
|
|
|
+ * "next->vm_prev->vm_end" changed and the
|
|
|
+ * "vma->vm_next" gap must be updated.
|
|
|
+ */
|
|
|
+ next = vma->vm_next;
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * For the scope of the comment "next" and
|
|
|
+ * "vma" considered pre-swap(): if "vma" was
|
|
|
+ * removed, next->vm_start was expanded (down)
|
|
|
+ * over it and the "next" gap must be updated.
|
|
|
+ * Because of the swap() the post-swap() "vma"
|
|
|
+ * actually points to pre-swap() "next"
|
|
|
+ * (post-swap() "next" as opposed is now a
|
|
|
+ * dangling pointer).
|
|
|
+ */
|
|
|
+ next = vma;
|
|
|
+ }
|
|
|
if (remove_next == 2) {
|
|
|
remove_next = 1;
|
|
|
end = next->vm_end;
|
|
@@ -958,13 +1037,24 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
|
|
|
* cannot merge might become might become might become
|
|
|
* PPNNNNNNNNNN PPPPPPPPPPNN PPPPPPPPPPPP 6 or
|
|
|
* mmap, brk or case 4 below case 5 below PPPPPPPPXXXX 7 or
|
|
|
- * mremap move: PPPPNNNNNNNN 8
|
|
|
+ * mremap move: PPPPXXXXXXXX 8
|
|
|
* AAAA
|
|
|
* PPPP NNNN PPPPPPPPPPPP PPPPPPPPNNNN PPPPNNNNNNNN
|
|
|
* might become case 1 below case 2 below case 3 below
|
|
|
*
|
|
|
- * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
|
|
|
- * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
|
|
|
+ * It is important for case 8 that the vma NNNN overlapping the
|
|
|
+ * region AAAA is never going to extended over XXXX. Instead XXXX must
|
|
|
+ * be extended in region AAAA and NNNN must be removed. This way in
|
|
|
+ * all cases where vma_merge succeeds, the moment vma_adjust drops the
|
|
|
+ * rmap_locks, the properties of the merged vma will be already
|
|
|
+ * correct for the whole merged range. Some of those properties like
|
|
|
+ * vm_page_prot/vm_flags may be accessed by rmap_walks and they must
|
|
|
+ * be correct for the whole merged range immediately after the
|
|
|
+ * rmap_locks are released. Otherwise if XXXX would be removed and
|
|
|
+ * NNNN would be extended over the XXXX range, remove_migration_ptes
|
|
|
+ * or other rmap walkers (if working on addresses beyond the "end"
|
|
|
+ * parameter) may establish ptes with the wrong permissions of NNNN
|
|
|
+ * instead of the right permissions of XXXX.
|
|
|
*/
|
|
|
struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
|
|
struct vm_area_struct *prev, unsigned long addr,
|
|
@@ -989,9 +1079,14 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
|
|
else
|
|
|
next = mm->mmap;
|
|
|
area = next;
|
|
|
- if (next && next->vm_end == end) /* cases 6, 7, 8 */
|
|
|
+ if (area && area->vm_end == end) /* cases 6, 7, 8 */
|
|
|
next = next->vm_next;
|
|
|
|
|
|
+ /* verify some invariants that must be enforced by the caller */
|
|
|
+ VM_WARN_ON(prev && addr <= prev->vm_start);
|
|
|
+ VM_WARN_ON(area && end > area->vm_end);
|
|
|
+ VM_WARN_ON(addr >= end);
|
|
|
+
|
|
|
/*
|
|
|
* Can it merge with the predecessor?
|
|
|
*/
|
|
@@ -1012,11 +1107,12 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
|
|
is_mergeable_anon_vma(prev->anon_vma,
|
|
|
next->anon_vma, NULL)) {
|
|
|
/* cases 1, 6 */
|
|
|
- err = vma_adjust(prev, prev->vm_start,
|
|
|
- next->vm_end, prev->vm_pgoff, NULL);
|
|
|
+ err = __vma_adjust(prev, prev->vm_start,
|
|
|
+ next->vm_end, prev->vm_pgoff, NULL,
|
|
|
+ prev);
|
|
|
} else /* cases 2, 5, 7 */
|
|
|
- err = vma_adjust(prev, prev->vm_start,
|
|
|
- end, prev->vm_pgoff, NULL);
|
|
|
+ err = __vma_adjust(prev, prev->vm_start,
|
|
|
+ end, prev->vm_pgoff, NULL, prev);
|
|
|
if (err)
|
|
|
return NULL;
|
|
|
khugepaged_enter_vma_merge(prev, vm_flags);
|
|
@@ -1032,11 +1128,18 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
|
|
anon_vma, file, pgoff+pglen,
|
|
|
vm_userfaultfd_ctx)) {
|
|
|
if (prev && addr < prev->vm_end) /* case 4 */
|
|
|
- err = vma_adjust(prev, prev->vm_start,
|
|
|
- addr, prev->vm_pgoff, NULL);
|
|
|
- else /* cases 3, 8 */
|
|
|
- err = vma_adjust(area, addr, next->vm_end,
|
|
|
- next->vm_pgoff - pglen, NULL);
|
|
|
+ err = __vma_adjust(prev, prev->vm_start,
|
|
|
+ addr, prev->vm_pgoff, NULL, next);
|
|
|
+ else { /* cases 3, 8 */
|
|
|
+ err = __vma_adjust(area, addr, next->vm_end,
|
|
|
+ next->vm_pgoff - pglen, NULL, next);
|
|
|
+ /*
|
|
|
+ * In case 3 area is already equal to next and
|
|
|
+ * this is a noop, but in case 8 "area" has
|
|
|
+ * been removed and next was expanded over it.
|
|
|
+ */
|
|
|
+ area = next;
|
|
|
+ }
|
|
|
if (err)
|
|
|
return NULL;
|
|
|
khugepaged_enter_vma_merge(area, vm_flags);
|