@@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
  * is not the case is if a reserve map was changed between calls. It
  * is the responsibility of the caller to notice the difference and
  * take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed. It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
  */
 enum vma_resv_mode {
 	VMA_NEEDS_RESV,
 	VMA_COMMIT_RESV,
 	VMA_END_RESV,
+	VMA_ADD_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
 				struct vm_area_struct *vma, unsigned long addr,
@@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
 		region_abort(resv, idx, idx + 1);
 		ret = 0;
 		break;
+	case VMA_ADD_RESV:
+		if (vma->vm_flags & VM_MAYSHARE)
+			ret = region_add(resv, idx, idx + 1);
+		else {
+			region_abort(resv, idx, idx + 1);
+			ret = region_del(resv, idx, idx + 1);
+		}
+		break;
 	default:
 		BUG();
 	}
@@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
 	(void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
+static long vma_add_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths. In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed. If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page. When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page can not adjust the reserve map. Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address,
+			struct page *page)
+{
+	if (unlikely(PagePrivate(page))) {
+		long rc = vma_needs_reservation(h, vma, address);
+
+		if (unlikely(rc < 0)) {
+			/*
+			 * Rare out of memory condition in reserve map
+			 * manipulation. Clear PagePrivate so that
+			 * global reserve count will not be incremented
+			 * by free_huge_page. This will make it appear
+			 * as though the reservation for this page was
+			 * consumed. This may prevent the task from
+			 * faulting in the page at a later time. This
+			 * is better than inconsistent global huge page
+			 * accounting of reserve counts.
+			 */
+			ClearPagePrivate(page);
+		} else if (rc) {
+			rc = vma_add_reservation(h, vma, address);
+			if (unlikely(rc < 0))
+				/*
+				 * See above comment about rare out of
+				 * memory condition.
+				 */
+				ClearPagePrivate(page);
+		} else
+			vma_end_reservation(h, vma, address);
+	}
+}
+
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
@@ -3498,6 +3562,7 @@ retry_avoidcopy:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
+	restore_reserve_on_error(h, vma, address, new_page);
 	put_page(new_page);
 out_release_old:
 	put_page(old_page);
@@ -3680,6 +3745,7 @@ backout:
 	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
+	restore_reserve_on_error(h, vma, address, page);
 	put_page(page);
 	goto out;
 }
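
For context, a minimal caller-side sketch (not part of the patch) of the pattern the new helper supports: a hugetlb path allocates a page with alloc_huge_page() and, if a later step fails, calls restore_reserve_on_error() before put_page() so the reserve map stays consistent with the global reserve count adjustment free_huge_page() will make when PagePrivate is set. The function hugetlb_example_path() and its failing step map_step() are hypothetical; only alloc_huge_page(), restore_reserve_on_error(), and put_page() come from the code above.

/* Illustrative only -- hugetlb_example_path() and map_step() are hypothetical. */
static int hugetlb_example_path(struct hstate *h, struct vm_area_struct *vma,
				unsigned long address)
{
	struct page *page;
	int ret;

	/* May consume a reservation and set PagePrivate on the new page. */
	page = alloc_huge_page(vma, address, 0);
	if (IS_ERR(page))
		return PTR_ERR(page);

	ret = map_step(vma, address, page);	/* hypothetical step that can fail */
	if (ret) {
		/*
		 * Error path: adjust the reserve map to match the global
		 * reserve count change free_huge_page() will make, then
		 * drop the page.
		 */
		restore_reserve_on_error(h, vma, address, page);
		put_page(page);
	}
	return ret;
}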