@@ -2070,13 +2070,11 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
  * case, all we need to do here is to mark the page as writable and update
  * any related book-keeping.
  */
-static inline int wp_page_reuse(struct mm_struct *mm,
-			struct vm_area_struct *vma, unsigned long address,
-			pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
-			struct page *page, int page_mkwrite,
-			int dirty_shared)
-	__releases(ptl)
+static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
+			struct page *page, int page_mkwrite, int dirty_shared)
+	__releases(fe->ptl)
 {
+	struct vm_area_struct *vma = fe->vma;
 	pte_t entry;
 	/*
 	 * Clear the pages cpupid information as the existing
@@ -2086,12 +2084,12 @@ static inline int wp_page_reuse(struct mm_struct *mm,
 	if (page)
 		page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
 
-	flush_cache_page(vma, address, pte_pfn(orig_pte));
+	flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
 	entry = pte_mkyoung(orig_pte);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	if (ptep_set_access_flags(vma, address, page_table, entry, 1))
-		update_mmu_cache(vma, address, page_table);
-	pte_unmap_unlock(page_table, ptl);
+	if (ptep_set_access_flags(vma, fe->address, fe->pte, entry, 1))
+		update_mmu_cache(vma, fe->address, fe->pte);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 
 	if (dirty_shared) {
 		struct address_space *mapping;
@@ -2137,30 +2135,31 @@ static inline int wp_page_reuse(struct mm_struct *mm,
  * held to the old page, as well as updating the rmap.
  * - In any case, unlock the PTL and drop the reference we took to the old page.
  */
-static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		pte_t orig_pte, struct page *old_page)
+static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
+		struct page *old_page)
 {
+	struct vm_area_struct *vma = fe->vma;
+	struct mm_struct *mm = vma->vm_mm;
 	struct page *new_page = NULL;
-	spinlock_t *ptl = NULL;
 	pte_t entry;
 	int page_copied = 0;
-	const unsigned long mmun_start = address & PAGE_MASK;	/* For mmu_notifiers */
-	const unsigned long mmun_end = mmun_start + PAGE_SIZE;	/* For mmu_notifiers */
+	const unsigned long mmun_start = fe->address & PAGE_MASK;
+	const unsigned long mmun_end = mmun_start + PAGE_SIZE;
 	struct mem_cgroup *memcg;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 
 	if (is_zero_pfn(pte_pfn(orig_pte))) {
-		new_page = alloc_zeroed_user_highpage_movable(vma, address);
+		new_page = alloc_zeroed_user_highpage_movable(vma, fe->address);
 		if (!new_page)
 			goto oom;
 	} else {
-		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+				fe->address);
 		if (!new_page)
 			goto oom;
-		cow_user_page(new_page, old_page, address, vma);
+		cow_user_page(new_page, old_page, fe->address, vma);
 	}
 
 	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
@@ -2173,8 +2172,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (likely(pte_same(*page_table, orig_pte))) {
+	fe->pte = pte_offset_map_lock(mm, fe->pmd, fe->address, &fe->ptl);
+	if (likely(pte_same(*fe->pte, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
 				dec_mm_counter_fast(mm,
@@ -2184,7 +2183,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 		} else {
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
-		flush_cache_page(vma, address, pte_pfn(orig_pte));
+		flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		/*
@@ -2193,8 +2192,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * seen in the presence of one thread doing SMC and another
 		 * thread doing COW.
 		 */
-		ptep_clear_flush_notify(vma, address, page_table);
-		page_add_new_anon_rmap(new_page, vma, address, false);
+		ptep_clear_flush_notify(vma, fe->address, fe->pte);
+		page_add_new_anon_rmap(new_page, vma, fe->address, false);
 		mem_cgroup_commit_charge(new_page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(new_page, vma);
 		/*
@@ -2202,8 +2201,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * mmu page tables (such as kvm shadow page tables), we want the
 		 * new page to be mapped directly into the secondary page table.
 		 */
-		set_pte_at_notify(mm, address, page_table, entry);
-		update_mmu_cache(vma, address, page_table);
+		set_pte_at_notify(mm, fe->address, fe->pte, entry);
+		update_mmu_cache(vma, fe->address, fe->pte);
 		if (old_page) {
 			/*
 			 * Only after switching the pte to the new page may
@@ -2240,7 +2239,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (new_page)
 		put_page(new_page);
 
-	pte_unmap_unlock(page_table, ptl);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (old_page) {
 		/*
@@ -2268,44 +2267,43 @@ oom:
  * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
  * mapping
  */
-static int wp_pfn_shared(struct mm_struct *mm,
-			struct vm_area_struct *vma, unsigned long address,
-			pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
-			pmd_t *pmd)
+static int wp_pfn_shared(struct fault_env *fe, pte_t orig_pte)
 {
+	struct vm_area_struct *vma = fe->vma;
+
 	if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
 		struct vm_fault vmf = {
 			.page = NULL,
-			.pgoff = linear_page_index(vma, address),
-			.virtual_address = (void __user *)(address & PAGE_MASK),
+			.pgoff = linear_page_index(vma, fe->address),
+			.virtual_address =
+				(void __user *)(fe->address & PAGE_MASK),
 			.flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
 		};
 		int ret;
 
-		pte_unmap_unlock(page_table, ptl);
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		ret = vma->vm_ops->pfn_mkwrite(vma, &vmf);
 		if (ret & VM_FAULT_ERROR)
 			return ret;
-		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+		fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+				&fe->ptl);
 		/*
 		 * We might have raced with another page fault while we
 		 * released the pte_offset_map_lock.
 		 */
-		if (!pte_same(*page_table, orig_pte)) {
-			pte_unmap_unlock(page_table, ptl);
+		if (!pte_same(*fe->pte, orig_pte)) {
+			pte_unmap_unlock(fe->pte, fe->ptl);
 			return 0;
 		}
 	}
-	return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte,
-			     NULL, 0, 0);
+	return wp_page_reuse(fe, orig_pte, NULL, 0, 0);
 }
 
-static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
-			  unsigned long address, pte_t *page_table,
-			  pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte,
-			  struct page *old_page)
-	__releases(ptl)
+static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
+		struct page *old_page)
+	__releases(fe->ptl)
 {
+	struct vm_area_struct *vma = fe->vma;
 	int page_mkwrite = 0;
 
 	get_page(old_page);
@@ -2313,8 +2311,8 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 		int tmp;
 
-		pte_unmap_unlock(page_table, ptl);
-		tmp = do_page_mkwrite(vma, old_page, address);
+		pte_unmap_unlock(fe->pte, fe->ptl);
+		tmp = do_page_mkwrite(vma, old_page, fe->address);
 		if (unlikely(!tmp || (tmp &
 				      (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
 			put_page(old_page);
@@ -2326,19 +2324,18 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * they did, we just return, as we can count on the
 		 * MMU to tell us if they didn't also make it writable.
 		 */
-		page_table = pte_offset_map_lock(mm, pmd, address,
-						 &ptl);
-		if (!pte_same(*page_table, orig_pte)) {
+		fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+						 &fe->ptl);
+		if (!pte_same(*fe->pte, orig_pte)) {
 			unlock_page(old_page);
-			pte_unmap_unlock(page_table, ptl);
+			pte_unmap_unlock(fe->pte, fe->ptl);
 			put_page(old_page);
 			return 0;
 		}
 		page_mkwrite = 1;
 	}
 
-	return wp_page_reuse(mm, vma, address, page_table, ptl,
-			     orig_pte, old_page, page_mkwrite, 1);
+	return wp_page_reuse(fe, orig_pte, old_page, page_mkwrite, 1);
 }
 
 /*
@@ -2359,14 +2356,13 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), with pte both mapped and locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		spinlock_t *ptl, pte_t orig_pte)
-	__releases(ptl)
+static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
+	__releases(fe->ptl)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct page *old_page;
 
-	old_page = vm_normal_page(vma, address, orig_pte);
+	old_page = vm_normal_page(vma, fe->address, orig_pte);
 	if (!old_page) {
 		/*
 		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
@@ -2377,12 +2373,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 				     (VM_WRITE|VM_SHARED))
-			return wp_pfn_shared(mm, vma, address, page_table, ptl,
-					     orig_pte, pmd);
+			return wp_pfn_shared(fe, orig_pte);
 
-		pte_unmap_unlock(page_table, ptl);
-		return wp_page_copy(mm, vma, address, page_table, pmd,
-				    orig_pte, old_page);
+		pte_unmap_unlock(fe->pte, fe->ptl);
+		return wp_page_copy(fe, orig_pte, old_page);
 	}
 
 	/*
@@ -2393,13 +2387,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		int total_mapcount;
 		if (!trylock_page(old_page)) {
 			get_page(old_page);
-			pte_unmap_unlock(page_table, ptl);
+			pte_unmap_unlock(fe->pte, fe->ptl);
 			lock_page(old_page);
-			page_table = pte_offset_map_lock(mm, pmd, address,
-							 &ptl);
-			if (!pte_same(*page_table, orig_pte)) {
+			fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd,
+					fe->address, &fe->ptl);
+			if (!pte_same(*fe->pte, orig_pte)) {
 				unlock_page(old_page);
-				pte_unmap_unlock(page_table, ptl);
+				pte_unmap_unlock(fe->pte, fe->ptl);
 				put_page(old_page);
 				return 0;
 			}
@@ -2417,14 +2411,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				page_move_anon_rmap(old_page, vma);
 			}
 			unlock_page(old_page);
-			return wp_page_reuse(mm, vma, address, page_table, ptl,
-					     orig_pte, old_page, 0, 0);
+			return wp_page_reuse(fe, orig_pte, old_page, 0, 0);
 		}
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
-		return wp_page_shared(mm, vma, address, page_table, pmd,
-				      ptl, orig_pte, old_page);
+		return wp_page_shared(fe, orig_pte, old_page);
 	}
 
 	/*
@@ -2432,9 +2424,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	get_page(old_page);
 
-	pte_unmap_unlock(page_table, ptl);
-	return wp_page_copy(mm, vma, address, page_table, pmd,
-			    orig_pte, old_page);
+	pte_unmap_unlock(fe->pte, fe->ptl);
+	return wp_page_copy(fe, orig_pte, old_page);
 }
 
 static void unmap_mapping_range_vma(struct vm_area_struct *vma,
@@ -2522,11 +2513,9 @@ EXPORT_SYMBOL(unmap_mapping_range);
  * We return with the mmap_sem locked or unlocked in the same cases
  * as does filemap_fault().
  */
-int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		unsigned int flags, pte_t orig_pte)
+int do_swap_page(struct fault_env *fe, pte_t orig_pte)
 {
-	spinlock_t *ptl;
+	struct vm_area_struct *vma = fe->vma;
 	struct page *page, *swapcache;
 	struct mem_cgroup *memcg;
 	swp_entry_t entry;
@@ -2535,17 +2524,17 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int exclusive = 0;
 	int ret = 0;
 
-	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+	if (!pte_unmap_same(vma->vm_mm, fe->pmd, fe->pte, orig_pte))
 		goto out;
 
 	entry = pte_to_swp_entry(orig_pte);
 	if (unlikely(non_swap_entry(entry))) {
 		if (is_migration_entry(entry)) {
-			migration_entry_wait(mm, pmd, address);
+			migration_entry_wait(vma->vm_mm, fe->pmd, fe->address);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
 		} else {
-			print_bad_pte(vma, address, orig_pte, NULL);
+			print_bad_pte(vma, fe->address, orig_pte, NULL);
 			ret = VM_FAULT_SIGBUS;
 		}
 		goto out;
@@ -2554,14 +2543,15 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = lookup_swap_cache(entry);
 	if (!page) {
 		page = swapin_readahead(entry,
-					GFP_HIGHUSER_MOVABLE, vma, address);
+					GFP_HIGHUSER_MOVABLE, vma, fe->address);
 		if (!page) {
 			/*
 			 * Back out if somebody else faulted in this pte
 			 * while we released the pte lock.
 			 */
-			page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-			if (likely(pte_same(*page_table, orig_pte)))
+			fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd,
+					fe->address, &fe->ptl);
+			if (likely(pte_same(*fe->pte, orig_pte)))
 				ret = VM_FAULT_OOM;
 			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 			goto unlock;
@@ -2570,7 +2560,7 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* Had to read the page from swap area: Major fault */
 		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(mm, PGMAJFAULT);
+		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 	} else if (PageHWPoison(page)) {
 		/*
 		 * hwpoisoned dirty swapcache pages are kept for killing
@@ -2583,7 +2573,7 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	swapcache = page;
-	locked = lock_page_or_retry(page, mm, flags);
+	locked = lock_page_or_retry(page, vma->vm_mm, fe->flags);
 
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	if (!locked) {
@@ -2600,14 +2590,15 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
 		goto out_page;
 
-	page = ksm_might_need_to_copy(page, vma, address);
+	page = ksm_might_need_to_copy(page, vma, fe->address);
 	if (unlikely(!page)) {
 		ret = VM_FAULT_OOM;
 		page = swapcache;
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
+				&memcg, false)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}
@@ -2615,8 +2606,9 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/*
 	 * Back out if somebody else already faulted in this pte.
 	 */
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (unlikely(!pte_same(*page_table, orig_pte)))
+	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+			&fe->ptl);
+	if (unlikely(!pte_same(*fe->pte, orig_pte)))
 		goto out_nomap;
 
 	if (unlikely(!PageUptodate(page))) {
@@ -2634,24 +2626,24 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * must be called after the swap_free(), or it will never succeed.
 	 */
 
-	inc_mm_counter_fast(mm, MM_ANONPAGES);
-	dec_mm_counter_fast(mm, MM_SWAPENTS);
+	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
+	if ((fe->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
-		flags &= ~FAULT_FLAG_WRITE;
+		fe->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
 		exclusive = RMAP_EXCLUSIVE;
 	}
 	flush_icache_page(vma, page);
 	if (pte_swp_soft_dirty(orig_pte))
 		pte = pte_mksoft_dirty(pte);
-	set_pte_at(mm, address, page_table, pte);
+	set_pte_at(vma->vm_mm, fe->address, fe->pte, pte);
 	if (page == swapcache) {
-		do_page_add_anon_rmap(page, vma, address, exclusive);
+		do_page_add_anon_rmap(page, vma, fe->address, exclusive);
 		mem_cgroup_commit_charge(page, memcg, true, false);
 	} else { /* ksm created a completely new copy */
-		page_add_new_anon_rmap(page, vma, address, false);
+		page_add_new_anon_rmap(page, vma, fe->address, false);
 		mem_cgroup_commit_charge(page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(page, vma);
 	}
@@ -2674,22 +2666,22 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(swapcache);
 	}
 
-	if (flags & FAULT_FLAG_WRITE) {
-		ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
+	if (fe->flags & FAULT_FLAG_WRITE) {
+		ret |= do_wp_page(fe, pte);
 		if (ret & VM_FAULT_ERROR)
 			ret &= VM_FAULT_ERROR;
 		goto out;
 	}
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, address, page_table);
+	update_mmu_cache(vma, fe->address, fe->pte);
 unlock:
-	pte_unmap_unlock(page_table, ptl);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 out:
 	return ret;
 out_nomap:
 	mem_cgroup_cancel_charge(page, memcg, false);
-	pte_unmap_unlock(page_table, ptl);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 out_page:
 	unlock_page(page);
 out_release:
@@ -2740,37 +2732,36 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
 */
-static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		unsigned int flags)
+static int do_anonymous_page(struct fault_env *fe)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct mem_cgroup *memcg;
 	struct page *page;
-	spinlock_t *ptl;
 	pte_t entry;
 
-	pte_unmap(page_table);
+	pte_unmap(fe->pte);
 
 	/* File mapping without ->vm_ops ? */
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
 
 	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, address) < 0)
+	if (check_stack_guard_page(vma, fe->address) < 0)
 		return VM_FAULT_SIGSEGV;
 
 	/* Use the zero-page for reads */
-	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
-		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
+	if (!(fe->flags & FAULT_FLAG_WRITE) &&
+			!mm_forbids_zeropage(vma->vm_mm)) {
+		entry = pte_mkspecial(pfn_pte(my_zero_pfn(fe->address),
 						vma->vm_page_prot));
-		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-		if (!pte_none(*page_table))
+		fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+				&fe->ptl);
+		if (!pte_none(*fe->pte))
 			goto unlock;
 		/* Deliver the page fault to userland, check inside PT lock */
 		if (userfaultfd_missing(vma)) {
-			pte_unmap_unlock(page_table, ptl);
-			return handle_userfault(vma, address, flags,
-						VM_UFFD_MISSING);
+			pte_unmap_unlock(fe->pte, fe->ptl);
+			return handle_userfault(fe, VM_UFFD_MISSING);
 		}
 		goto setpte;
 	}
@@ -2778,11 +2769,11 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Allocate our own private page. */
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	page = alloc_zeroed_user_highpage_movable(vma, address);
+	page = alloc_zeroed_user_highpage_movable(vma, fe->address);
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_page;
 
 	/*
@@ -2796,30 +2787,30 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (!pte_none(*page_table))
+	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+			&fe->ptl);
+	if (!pte_none(*fe->pte))
 		goto release;
 
 	/* Deliver the page fault to userland, check inside PT lock */
 	if (userfaultfd_missing(vma)) {
-		pte_unmap_unlock(page_table, ptl);
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		mem_cgroup_cancel_charge(page, memcg, false);
 		put_page(page);
-		return handle_userfault(vma, address, flags,
-					VM_UFFD_MISSING);
+		return handle_userfault(fe, VM_UFFD_MISSING);
 	}
 
-	inc_mm_counter_fast(mm, MM_ANONPAGES);
-	page_add_new_anon_rmap(page, vma, address, false);
+	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	page_add_new_anon_rmap(page, vma, fe->address, false);
 	mem_cgroup_commit_charge(page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(page, vma);
 setpte:
-	set_pte_at(mm, address, page_table, entry);
+	set_pte_at(vma->vm_mm, fe->address, fe->pte, entry);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, address, page_table);
+	update_mmu_cache(vma, fe->address, fe->pte);
 unlock:
-	pte_unmap_unlock(page_table, ptl);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 	return 0;
 release:
 	mem_cgroup_cancel_charge(page, memcg, false);
@@ -2836,17 +2827,16 @@ oom:
  * released depending on flags and vma->vm_ops->fault() return value.
  * See filemap_fault() and __lock_page_retry().
  */
-static int __do_fault(struct vm_area_struct *vma, unsigned long address,
-			pgoff_t pgoff, unsigned int flags,
-			struct page *cow_page, struct page **page,
-			void **entry)
+static int __do_fault(struct fault_env *fe, pgoff_t pgoff,
+		struct page *cow_page, struct page **page, void **entry)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct vm_fault vmf;
 	int ret;
 
-	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
+	vmf.virtual_address = (void __user *)(fe->address & PAGE_MASK);
 	vmf.pgoff = pgoff;
-	vmf.flags = flags;
+	vmf.flags = fe->flags;
 	vmf.page = NULL;
 	vmf.gfp_mask = __get_fault_gfp_mask(vma);
 	vmf.cow_page = cow_page;
@@ -2878,38 +2868,36 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 /**
  * do_set_pte - setup new PTE entry for given page and add reverse page mapping.
  *
- * @vma: virtual memory area
- * @address: user virtual address
+ * @fe: fault environment
  * @page: page to map
- * @pte: pointer to target page table entry
- * @write: true, if new entry is writable
- * @anon: true, if it's anonymous page
 *
- * Caller must hold page table lock relevant for @pte.
+ * Caller must hold page table lock relevant for @fe->pte.
 *
 * Target users are page handler itself and implementations of
 * vm_ops->map_pages.
 */
-void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon)
+void do_set_pte(struct fault_env *fe, struct page *page)
 {
+	struct vm_area_struct *vma = fe->vma;
+	bool write = fe->flags & FAULT_FLAG_WRITE;
 	pte_t entry;
 
	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	if (anon) {
+	/* copy-on-write page */
+	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-		page_add_new_anon_rmap(page, vma, address, false);
+		page_add_new_anon_rmap(page, vma, fe->address, false);
 	} else {
 		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page);
 	}
-	set_pte_at(vma->vm_mm, address, pte, entry);
+	set_pte_at(vma->vm_mm, fe->address, fe->pte, entry);
 
 	/* no need to invalidate: a not-present page won't be cached */
-	update_mmu_cache(vma, address, pte);
+	update_mmu_cache(vma, fe->address, fe->pte);
 }
 
 static unsigned long fault_around_bytes __read_mostly =
@@ -2976,57 +2964,53 @@ late_initcall(fault_around_debugfs);
 * fault_around_pages() value (and therefore to page order). This way it's
 * easier to guarantee that we don't cross page table boundaries.
 */
-static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
-		pte_t *pte, pgoff_t pgoff, unsigned int flags)
+static void do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
 {
-	unsigned long start_addr, nr_pages, mask;
-	pgoff_t max_pgoff;
-	struct vm_fault vmf;
+	unsigned long address = fe->address, start_addr, nr_pages, mask;
+	pte_t *pte = fe->pte;
+	pgoff_t end_pgoff;
 	int off;
 
 	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
 	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
 
-	start_addr = max(address & mask, vma->vm_start);
-	off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
-	pte -= off;
-	pgoff -= off;
+	start_addr = max(fe->address & mask, fe->vma->vm_start);
+	off = ((fe->address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+	fe->pte -= off;
+	start_pgoff -= off;
 
 	/*
-	 * max_pgoff is either end of page table or end of vma
-	 * or fault_around_pages() from pgoff, depending what is nearest.
+	 * end_pgoff is either end of page table or end of vma
+	 * or fault_around_pages() from start_pgoff, depending what is nearest.
 	 */
-	max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
+	end_pgoff = start_pgoff -
+		((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
 		PTRS_PER_PTE - 1;
-	max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
-			pgoff + nr_pages - 1);
+	end_pgoff = min3(end_pgoff, vma_pages(fe->vma) + fe->vma->vm_pgoff - 1,
+			start_pgoff + nr_pages - 1);
 
 	/* Check if it makes any sense to call ->map_pages */
-	while (!pte_none(*pte)) {
-		if (++pgoff > max_pgoff)
-			return;
-		start_addr += PAGE_SIZE;
-		if (start_addr >= vma->vm_end)
-			return;
-		pte++;
+	fe->address = start_addr;
+	while (!pte_none(*fe->pte)) {
+		if (++start_pgoff > end_pgoff)
+			goto out;
+		fe->address += PAGE_SIZE;
+		if (fe->address >= fe->vma->vm_end)
+			goto out;
+		fe->pte++;
 	}
 
-	vmf.virtual_address = (void __user *) start_addr;
-	vmf.pte = pte;
-	vmf.pgoff = pgoff;
-	vmf.max_pgoff = max_pgoff;
-	vmf.flags = flags;
-	vmf.gfp_mask = __get_fault_gfp_mask(vma);
-	vma->vm_ops->map_pages(vma, &vmf);
+	fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
+out:
+	/* restore fault_env */
+	fe->pte = pte;
+	fe->address = address;
 }
 
-static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pmd_t *pmd,
-		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
+static int do_read_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct page *fault_page;
-	spinlock_t *ptl;
-	pte_t *pte;
 	int ret = 0;
 
 	/*
@@ -3035,66 +3019,68 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * something).
 	 */
 	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
-		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-		do_fault_around(vma, address, pte, pgoff, flags);
-		if (!pte_same(*pte, orig_pte))
+		fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+				&fe->ptl);
+		if (!pte_same(*fe->pte, orig_pte))
+			goto unlock_out;
+		do_fault_around(fe, pgoff);
+		/* Check if the fault is handled by faultaround */
+		if (!pte_same(*fe->pte, orig_pte))
 			goto unlock_out;
-		pte_unmap_unlock(pte, ptl);
+		pte_unmap_unlock(fe->pte, fe->ptl);
 	}
 
-	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
+	ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (unlikely(!pte_same(*pte, orig_pte))) {
-		pte_unmap_unlock(pte, ptl);
+	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, &fe->ptl);
+	if (unlikely(!pte_same(*fe->pte, orig_pte))) {
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		unlock_page(fault_page);
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, false, false);
+	do_set_pte(fe, fault_page);
 	unlock_page(fault_page);
 unlock_out:
-	pte_unmap_unlock(pte, ptl);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 	return ret;
 }
 
-static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pmd_t *pmd,
-		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
+static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct page *fault_page, *new_page;
 	void *fault_entry;
 	struct mem_cgroup *memcg;
-	spinlock_t *ptl;
-	pte_t *pte;
 	int ret;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
 
-	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, fe->address);
 	if (!new_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) {
+	if (mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+				&memcg, false)) {
 		put_page(new_page);
 		return VM_FAULT_OOM;
 	}
 
-	ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page,
-			 &fault_entry);
+	ret = __do_fault(fe, pgoff, new_page, &fault_page, &fault_entry);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
 
 	if (!(ret & VM_FAULT_DAX_LOCKED))
-		copy_user_highpage(new_page, fault_page, address, vma);
+		copy_user_highpage(new_page, fault_page, fe->address, vma);
 	__SetPageUptodate(new_page);
 
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (unlikely(!pte_same(*pte, orig_pte))) {
-		pte_unmap_unlock(pte, ptl);
+	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+			&fe->ptl);
+	if (unlikely(!pte_same(*fe->pte, orig_pte))) {
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		if (!(ret & VM_FAULT_DAX_LOCKED)) {
 			unlock_page(fault_page);
 			put_page(fault_page);
@@ -3104,10 +3090,10 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		goto uncharge_out;
 	}
-	do_set_pte(vma, address, new_page, pte, true, true);
+	do_set_pte(fe, new_page);
 	mem_cgroup_commit_charge(new_page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
-	pte_unmap_unlock(pte, ptl);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 	if (!(ret & VM_FAULT_DAX_LOCKED)) {
 		unlock_page(fault_page);
 		put_page(fault_page);
@@ -3121,18 +3107,15 @@ uncharge_out:
 	return ret;
 }
 
-static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pmd_t *pmd,
-		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
+static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct page *fault_page;
 	struct address_space *mapping;
-	spinlock_t *ptl;
-	pte_t *pte;
 	int dirtied = 0;
 	int ret, tmp;
 
-	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
+	ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
@@ -3142,7 +3125,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	if (vma->vm_ops->page_mkwrite) {
 		unlock_page(fault_page);
-		tmp = do_page_mkwrite(vma, fault_page, address);
+		tmp = do_page_mkwrite(vma, fault_page, fe->address);
 		if (unlikely(!tmp ||
 				(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
 			put_page(fault_page);
@@ -3150,15 +3133,16 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 	}
 
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (unlikely(!pte_same(*pte, orig_pte))) {
-		pte_unmap_unlock(pte, ptl);
+	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
+			&fe->ptl);
+	if (unlikely(!pte_same(*fe->pte, orig_pte))) {
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		unlock_page(fault_page);
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, true, false);
-	pte_unmap_unlock(pte, ptl);
+	do_set_pte(fe, fault_page);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 
 	if (set_page_dirty(fault_page))
 		dirtied = 1;
@@ -3190,23 +3174,20 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 * The mmap_sem may have been released depending on flags and our
 * return value. See filemap_fault() and __lock_page_or_retry().
 */
-static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		unsigned int flags, pte_t orig_pte)
+static int do_fault(struct fault_env *fe, pte_t orig_pte)
 {
-	pgoff_t pgoff = linear_page_index(vma, address);
+	struct vm_area_struct *vma = fe->vma;
+	pgoff_t pgoff = linear_page_index(vma, fe->address);
 
-	pte_unmap(page_table);
+	pte_unmap(fe->pte);
 	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
 	if (!vma->vm_ops->fault)
 		return VM_FAULT_SIGBUS;
-	if (!(flags & FAULT_FLAG_WRITE))
-		return do_read_fault(mm, vma, address, pmd, pgoff, flags,
-				orig_pte);
+	if (!(fe->flags & FAULT_FLAG_WRITE))
+		return do_read_fault(fe, pgoff, orig_pte);
 	if (!(vma->vm_flags & VM_SHARED))
-		return do_cow_fault(mm, vma, address, pmd, pgoff, flags,
-				orig_pte);
-	return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
+		return do_cow_fault(fe, pgoff, orig_pte);
+	return do_shared_fault(fe, pgoff, orig_pte);
 }
 
 static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
@@ -3224,11 +3205,10 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
 	return mpol_misplaced(page, vma, addr);
 }
 
-static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
+static int do_numa_page(struct fault_env *fe, pte_t pte)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct page *page = NULL;
-	spinlock_t *ptl;
 	int page_nid = -1;
 	int last_cpupid;
 	int target_nid;
@@ -3248,10 +3228,10 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * page table entry is not accessible, so there would be no
 	 * concurrent hardware modifications to the PTE.
 	 */
-	ptl = pte_lockptr(mm, pmd);
-	spin_lock(ptl);
-	if (unlikely(!pte_same(*ptep, pte))) {
-		pte_unmap_unlock(ptep, ptl);
+	fe->ptl = pte_lockptr(vma->vm_mm, fe->pmd);
+	spin_lock(fe->ptl);
+	if (unlikely(!pte_same(*fe->pte, pte))) {
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		goto out;
 	}
 
@@ -3260,18 +3240,18 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
-	set_pte_at(mm, addr, ptep, pte);
-	update_mmu_cache(vma, addr, ptep);
+	set_pte_at(vma->vm_mm, fe->address, fe->pte, pte);
+	update_mmu_cache(vma, fe->address, fe->pte);
 
-	page = vm_normal_page(vma, addr, pte);
+	page = vm_normal_page(vma, fe->address, pte);
 	if (!page) {
-		pte_unmap_unlock(ptep, ptl);
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		return 0;
 	}
 
 	/* TODO: handle PTE-mapped THP */
 	if (PageCompound(page)) {
-		pte_unmap_unlock(ptep, ptl);
+		pte_unmap_unlock(fe->pte, fe->ptl);
 		return 0;
 	}
 
@@ -3295,8 +3275,9 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, page_nid, &flags);
-	pte_unmap_unlock(ptep, ptl);
+	target_nid = numa_migrate_prep(page, vma, fe->address, page_nid,
+			&flags);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 	if (target_nid == -1) {
 		put_page(page);
 		goto out;
@@ -3316,24 +3297,24 @@ out:
 	return 0;
 }
 
-static int create_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pmd_t *pmd, unsigned int flags)
+static int create_huge_pmd(struct fault_env *fe)
 {
+	struct vm_area_struct *vma = fe->vma;
 	if (vma_is_anonymous(vma))
-		return do_huge_pmd_anonymous_page(mm, vma, address, pmd, flags);
+		return do_huge_pmd_anonymous_page(fe);
 	if (vma->vm_ops->pmd_fault)
-		return vma->vm_ops->pmd_fault(vma, address, pmd, flags);
+		return vma->vm_ops->pmd_fault(vma, fe->address, fe->pmd,
+				fe->flags);
 	return VM_FAULT_FALLBACK;
 }
 
-static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pmd_t *pmd, pmd_t orig_pmd,
-			unsigned int flags)
+static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
 {
-	if (vma_is_anonymous(vma))
-		return do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd);
-	if (vma->vm_ops->pmd_fault)
-		return vma->vm_ops->pmd_fault(vma, address, pmd, flags);
+	if (vma_is_anonymous(fe->vma))
+		return do_huge_pmd_wp_page(fe, orig_pmd);
+	if (fe->vma->vm_ops->pmd_fault)
+		return fe->vma->vm_ops->pmd_fault(fe->vma, fe->address, fe->pmd,
+				fe->flags);
 	return VM_FAULT_FALLBACK;
 }
 
@@ -3353,12 +3334,9 @@ static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 * The mmap_sem may have been released depending on flags and our
 * return value. See filemap_fault() and __lock_page_or_retry().
 */
-static int handle_pte_fault(struct mm_struct *mm,
-		     struct vm_area_struct *vma, unsigned long address,
-		     pte_t *pte, pmd_t *pmd, unsigned int flags)
+static int handle_pte_fault(struct fault_env *fe)
 {
 	pte_t entry;
-	spinlock_t *ptl;
 
 	/*
 	 * some architectures can have larger ptes than wordsize,
@@ -3368,37 +3346,34 @@ static int handle_pte_fault(struct mm_struct *mm,
 	 * we later double check anyway with the ptl lock held. So here
 	 * a barrier will do.
 	 */
-	entry = *pte;
+	entry = *fe->pte;
 	barrier();
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (vma_is_anonymous(vma))
-				return do_anonymous_page(mm, vma, address,
-							 pte, pmd, flags);
+			if (vma_is_anonymous(fe->vma))
+				return do_anonymous_page(fe);
 			else
-				return do_fault(mm, vma, address, pte, pmd,
-						flags, entry);
+				return do_fault(fe, entry);
 		}
-		return do_swap_page(mm, vma, address,
-					pte, pmd, flags, entry);
+		return do_swap_page(fe, entry);
 	}
 
 	if (pte_protnone(entry))
-		return do_numa_page(mm, vma, address, entry, pte, pmd);
+		return do_numa_page(fe, entry);
 
-	ptl = pte_lockptr(mm, pmd);
-	spin_lock(ptl);
-	if (unlikely(!pte_same(*pte, entry)))
+	fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd);
+	spin_lock(fe->ptl);
+	if (unlikely(!pte_same(*fe->pte, entry)))
 		goto unlock;
-	if (flags & FAULT_FLAG_WRITE) {
+	if (fe->flags & FAULT_FLAG_WRITE) {
 		if (!pte_write(entry))
-			return do_wp_page(mm, vma, address,
-					pte, pmd, ptl, entry);
+			return do_wp_page(fe, entry);
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
-	if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
-		update_mmu_cache(vma, address, pte);
+	if (ptep_set_access_flags(fe->vma, fe->address, fe->pte, entry,
+				fe->flags & FAULT_FLAG_WRITE)) {
+		update_mmu_cache(fe->vma, fe->address, fe->pte);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch code
@@ -3406,11 +3381,11 @@ static int handle_pte_fault(struct mm_struct *mm,
 		 * This still avoids useless tlb flushes for .text page faults
 		 * with threads.
 		 */
-		if (flags & FAULT_FLAG_WRITE)
-			flush_tlb_fix_spurious_fault(vma, address);
+		if (fe->flags & FAULT_FLAG_WRITE)
+			flush_tlb_fix_spurious_fault(fe->vma, fe->address);
 	}
 unlock:
-	pte_unmap_unlock(pte, ptl);
+	pte_unmap_unlock(fe->pte, fe->ptl);
 	return 0;
 }
 
@@ -3423,51 +3398,42 @@ unlock:
 static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		unsigned int flags)
 {
+	struct fault_env fe = {
+		.vma = vma,
+		.address = address,
+		.flags = flags,
+	};
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
-					    flags & FAULT_FLAG_INSTRUCTION,
-					    flags & FAULT_FLAG_REMOTE))
-		return VM_FAULT_SIGSEGV;
-
-	if (unlikely(is_vm_hugetlb_page(vma)))
-		return hugetlb_fault(mm, vma, address, flags);
 
 	pgd = pgd_offset(mm, address);
 	pud = pud_alloc(mm, pgd, address);
 	if (!pud)
 		return VM_FAULT_OOM;
-	pmd = pmd_alloc(mm, pud, address);
-	if (!pmd)
+	fe.pmd = pmd_alloc(mm, pud, address);
+	if (!fe.pmd)
 		return VM_FAULT_OOM;
-	if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
-		int ret = create_huge_pmd(mm, vma, address, pmd, flags);
+	if (pmd_none(*fe.pmd) && transparent_hugepage_enabled(vma)) {
+		int ret = create_huge_pmd(&fe);
 		if (!(ret & VM_FAULT_FALLBACK))
 			return ret;
 	} else {
-		pmd_t orig_pmd = *pmd;
+		pmd_t orig_pmd = *fe.pmd;
 		int ret;
 
 		barrier();
 		if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
-			unsigned int dirty = flags & FAULT_FLAG_WRITE;
-
 			if (pmd_protnone(orig_pmd))
-				return do_huge_pmd_numa_page(mm, vma, address,
-							     orig_pmd, pmd);
+				return do_huge_pmd_numa_page(&fe, orig_pmd);
 
-			if (dirty && !pmd_write(orig_pmd)) {
-				ret = wp_huge_pmd(mm, vma, address, pmd,
-						  orig_pmd, flags);
+			if ((fe.flags & FAULT_FLAG_WRITE) &&
+					!pmd_write(orig_pmd)) {
+				ret = wp_huge_pmd(&fe, orig_pmd);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
 			} else {
-				huge_pmd_set_accessed(mm, vma, address, pmd,
-						      orig_pmd, dirty);
+				huge_pmd_set_accessed(&fe, orig_pmd);
 				return 0;
 			}
 		}
@@ -3478,7 +3444,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	 * run pte_offset_map on the pmd, if an huge pmd could
 	 * materialize from under us from a different thread.
 	 */
-	if (unlikely(pte_alloc(mm, pmd, address)))
+	if (unlikely(pte_alloc(fe.vma->vm_mm, fe.pmd, fe.address)))
 		return VM_FAULT_OOM;
 	/*
 	 * If a huge pmd materialized under us just retry later. Use
@@ -3491,7 +3457,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	 * through an atomic read in C, which is what pmd_trans_unstable()
 	 * provides.
 	 */
-	if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
+	if (unlikely(pmd_trans_unstable(fe.pmd) || pmd_devmap(*fe.pmd)))
 		return 0;
 	/*
 	 * A regular pmd is established and it can't morph into a huge pmd
@@ -3499,9 +3465,9 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	 * read mode and khugepaged takes it in write mode. So now it's
 	 * safe to run pte_offset_map().
 	 */
-	pte = pte_offset_map(pmd, address);
+	fe.pte = pte_offset_map(fe.pmd, fe.address);
 
-	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+	return handle_pte_fault(&fe);
 }
 
 /*
@@ -3530,7 +3496,15 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	if (flags & FAULT_FLAG_USER)
 		mem_cgroup_oom_enable();
 
-	ret = __handle_mm_fault(vma, address, flags);
+	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+					    flags & FAULT_FLAG_INSTRUCTION,
+					    flags & FAULT_FLAG_REMOTE))
+		return VM_FAULT_SIGSEGV;
+
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
+	else
+		ret = __handle_mm_fault(vma, address, flags);
 
 	if (flags & FAULT_FLAG_USER) {
 		mem_cgroup_oom_disable();
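
For reference, the definition of struct fault_env is not part of this excerpt, so the sketch below is inferred purely from the members the converted code dereferences (fe->vma, fe->address, fe->flags, fe->pmd, fe->pte, fe->ptl); the field comments are illustrative rather than authoritative:

struct fault_env {
	struct vm_area_struct *vma;	/* target VMA, filled in by __handle_mm_fault() */
	unsigned long address;		/* faulting user virtual address */
	unsigned int flags;		/* FAULT_FLAG_xxx bits passed to handle_mm_fault() */
	pmd_t *pmd;			/* pmd entry covering 'address' */
	pte_t *pte;			/* pte mapped via pte_offset_map*(), released with pte_unmap_unlock() */
	spinlock_t *ptl;		/* page table lock protecting 'pte' */
};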