@@ -2935,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return true;
 }
 
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+	struct vm_area_struct *vma = fe->vma;
+
+	pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+	/*
+	 * We are going to consume the prealloc table,
+	 * count that as nr_ptes.
+	 */
+	atomic_long_inc(&vma->vm_mm->nr_ptes);
+	fe->prealloc_pte = 0;
+}
+
 static int do_set_pmd(struct fault_env *fe, struct page *page)
 {
 	struct vm_area_struct *vma = fe->vma;
@@ -2949,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = VM_FAULT_FALLBACK;
 	page = compound_head(page);
 
+	/*
+	 * Archs like ppc64 need additional space to store information
+	 * related to pte entry. Use the preallocated table for that.
+	 */
+	if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+		fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+		if (!fe->prealloc_pte)
+			return VM_FAULT_OOM;
+		smp_wmb(); /* See comment in __pte_alloc() */
+	}
+
 	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 	if (unlikely(!pmd_none(*fe->pmd)))
 		goto out;
@@ -2962,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
+	/*
+	 * deposit and withdraw with pmd lock held
+	 */
+	if (arch_needs_pgtable_deposit())
+		deposit_prealloc_pte(fe);
 
 	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
 
@@ -2971,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = 0;
 	count_vm_event(THP_FILE_MAPPED);
 out:
+	/*
+	 * If we are going to fall back to pte mapping, do a
+	 * withdraw with pmd lock held.
+	 */
+	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+		fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+							       fe->pmd);
 	spin_unlock(fe->ptl);
 	return ret;
 }
@@ -3010,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 		ret = do_set_pmd(fe, page);
 		if (ret != VM_FAULT_FALLBACK)
-			return ret;
+			goto fault_handled;
 	}
 
 	if (!fe->pte) {
 		ret = pte_alloc_one_map(fe);
 		if (ret)
-			return ret;
+			goto fault_handled;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*fe->pte)))
-		return VM_FAULT_NOPAGE;
+	if (unlikely(!pte_none(*fe->pte))) {
+		ret = VM_FAULT_NOPAGE;
+		goto fault_handled;
+	}
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -3041,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 	/* no need to invalidate: a not-present page won't be cached */
 	update_mmu_cache(vma, fe->address, fe->pte);
+	ret = 0;
 
-	return 0;
+fault_handled:
+	/* preallocated pagetable is unused: free it */
+	if (fe->prealloc_pte) {
+		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+		fe->prealloc_pte = 0;
+	}
+	return ret;
 }
 
 static unsigned long fault_around_bytes __read_mostly =
@@ -3141,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
 
 	fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
 
-	/* preallocated pagetable is unused: free it */
-	if (fe->prealloc_pte) {
-		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
-		fe->prealloc_pte = 0;
-	}
 	/* Huge page is mapped? Page fault is solved */
 	if (pmd_trans_huge(*fe->pmd)) {
 		ret = VM_FAULT_NOPAGE;