@@ -2041,6 +2041,146 @@ static inline int wp_page_reuse(struct mm_struct *mm,
         return VM_FAULT_WRITE;
 }
 
+/*
+ * Handle the case of a page which we actually need to copy to a new page.
+ *
+ * Called with mmap_sem locked and the old page referenced, but
+ * without the ptl held.
+ *
+ * High level logic flow:
+ *
+ * - Allocate a page, copy the content of the old page to the new one.
+ * - Handle bookkeeping and accounting - cgroups, mmu-notifiers, etc.
+ * - Take the PTL. If the pte changed, bail out and release the allocated page
+ * - If the pte is still the way we remember it, update the page table and all
+ *   relevant references. This includes dropping the reference the page-table
+ *   held to the old page, as well as updating the rmap.
+ * - In any case, unlock the PTL and drop the reference we took to the old page.
+ */
+static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
+                        unsigned long address, pte_t *page_table, pmd_t *pmd,
+                        pte_t orig_pte, struct page *old_page)
+{
+        struct page *new_page = NULL;
+        spinlock_t *ptl = NULL;
+        pte_t entry;
+        int page_copied = 0;
+        const unsigned long mmun_start = address & PAGE_MASK; /* For mmu_notifiers */
+        const unsigned long mmun_end = mmun_start + PAGE_SIZE; /* For mmu_notifiers */
+        struct mem_cgroup *memcg;
+
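+        /*
+         * The copy is mapped as an anonymous page, so make sure the vma has
+         * an anon_vma for the rmap before going any further.
+         */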
+        if (unlikely(anon_vma_prepare(vma)))
+                goto oom;
+
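+        /*
+         * A zero pfn means we are COWing the zero page: there is nothing to
+         * copy, just hand back a freshly zeroed page.
+         */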
+        if (is_zero_pfn(pte_pfn(orig_pte))) {
+                new_page = alloc_zeroed_user_highpage_movable(vma, address);
+                if (!new_page)
+                        goto oom;
+        } else {
+                new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+                if (!new_page)
+                        goto oom;
+                cow_user_page(new_page, old_page, address, vma);
+        }
+        __SetPageUptodate(new_page);
+
+        if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
+                goto oom_free_new;
+
+        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+        /*
+         * Re-check the pte - we dropped the lock
+         */
+        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+        if (likely(pte_same(*page_table, orig_pte))) {
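+                /*
+                 * The fault installs an anonymous page; if it replaces a
+                 * file-backed page, move the rss accounting from
+                 * MM_FILEPAGES to MM_ANONPAGES, otherwise just account the
+                 * new anon page.
+                 */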
+                if (old_page) {
+                        if (!PageAnon(old_page)) {
+                                dec_mm_counter_fast(mm, MM_FILEPAGES);
+                                inc_mm_counter_fast(mm, MM_ANONPAGES);
+                        }
+                } else {
+                        inc_mm_counter_fast(mm, MM_ANONPAGES);
+                }
+                flush_cache_page(vma, address, pte_pfn(orig_pte));
+                entry = mk_pte(new_page, vma->vm_page_prot);
+                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+                /*
+                 * Clear the pte entry and flush it first, before updating the
+                 * pte with the new entry. This will avoid a race condition
+                 * seen in the presence of one thread doing SMC and another
+                 * thread doing COW.
+                 */
+                ptep_clear_flush_notify(vma, address, page_table);
+                page_add_new_anon_rmap(new_page, vma, address);
+                mem_cgroup_commit_charge(new_page, memcg, false);
+                lru_cache_add_active_or_unevictable(new_page, vma);
+                /*
+                 * We call the notify macro here because, when using secondary
+                 * mmu page tables (such as kvm shadow page tables), we want the
+                 * new page to be mapped directly into the secondary page table.
+                 */
+                set_pte_at_notify(mm, address, page_table, entry);
+                update_mmu_cache(vma, address, page_table);
+                if (old_page) {
+                        /*
+                         * Only after switching the pte to the new page may
+                         * we remove the mapcount here. Otherwise another
+                         * process may come and find the rmap count decremented
+                         * before the pte is switched to the new page, and
+                         * "reuse" the old page writing into it while our pte
+                         * here still points into it and can be read by other
+                         * threads.
+                         *
+                         * The critical issue is to order this
+                         * page_remove_rmap with the ptp_clear_flush above.
+                         * Those stores are ordered by (if nothing else,)
+                         * the barrier present in the atomic_add_negative
+                         * in page_remove_rmap.
+                         *
+                         * Then the TLB flush in ptep_clear_flush ensures that
+                         * no process can access the old page before the
+                         * decremented mapcount is visible. And the old page
+                         * cannot be reused until after the decremented
+                         * mapcount is visible. So transitively, TLBs to
+                         * old page will be flushed before it can be reused.
+                         */
+                        page_remove_rmap(old_page);
+                }
+
+                /* Free the old page.. */
+                new_page = old_page;
+                page_copied = 1;
+        } else {
+                mem_cgroup_cancel_charge(new_page, memcg);
+        }
+
+        if (new_page)
+                page_cache_release(new_page);
+
+        pte_unmap_unlock(page_table, ptl);
+        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+        if (old_page) {
+                /*
+                 * Don't let another task, with possibly unlocked vma,
+                 * keep the mlocked page.
+                 */
+                if (page_copied && (vma->vm_flags & VM_LOCKED)) {
+                        lock_page(old_page); /* LRU manipulation */
+                        munlock_vma_page(old_page);
+                        unlock_page(old_page);
+                }
+                page_cache_release(old_page);
+        }
+        return page_copied ? VM_FAULT_WRITE : 0;
+oom_free_new:
+        page_cache_release(new_page);
+oom:
+        if (old_page)
+                page_cache_release(old_page);
+        return VM_FAULT_OOM;
+}
+
 /*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
@@ -2064,12 +2204,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 spinlock_t *ptl, pte_t orig_pte)
         __releases(ptl)
 {
-        struct page *old_page, *new_page = NULL;
-        pte_t entry;
-        int page_copied = 0;
-        unsigned long mmun_start = 0; /* For mmu_notifiers */
-        unsigned long mmun_end = 0; /* For mmu_notifiers */
-        struct mem_cgroup *memcg;
+        struct page *old_page;
 
         old_page = vm_normal_page(vma, address, orig_pte);
         if (!old_page) {
@@ -2085,7 +2220,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                      (VM_WRITE|VM_SHARED))
                         return wp_page_reuse(mm, vma, address, page_table, ptl,
                                              orig_pte, old_page, 0, 0);
-                goto gotten;
+
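+                /*
+                 * wp_page_copy() sleeps to allocate and charge the new page,
+                 * so it is entered without the ptl held; it re-takes the lock
+                 * and re-checks the pte itself.
+                 */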
+                pte_unmap_unlock(page_table, ptl);
+                return wp_page_copy(mm, vma, address, page_table, pmd,
+                                    orig_pte, old_page);
         }
 
         /*
@@ -2165,119 +2303,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
          * Ok, we need to copy. Oh, well..
          */
         page_cache_get(old_page);
-gotten:
-        pte_unmap_unlock(page_table, ptl);
-
-        if (unlikely(anon_vma_prepare(vma)))
-                goto oom;
-
-        if (is_zero_pfn(pte_pfn(orig_pte))) {
-                new_page = alloc_zeroed_user_highpage_movable(vma, address);
-                if (!new_page)
-                        goto oom;
-        } else {
-                new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-                if (!new_page)
-                        goto oom;
-                cow_user_page(new_page, old_page, address, vma);
-        }
-        __SetPageUptodate(new_page);
-
-        if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
-                goto oom_free_new;
-
-        mmun_start = address & PAGE_MASK;
-        mmun_end = mmun_start + PAGE_SIZE;
-        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-
-        /*
-         * Re-check the pte - we dropped the lock
-         */
-        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-        if (likely(pte_same(*page_table, orig_pte))) {
-                if (old_page) {
-                        if (!PageAnon(old_page)) {
-                                dec_mm_counter_fast(mm, MM_FILEPAGES);
-                                inc_mm_counter_fast(mm, MM_ANONPAGES);
-                        }
-                } else
-                        inc_mm_counter_fast(mm, MM_ANONPAGES);
-                flush_cache_page(vma, address, pte_pfn(orig_pte));
-                entry = mk_pte(new_page, vma->vm_page_prot);
-                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-                /*
-                 * Clear the pte entry and flush it first, before updating the
-                 * pte with the new entry. This will avoid a race condition
-                 * seen in the presence of one thread doing SMC and another
-                 * thread doing COW.
-                 */
-                ptep_clear_flush_notify(vma, address, page_table);
-                page_add_new_anon_rmap(new_page, vma, address);
-                mem_cgroup_commit_charge(new_page, memcg, false);
-                lru_cache_add_active_or_unevictable(new_page, vma);
-                /*
-                 * We call the notify macro here because, when using secondary
-                 * mmu page tables (such as kvm shadow page tables), we want the
-                 * new page to be mapped directly into the secondary page table.
-                 */
-                set_pte_at_notify(mm, address, page_table, entry);
-                update_mmu_cache(vma, address, page_table);
-                if (old_page) {
-                        /*
-                         * Only after switching the pte to the new page may
-                         * we remove the mapcount here. Otherwise another
-                         * process may come and find the rmap count decremented
-                         * before the pte is switched to the new page, and
-                         * "reuse" the old page writing into it while our pte
-                         * here still points into it and can be read by other
-                         * threads.
-                         *
-                         * The critical issue is to order this
-                         * page_remove_rmap with the ptp_clear_flush above.
-                         * Those stores are ordered by (if nothing else,)
-                         * the barrier present in the atomic_add_negative
-                         * in page_remove_rmap.
-                         *
-                         * Then the TLB flush in ptep_clear_flush ensures that
-                         * no process can access the old page before the
-                         * decremented mapcount is visible. And the old page
-                         * cannot be reused until after the decremented
-                         * mapcount is visible. So transitively, TLBs to
-                         * old page will be flushed before it can be reused.
-                         */
-                        page_remove_rmap(old_page);
-                }
-
-                /* Free the old page.. */
-                new_page = old_page;
-                page_copied = 1;
-        } else
-                mem_cgroup_cancel_charge(new_page, memcg);
-
-        if (new_page)
-                page_cache_release(new_page);
 
         pte_unmap_unlock(page_table, ptl);
-        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-        if (old_page) {
-                /*
-                 * Don't let another task, with possibly unlocked vma,
-                 * keep the mlocked page.
-                 */
-                if (page_copied && (vma->vm_flags & VM_LOCKED)) {
-                        lock_page(old_page); /* LRU manipulation */
-                        munlock_vma_page(old_page);
-                        unlock_page(old_page);
-                }
-                page_cache_release(old_page);
-        }
-        return page_copied ? VM_FAULT_WRITE : 0;
-oom_free_new:
-        page_cache_release(new_page);
-oom:
-        if (old_page)
-                page_cache_release(old_page);
-        return VM_FAULT_OOM;
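+        /*
+         * The reference taken on old_page above is handed off to
+         * wp_page_copy(), which releases it on every path.
+         */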
+        return wp_page_copy(mm, vma, address, page_table, pmd,
+                            orig_pte, old_page);
 }
 
 static void unmap_mapping_range_vma(struct vm_area_struct *vma,