10 years ago · 0f792cf949
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3134,6 +3134,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
				 	struct page *pagecache_page = NULL;
			
 
				 	struct hstate *h = hstate_vma(vma);
			
 
				 	struct address_space *mapping;
			
 
				+	int need_wait_lock = 0;
			
 
				 
			
 
				 	address &= huge_page_mask(h);
			
 
				 
			
@@ -3171,6 +3172,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
				 
			
 
				 	ret = 0;
			
 
				 
			
 
				+	/*
			
 
				+	 * entry could be a migration/hwpoison entry at this point, so this
			
 
				+	 * check prevents the kernel from going below assuming that we have
			
 
				+	 * an active hugepage in pagecache. This goto expects the 2nd page fault,
			
 
				+	 * and is_hugetlb_entry_(migration|hwpoisoned) check will properly
			
 
				+	 * handle it.
			
 
				+	 */
			
 
				+	if (!pte_present(entry))
			
 
				+		goto out_mutex;
			
 
				+
			
 
				 	/*
			
 
				 	 * If we are going to COW the mapping later, we examine the pending
			
 
				 	 * reservations for this page now. This will ensure that any
			
@@ -3190,30 +3201,31 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
				 								vma, address);
			
 
				 	}
			
 
				 
			
 
				+	ptl = huge_pte_lock(h, mm, ptep);
			
 
				+
			
 
				+	/* Check for a racing update before calling hugetlb_cow */
			
 
				+	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
			
 
				+		goto out_ptl;
			
 
				+
			
 
				 	/*
			
 
				 	 * hugetlb_cow() requires page locks of pte_page(entry) and
			
 
				 	 * pagecache_page, so here we need take the former one
			
 
				 	 * when page != pagecache_page or !pagecache_page.
			
 
				-	 * Note that locking order is always pagecache_page -> page,
			
 
				-	 * so no worry about deadlock.
			
 
				 	 */
			
 
				 	page = pte_page(entry);
			
 
				-	get_page(page);
			
 
				 	if (page != pagecache_page)
			
 
				-		lock_page(page);
			
 
				-
			
 
				-	ptl = huge_pte_lockptr(h, mm, ptep);
			
 
				-	spin_lock(ptl);
			
 
				-	/* Check for a racing update before calling hugetlb_cow */
			
 
				-	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
			
 
				-		goto out_ptl;
			
 
				+		if (!trylock_page(page)) {
			
 
				+			need_wait_lock = 1;
			
 
				+			goto out_ptl;
			
 
				+		}
			
 
				 
			
 
				+	get_page(page);
			
 
				 
			
 
				 	if (flags & FAULT_FLAG_WRITE) {
			
 
				 		if (!huge_pte_write(entry)) {
			
 
				 			ret = hugetlb_cow(mm, vma, address, ptep, entry,
			
 
				 					pagecache_page, ptl);
			
 
				-			goto out_ptl;
			
 
				+			goto out_put_page;
			
 
				 		}
			
 
				 		entry = huge_pte_mkdirty(entry);
			
 
				 	}
			
@@ -3221,7 +3233,10 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
				 	if (huge_ptep_set_access_flags(vma, address, ptep, entry,
			
 
				 						flags & FAULT_FLAG_WRITE))
			
 
				 		update_mmu_cache(vma, address, ptep);
			
 
				-
			
 
				+out_put_page:
			
 
				+	if (page != pagecache_page)
			
 
				+		unlock_page(page);
			
 
				+	put_page(page);
			
 
				 out_ptl:
			
 
				 	spin_unlock(ptl);
			
 
				 
			
@@ -3229,12 +3244,17 @@ out_ptl:
 
				 		unlock_page(pagecache_page);
			
 
				 		put_page(pagecache_page);
			
 
				 	}
			
 
				-	if (page != pagecache_page)
			
 
				-		unlock_page(page);
			
 
				-	put_page(page);
			
 
				-
			
 
				 out_mutex:
			
 
				 	mutex_unlock(&htlb_fault_mutex_table[hash]);
			
 
				+	/*
			
 
				+	 * Generally it's safe to hold a refcount while waiting for a page lock. But
			
 
				+	 * here we just wait to defer the next page fault to avoid busy loop and
			
 
				+	 * the page is not used after unlocked before returning from the current
			
 
				+	 * page fault. So we are safe from accessing freed page, even if we wait
			
 
				+	 * here without taking refcount.
			
 
				+	 */
			
 
				+	if (need_wait_lock)
			
 
				+		wait_on_page_locked(page);
			
 
				 	return ret;
			
 
				 }