@@ -22,6 +22,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/page-isolation.h>
+#include <linux/jhash.h>

 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -53,6 +54,13 @@ static unsigned long __initdata default_hstate_size;
  */
 DEFINE_SPINLOCK(hugetlb_lock);

+/*
+ * Serializes faults on the same logical page. This is used to
+ * prevent spurious OOMs when the hugepage pool is fully utilized.
+ */
+static int num_fault_mutexes;
+static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp;
+
 static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
 {
 	bool free = (spool->count == 0) && (spool->used_hpages == 0);
@@ -1961,11 +1969,14 @@ static void __exit hugetlb_exit(void)
 	}

 	kobject_put(hugepages_kobj);
+	kfree(htlb_fault_mutex_table);
 }
 module_exit(hugetlb_exit);

 static int __init hugetlb_init(void)
 {
+	int i;
+
 	/* Some platform decide whether they support huge pages at boot
 	 * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
 	 * there is no such support
@@ -1990,6 +2001,17 @@ static int __init hugetlb_init(void)
 	hugetlb_register_all_nodes();
 	hugetlb_cgroup_file_init();

+#ifdef CONFIG_SMP
+	num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus());
+#else
+	num_fault_mutexes = 1;
+#endif
+	htlb_fault_mutex_table =
+		kmalloc(sizeof(struct mutex) * num_fault_mutexes, GFP_KERNEL);
+	BUG_ON(!htlb_fault_mutex_table);
+
+	for (i = 0; i < num_fault_mutexes; i++)
+		mutex_init(&htlb_fault_mutex_table[i]);
 	return 0;
 }
 module_init(hugetlb_init);
@@ -2767,15 +2789,14 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
 }

 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep, unsigned int flags)
+			struct address_space *mapping, pgoff_t idx,
+			unsigned long address, pte_t *ptep, unsigned int flags)
 {
 	struct hstate *h = hstate_vma(vma);
 	int ret = VM_FAULT_SIGBUS;
 	int anon_rmap = 0;
-	pgoff_t idx;
 	unsigned long size;
 	struct page *page;
-	struct address_space *mapping;
 	pte_t new_pte;
 	spinlock_t *ptl;

@@ -2790,9 +2811,6 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return ret;
 	}

-	mapping = vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(h, vma, address);
-
 	/*
 	 * Use page lock to guard against racing truncation
 	 * before we get page_table_lock.
@@ -2902,17 +2920,53 @@ backout_unlocked:
 	goto out;
 }

+#ifdef CONFIG_SMP
+static u32 fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
+			    struct vm_area_struct *vma,
+			    struct address_space *mapping,
+			    pgoff_t idx, unsigned long address)
+{
+	unsigned long key[2];
+	u32 hash;
+
+	if (vma->vm_flags & VM_SHARED) {
+		key[0] = (unsigned long) mapping;
+		key[1] = idx;
+	} else {
+		key[0] = (unsigned long) mm;
+		key[1] = address >> huge_page_shift(h);
+	}
+
+	hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);
+
+	return hash & (num_fault_mutexes - 1);
+}
+#else
+/*
+ * For uniprocessor systems we always use a single mutex, so just
+ * return 0 and avoid the hashing overhead.
+ */
+static u32 fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
+			    struct vm_area_struct *vma,
+			    struct address_space *mapping,
+			    pgoff_t idx, unsigned long address)
+{
+	return 0;
+}
+#endif
+
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags)
 {
-	pte_t *ptep;
-	pte_t entry;
+	pte_t *ptep, entry;
 	spinlock_t *ptl;
 	int ret;
+	u32 hash;
+	pgoff_t idx;
 	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
-	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
 	struct hstate *h = hstate_vma(vma);
+	struct address_space *mapping;

 	address &= huge_page_mask(h);

@@ -2931,15 +2985,20 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!ptep)
 		return VM_FAULT_OOM;

+	mapping = vma->vm_file->f_mapping;
+	idx = vma_hugecache_offset(h, vma, address);
+
 	/*
 	 * Serialize hugepage allocation and instantiation, so that we don't
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
-	mutex_lock(&hugetlb_instantiation_mutex);
+	hash = fault_mutex_hash(h, mm, vma, mapping, idx, address);
+	mutex_lock(&htlb_fault_mutex_table[hash]);
+
 	entry = huge_ptep_get(ptep);
 	if (huge_pte_none(entry)) {
-		ret = hugetlb_no_page(mm, vma, address, ptep, flags);
+		ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags);
 		goto out_mutex;
 	}

@@ -3008,8 +3067,7 @@ out_ptl:
 		put_page(page);

 out_mutex:
-	mutex_unlock(&hugetlb_instantiation_mutex);
-
+	mutex_unlock(&htlb_fault_mutex_table[hash]);
 	return ret;
 }
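
To make the locking scheme concrete outside the kernel, the following is a minimal
userspace sketch of the same technique: a power-of-two table of mutexes indexed by
a hash of the fault key, so that racing faults on the same page serialize against
each other while faults on different pages proceed in parallel. Everything here is
illustrative: pthread mutexes stand in for kernel struct mutex, a splitmix64-style
mixer stands in for jhash2(), and the names mirror the patch only for readability.

/* Build with: cc -o fault_mutex_sketch fault_mutex_sketch.c -lpthread */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int num_fault_mutexes;
static pthread_mutex_t *fault_mutex_table;

/* Round up to the next power of two, like the kernel's roundup_pow_of_two(). */
static unsigned int round_up_pow2(unsigned int n)
{
	n--;
	n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16;
	return n + 1;
}

/*
 * Hypothetical stand-in for fault_mutex_hash(): mix the two key words
 * (e.g. mapping pointer and page index for a shared mapping) and mask
 * the result into the table. Because the table size is a power of two,
 * the reduction is a cheap AND instead of a modulo.
 */
static uint32_t fault_mutex_hash(uint64_t key0, uint64_t key1)
{
	uint64_t x = key0 ^ (key1 * 0x9e3779b97f4a7c15ULL);

	x ^= x >> 30; x *= 0xbf58476d1ce4e5b9ULL;
	x ^= x >> 27; x *= 0x94d049bb133111ebULL;
	x ^= x >> 31;
	return (uint32_t)x & (num_fault_mutexes - 1);
}

int main(void)
{
	int i, ncpus = 4;	/* stand-in for num_possible_cpus() */

	/* Size and initialize the table, mirroring hugetlb_init() above. */
	num_fault_mutexes = round_up_pow2(8 * ncpus);
	fault_mutex_table = malloc(sizeof(pthread_mutex_t) * num_fault_mutexes);
	if (!fault_mutex_table)
		abort();
	for (i = 0; i < num_fault_mutexes; i++)
		pthread_mutex_init(&fault_mutex_table[i], NULL);

	/* Same (mapping, index) key always lands in the same bucket, so two
	 * racing faults on one page serialize; a different index usually
	 * picks a different bucket and runs concurrently. */
	uint32_t h_page0  = fault_mutex_hash(0x1000, 0);
	uint32_t h_page1  = fault_mutex_hash(0x1000, 1);
	uint32_t h_again  = fault_mutex_hash(0x1000, 0);

	printf("page0 -> %u, page1 -> %u, page0 again -> %u\n",
	       h_page0, h_page1, h_again);

	pthread_mutex_lock(&fault_mutex_table[h_page0]);
	/* ... allocate and instantiate the page here ... */
	pthread_mutex_unlock(&fault_mutex_table[h_page0]);

	free(fault_mutex_table);
	return 0;
}

Note the design choice this sketch shares with the patch: hash collisions are
harmless, since two unrelated faults mapping to the same bucket merely serialize,
which is exactly the (safe) behavior the old single hugetlb_instantiation_mutex
imposed on every fault.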