
Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "20 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  Documentation/sysctl/vm.txt: update numa_zonelist_order description
  lib/stackdepot.c: allow the stack trace hash to be zero
  rapidio: fix potential NULL pointer dereference
  mm/memory-failure: fix race with compound page split/merge
  ocfs2/dlm: return zero if deref_done message is successfully handled
  Ananth has moved
  kcov: don't profile branches in kcov
  kcov: don't trace the code coverage code
  mm: wake kcompactd before kswapd's short sleep
  .mailmap: add Frank Rowand
  mm/hwpoison: fix wrong num_poisoned_pages accounting
  mm: call swap_slot_free_notify() with page lock held
  mm: vmscan: reclaim highmem zone if buffer_heads is over limit
  numa: fix /proc/<pid>/numa_maps for THP
  mm/huge_memory: replace VM_NO_THP VM_BUG_ON with actual VMA check
  mailmap: fix Krzysztof Kozlowski's misspelled name
  thp: keep huge zero page pinned until tlb flush
  mm: exclude HugeTLB pages from THP page_mapped() logic
  kexec: export OFFSET(page.compound_head) to find out compound tail page
  kexec: update VMCOREINFO for compound_order/dtor
Linus Torvalds, 9 years ago
commit 1d003af2ef

.mailmap (+4 -0)

@@ -48,6 +48,9 @@ Felix Kuhling <fxkuehl@gmx.de>
 Felix Moeller <felix@derklecks.de>
 Filipe Lautert <filipe@icewall.org>
 Franck Bui-Huu <vagabon.xyz@gmail.com>
+Frank Rowand <frowand.list@gmail.com> <frowand@mvista.com>
+Frank Rowand <frowand.list@gmail.com> <frank.rowand@am.sony.com>
+Frank Rowand <frowand.list@gmail.com> <frank.rowand@sonymobile.com>
 Frank Zago <fzago@systemfabricworks.com>
 Greg Kroah-Hartman <greg@echidna.(none)>
 Greg Kroah-Hartman <gregkh@suse.de>
@@ -79,6 +82,7 @@ Kay Sievers <kay.sievers@vrfy.org>
 Kenneth W Chen <kenneth.w.chen@intel.com>
 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
 Koushik <raghavendra.koushik@neterion.com>
+Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>

Documentation/sysctl/vm.txt (+10 -9)

@@ -581,15 +581,16 @@ Specify "[Nn]ode" for node order
 "Zone Order" orders the zonelists by zone type, then by node within each
 zone.  Specify "[Zz]one" for zone order.
 
-Specify "[Dd]efault" to request automatic configuration.  Autoconfiguration
-will select "node" order in following case.
-(1) if the DMA zone does not exist or
-(2) if the DMA zone comprises greater than 50% of the available memory or
-(3) if any node's DMA zone comprises greater than 70% of its local memory and
-    the amount of local memory is big enough.
-
-Otherwise, "zone" order will be selected. Default order is recommended unless
-this is causing problems for your system/application.
+Specify "[Dd]efault" to request automatic configuration.
+
+On 32-bit, the Normal zone needs to be preserved for allocations accessible
+by the kernel, so "zone" order will be selected.
+
+On 64-bit, devices that require DMA32/DMA are relatively rare, so "node"
+order will be selected.
+
+Default order is recommended unless this is causing problems for your
+system/application.
 
 ==============================================================
 

MAINTAINERS (+1 -1)

@@ -6400,7 +6400,7 @@ F:	mm/kmemleak.c
 F:	mm/kmemleak-test.c
 
 KPROBES
-M:	Ananth N Mavinakayanahalli <ananth@in.ibm.com>
+M:	Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 M:	"David S. Miller" <davem@davemloft.net>
 M:	Masami Hiramatsu <mhiramat@kernel.org>

drivers/rapidio/devices/rio_mport_cdev.c (+2 -2)

@@ -2669,9 +2669,9 @@ static int __init mport_init(void)
 
 	/* Create device class needed by udev */
 	dev_class = class_create(THIS_MODULE, DRV_NAME);
-	if (!dev_class) {
+	if (IS_ERR(dev_class)) {
 		rmcd_error("Unable to create " DRV_NAME " class");
-		return -EINVAL;
+		return PTR_ERR(dev_class);
 	}
 
 	ret = alloc_chrdev_region(&dev_number, 0, RIO_MAX_MPORTS, DRV_NAME);
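
class_create() reports failure with an ERR_PTR()-encoded pointer rather than NULL, so the old NULL check could never fire and the error code was lost. A minimal, hedged sketch of the corrected pattern as a standalone module (the "demo" names are hypothetical, not part of this driver):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/module.h>

static struct class *demo_class;

static int __init demo_init(void)
{
	/* class_create() never returns NULL; failures come back as ERR_PTR */
	demo_class = class_create(THIS_MODULE, "demo");
	if (IS_ERR(demo_class))
		return PTR_ERR(demo_class);	/* propagate e.g. -ENOMEM */
	return 0;
}

static void __exit demo_exit(void)
{
	class_destroy(demo_class);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");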

fs/ocfs2/dlm/dlmmaster.c (+2 -0)

@@ -2455,6 +2455,8 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
 
 	spin_unlock(&dlm->spinlock);
 
+	ret = 0;
+
 done:
 	dlm_put(dlm);
 	return ret;
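
The handler initialises ret to an error value and exits through the shared "done:" label, so without the added assignment a successfully handled deref_done message still reported failure to the sender. A hedged, self-contained user-space illustration of that bug class (all names hypothetical):

#include <stdio.h>

/* Hypothetical stand-ins for the real handler's validation and work. */
static int validate(const char *msg) { return msg != NULL; }
static void process(const char *msg) { printf("handled: %s\n", msg); }

static int handle_message(const char *msg)
{
	int ret = -1;			/* pessimistic default, like -EINVAL */

	if (!validate(msg))
		goto done;

	process(msg);
	ret = 0;			/* the assignment the patch adds */
done:
	return ret;
}

int main(void)
{
	return handle_message("deref_done");
}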

fs/proc/task_mmu.c (+30 -3)

@@ -1518,6 +1518,32 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
 	return page;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
+					      struct vm_area_struct *vma,
+					      unsigned long addr)
+{
+	struct page *page;
+	int nid;
+
+	if (!pmd_present(pmd))
+		return NULL;
+
+	page = vm_normal_page_pmd(vma, addr, pmd);
+	if (!page)
+		return NULL;
+
+	if (PageReserved(page))
+		return NULL;
+
+	nid = page_to_nid(page);
+	if (!node_isset(nid, node_states[N_MEMORY]))
+		return NULL;
+
+	return page;
+}
+#endif
+
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 		unsigned long end, struct mm_walk *walk)
 {
@@ -1527,14 +1553,14 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *orig_pte;
 	pte_t *pte;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
-		pte_t huge_pte = *(pte_t *)pmd;
 		struct page *page;
 
-		page = can_gather_numa_stats(huge_pte, vma, addr);
+		page = can_gather_numa_stats_pmd(*pmd, vma, addr);
 		if (page)
-			gather_stats(page, md, pte_dirty(huge_pte),
+			gather_stats(page, md, pmd_dirty(*pmd),
 				     HPAGE_PMD_SIZE/PAGE_SIZE);
 		spin_unlock(ptl);
 		return 0;
@@ -1542,6 +1568,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
+#endif
 	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	do {
 		struct page *page = can_gather_numa_stats(*pte, vma, addr);

include/linux/huge_mm.h (+5 -0)

@@ -152,6 +152,7 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 }
 
 struct page *get_huge_zero_page(void);
+void put_huge_zero_page(void);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -208,6 +209,10 @@ static inline bool is_huge_zero_page(struct page *page)
 	return false;
 }
 
+static inline void put_huge_zero_page(void)
+{
+	BUILD_BUG();
+}
 
 static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmd, int flags)

include/linux/mm.h (+4 -0)

@@ -1031,6 +1031,8 @@ static inline bool page_mapped(struct page *page)
 	page = compound_head(page);
 	if (atomic_read(compound_mapcount_ptr(page)) >= 0)
 		return true;
+	if (PageHuge(page))
+		return false;
 	for (i = 0; i < hpage_nr_pages(page); i++) {
 		if (atomic_read(&page[i]._mapcount) >= 0)
 			return true;
@@ -1138,6 +1140,8 @@ struct zap_details {
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		pte_t pte);
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd);
 
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);

kernel/kcov.c (+2 -1)

@@ -1,5 +1,6 @@
 #define pr_fmt(fmt) "kcov: " fmt
 
+#define DISABLE_BRANCH_PROFILING
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/file.h>
@@ -43,7 +44,7 @@ struct kcov {
  * Entry point from instrumented code.
  * This is called once per basic-block/edge.
  */
-void __sanitizer_cov_trace_pc(void)
+void notrace __sanitizer_cov_trace_pc(void)
 {
 	struct task_struct *t;
 	enum kcov_mode mode;
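
__sanitizer_cov_trace_pc() is called from every compiler-instrumented basic block, so kcov itself must stay free of tracing and branch-profiling hooks or the callback could re-enter them. A hedged sketch of what the annotations amount to (the kernel's real definitions live in include/linux/compiler*.h):

/*
 * Hedged sketch, not the kernel's exact definitions: "notrace" expands
 * to a compiler attribute that keeps the function-entry profiling hook
 * out of the function, so the coverage callback cannot recurse into the
 * tracer. Defining DISABLE_BRANCH_PROFILING before <linux/compiler.h>
 * similarly makes likely()/unlikely() expand to plain __builtin_expect()
 * instead of the branch-profiling wrappers.
 */
#define notrace_sketch	__attribute__((no_instrument_function))

void notrace_sketch coverage_callback(void)
{
	/* record the caller's return address into the per-task buffer */
}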

kernel/kexec_core.c (+5 -2)

@@ -1415,6 +1415,9 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(page, lru);
 	VMCOREINFO_OFFSET(page, _mapcount);
 	VMCOREINFO_OFFSET(page, private);
+	VMCOREINFO_OFFSET(page, compound_dtor);
+	VMCOREINFO_OFFSET(page, compound_order);
+	VMCOREINFO_OFFSET(page, compound_head);
 	VMCOREINFO_OFFSET(pglist_data, node_zones);
 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
@@ -1447,8 +1450,8 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_X86
 	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
 #endif
-#ifdef CONFIG_HUGETLBFS
-	VMCOREINFO_SYMBOL(free_huge_page);
+#ifdef CONFIG_HUGETLB_PAGE
+	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
#endif
 
 	arch_crash_save_vmcoreinfo();
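
These exports let post-mortem tools such as makedumpfile recognise compound (including hugetlb) pages from the dump alone: in kernels of this era a tail page keeps a pointer to its head page in page->compound_head with bit 0 set, and a hugetlb head page can be identified by compound_dtor == HUGETLB_PAGE_DTOR instead of comparing against the free_huge_page symbol. A hedged user-space sketch of the tail-page decoding, where read_page_member() is a hypothetical helper that reads a struct page field from the dump using the exported OFFSET() values:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical: read the named struct page member of the page object
 * located at page_addr in the dump, using the VMCOREINFO offsets. */
extern uint64_t read_page_member(uint64_t page_addr, const char *member);

/* Bit 0 of compound_head flags a tail page; the remaining bits are the
 * address of the head page's struct page. */
static bool page_is_tail(uint64_t page_addr)
{
	return read_page_member(page_addr, "compound_head") & 1;
}

static uint64_t compound_head_of(uint64_t page_addr)
{
	uint64_t head = read_page_member(page_addr, "compound_head");

	return (head & 1) ? (head - 1) : page_addr;
}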

lib/stackdepot.c (+0 -4)

@@ -210,10 +210,6 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
 		goto fast_exit;
 
 	hash = hash_stack(trace->entries, trace->nr_entries);
-	/* Bad luck, we won't store this stack. */
-	if (hash == 0)
-		goto exit;
-
 	bucket = &stack_table[hash & STACK_HASH_MASK];
 
 	/*

mm/huge_memory.c (+5 -7)

@@ -232,7 +232,7 @@ retry:
 	return READ_ONCE(huge_zero_page);
 }
 
-static void put_huge_zero_page(void)
+void put_huge_zero_page(void)
 {
 	/*
 	 * Counter should never go to zero here. Only shrinker can put
@@ -1684,12 +1684,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	if (vma_is_dax(vma)) {
 		spin_unlock(ptl);
 		if (is_huge_zero_pmd(orig_pmd))
-			put_huge_zero_page();
+			tlb_remove_page(tlb, pmd_page(orig_pmd));
 	} else if (is_huge_zero_pmd(orig_pmd)) {
 		pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
 		atomic_long_dec(&tlb->mm->nr_ptes);
 		spin_unlock(ptl);
-		put_huge_zero_page();
+		tlb_remove_page(tlb, pmd_page(orig_pmd));
 	} else {
 		struct page *page = pmd_page(orig_pmd);
 		page_remove_rmap(page, true);
@@ -1960,10 +1960,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_ops)
+	if (vma->vm_ops || (vm_flags & VM_NO_THP))
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -2352,8 +2351,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
 		return false;
 	if (is_vma_temporary_stack(vma))
 		return false;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
-	return true;
+	return !(vma->vm_flags & VM_NO_THP);
 }
 
 static void collapse_huge_page(struct mm_struct *mm,

mm/memory-failure.c (+9 -1)

@@ -888,7 +888,15 @@ int get_hwpoison_page(struct page *page)
 		}
 	}
 
-	return get_page_unless_zero(head);
+	if (get_page_unless_zero(head)) {
+		if (head == compound_head(page))
+			return 1;
+
+		pr_info("MCE: %#lx cannot catch tail\n", page_to_pfn(page));
+		put_page(head);
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(get_hwpoison_page);

mm/memory.c (+40 -0)

@@ -789,6 +789,46 @@ out:
 	return pfn_to_page(pfn);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd)
+{
+	unsigned long pfn = pmd_pfn(pmd);
+
+	/*
+	 * There is no pmd_special() but there may be special pmds, e.g.
+	 * in a direct-access (dax) mapping, so let's just replicate the
+	 * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+	 */
+	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+		if (vma->vm_flags & VM_MIXEDMAP) {
+			if (!pfn_valid(pfn))
+				return NULL;
+			goto out;
+		} else {
+			unsigned long off;
+			off = (addr - vma->vm_start) >> PAGE_SHIFT;
+			if (pfn == vma->vm_pgoff + off)
+				return NULL;
+			if (!is_cow_mapping(vma->vm_flags))
+				return NULL;
+		}
+	}
+
+	if (is_zero_pfn(pfn))
+		return NULL;
+	if (unlikely(pfn > highest_memmap_pfn))
+		return NULL;
+
+	/*
+	 * NOTE! We still have PageReserved() pages in the page tables.
+	 * eg. VDSO mappings can cause them to exist.
+	 */
+out:
+	return pfn_to_page(pfn);
+}
+#endif
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range

mm/migrate.c (+7 -1)

@@ -975,7 +975,13 @@ out:
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
 		/* Soft-offlined page shouldn't go through lru cache list */
-		if (reason == MR_MEMORY_FAILURE) {
+		if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+			/*
+			 * With this release, we free successfully migrated
+			 * page and set PG_HWPoison on just freed page
+			 * intentionally. Although it's rather weird, it's how
+			 * HWPoison flag works at the moment.
+			 */
 			put_page(page);
 			if (!test_set_page_hwpoison(page))
 				num_poisoned_pages_inc();

mm/page_io.c (+5 -1)

@@ -353,7 +353,11 @@ int swap_readpage(struct page *page)
 
 	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
 	if (!ret) {
-		swap_slot_free_notify(page);
+		if (trylock_page(page)) {
+			swap_slot_free_notify(page);
+			unlock_page(page);
+		}
+
 		count_vm_event(PSWPIN);
 		return 0;
 	}

mm/swap.c (+5 -0)

@@ -728,6 +728,11 @@ void release_pages(struct page **pages, int nr, bool cold)
 			zone = NULL;
 		}
 
+		if (is_huge_zero_page(page)) {
+			put_huge_zero_page();
+			continue;
+		}
+
 		page = compound_head(page);
 		if (!put_page_testzero(page))
 			continue;

mm/vmscan.c (+15 -15)

@@ -2553,7 +2553,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 		sc->gfp_mask |= __GFP_HIGHMEM;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-					requested_highidx, sc->nodemask) {
+					gfp_zone(sc->gfp_mask), sc->nodemask) {
 		enum zone_type classzone_idx;
 
 		if (!populated_zone(zone))
@@ -3318,6 +3318,20 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
 	/* Try to sleep for a short interval */
 	if (prepare_kswapd_sleep(pgdat, order, remaining,
 						balanced_classzone_idx)) {
+		/*
+		 * Compaction records what page blocks it recently failed to
+		 * isolate pages from and skips them in the future scanning.
+		 * When kswapd is going to sleep, it is reasonable to assume
+		 * that pages and compaction may succeed so reset the cache.
+		 */
+		reset_isolation_suitable(pgdat);
+
+		/*
+		 * We have freed the memory, now we should compact it to make
+		 * allocation of the requested order possible.
+		 */
+		wakeup_kcompactd(pgdat, order, classzone_idx);
+
 		remaining = schedule_timeout(HZ/10);
 		finish_wait(&pgdat->kswapd_wait, &wait);
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -3341,20 +3355,6 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
 		 */
 		set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
 
-		/*
-		 * Compaction records what page blocks it recently failed to
-		 * isolate pages from and skips them in the future scanning.
-		 * When kswapd is going to sleep, it is reasonable to assume
-		 * that pages and compaction may succeed so reset the cache.
-		 */
-		reset_isolation_suitable(pgdat);
-
-		/*
-		 * We have freed the memory, now we should compact it to make
-		 * allocation of the requested order possible.
-		 */
-		wakeup_kcompactd(pgdat, order, classzone_idx);
-
 		if (!kthread_should_stop())
 			schedule();