8 years ago · 56c2997965
--- a/.mailmap
+++ b/.mailmap
@@ -171,6 +171,7 @@ Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
 
				 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
			
 
				 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
			
 
				 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
			
 
				+Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
			
 
				 Yusuke Goda <goda.yusuke@renesas.com>
			
 
				 Gustavo Padovan <gustavo@las.ic.unicamp.br>
			
 
				 Gustavo Padovan <padovan@profusion.mobi>
			
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -373,6 +373,22 @@ restart:
 
				 		}
			
 
				 		spin_lock_irq(&mapping->tree_lock);
			
 
				 
			
 
				+		if (!entry) {
			
 
				+			/*
			
 
				+			 * We needed to drop mapping->tree_lock while calling
			
 
				+			 * radix_tree_preload() and we didn't have an entry to
			
 
				+			 * lock.  See if another thread inserted an entry at
			
 
				+			 * our index during this time.
			
 
				+			 */
			
 
				+			entry = __radix_tree_lookup(&mapping->page_tree, index,
			
 
				+					NULL, &slot);
			
 
				+			if (entry) {
			
 
				+				radix_tree_preload_end();
			
 
				+				spin_unlock_irq(&mapping->tree_lock);
			
 
				+				goto restart;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				 		if (pmd_downgrade) {
			
 
				 			radix_tree_delete(&mapping->page_tree, index);
			
 
				 			mapping->nrexceptional--;
			
@@ -388,19 +404,12 @@ restart:
 
				 		if (err) {
			
 
				 			spin_unlock_irq(&mapping->tree_lock);
			
 
				 			/*
			
 
				-			 * Someone already created the entry?  This is a
			
 
				-			 * normal failure when inserting PMDs in a range
			
 
				-			 * that already contains PTEs.  In that case we want
			
 
				-			 * to return -EEXIST immediately.
			
 
				-			 */
			
 
				-			if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
			
 
				-				goto restart;
			
 
				-			/*
			
 
				-			 * Our insertion of a DAX PMD entry failed, most
			
 
				-			 * likely because it collided with a PTE sized entry
			
 
				-			 * at a different index in the PMD range.  We haven't
			
 
				-			 * inserted anything into the radix tree and have no
			
 
				-			 * waiters to wake.
			
 
				+			 * Our insertion of a DAX entry failed, most likely
			
 
				+			 * because we were inserting a PMD entry and it
			
 
				+			 * collided with a PTE sized entry at a different
			
 
				+			 * index in the PMD range.  We haven't inserted
			
 
				+			 * anything into the radix tree and have no waiters to
			
 
				+			 * wake.
			
 
				 			 */
			
 
				 			return ERR_PTR(err);
			
 
				 		}
			
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 
				 	 *	protocols: aa:... bb:...
			
 
				 	 */
			
 
				 	seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n",
			
 
				-		   pending, total, UFFD_API, UFFD_API_FEATURES,
			
 
				+		   pending, total, UFFD_API, ctx->features,
			
 
				 		   UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS);
			
 
				 }
			
 
				 #endif
			
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -261,9 +261,9 @@
 
				  */
			
 
				 #ifndef RO_AFTER_INIT_DATA
			
 
				 #define RO_AFTER_INIT_DATA						\
			
 
				-	__start_ro_after_init = .;					\
			
 
				+	VMLINUX_SYMBOL(__start_ro_after_init) = .;			\
			
 
				 	*(.data..ro_after_init)						\
			
 
				-	__end_ro_after_init = .;
			
 
				+	VMLINUX_SYMBOL(__end_ro_after_init) = .;
			
 
				 #endif
			
 
				 
			
 
				 /*
			
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
 
				 
			
 
				 	WARN_ON(!task->ptrace || task->parent != current);
			
 
				 
			
 
				+	/*
			
 
				+	 * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
			
 
				+	 * Recheck state under the lock to close this race.
			
 
				+	 */
			
 
				 	spin_lock_irq(&task->sighand->siglock);
			
 
				-	if (__fatal_signal_pending(task))
			
 
				-		wake_up_state(task, __TASK_TRACED);
			
 
				-	else
			
 
				-		task->state = TASK_TRACED;
			
 
				+	if (task->state == __TASK_TRACED) {
			
 
				+		if (__fatal_signal_pending(task))
			
 
				+			wake_up_state(task, __TASK_TRACED);
			
 
				+		else
			
 
				+			task->state = TASK_TRACED;
			
 
				+	}
			
 
				 	spin_unlock_irq(&task->sighand->siglock);
			
 
				 }
			
 
				 
			
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj,
 
				 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
			
 
				 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
			
 
				 		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
			
 
				-	} else if (!memcmp("defer", buf,
			
 
				-		    min(sizeof("defer")-1, count))) {
			
 
				-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
			
 
				-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
			
 
				-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
			
 
				-		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
			
 
				 	} else if (!memcmp("defer+madvise", buf,
			
 
				 		    min(sizeof("defer+madvise")-1, count))) {
			
 
				 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
			
 
				 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
			
 
				 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
			
 
				 		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
			
 
				+	} else if (!memcmp("defer", buf,
			
 
				+		    min(sizeof("defer")-1, count))) {
			
 
				+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
			
 
				+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
			
 
				+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
			
 
				+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
			
 
				 	} else if (!memcmp("madvise", buf,
			
 
				 			   min(sizeof("madvise")-1, count))) {
			
 
				 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
			
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 
				 enum ttu_flags;
			
 
				 struct tlbflush_unmap_batch;
			
 
				 
			
 
				+
			
 
				+/*
			
 
				+ * only for MM internal work items which do not depend on
			
 
				+ * any allocations or locks which might depend on allocations
			
 
				+ */
			
 
				+extern struct workqueue_struct *mm_percpu_wq;
			
 
				+
			
 
				 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
			
 
				 void try_to_unmap_flush(void);
			
 
				 void try_to_unmap_flush_dirty(void);
			
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2373,6 +2373,13 @@ void drain_all_pages(struct zone *zone)
 
				 	 */
			
 
				 	static cpumask_t cpus_with_pcps;
			
 
				 
			
 
				+	/*
			
 
				+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
			
 
				+	 * initialized.
			
 
				+	 */
			
 
				+	if (WARN_ON_ONCE(!mm_percpu_wq))
			
 
				+		return;
			
 
				+
			
 
				 	/* Workqueues cannot recurse */
			
 
				 	if (current->flags & PF_WQ_WORKER)
			
 
				 		return;
			
@@ -2422,7 +2429,7 @@ void drain_all_pages(struct zone *zone)
 
				 	for_each_cpu(cpu, &cpus_with_pcps) {
			
 
				 		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
			
 
				 		INIT_WORK(work, drain_local_pages_wq);
			
 
				-		schedule_work_on(cpu, work);
			
 
				+		queue_work_on(cpu, mm_percpu_wq, work);
			
 
				 	}
			
 
				 	for_each_cpu(cpu, &cpus_with_pcps)
			
 
				 		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
			
@@ -4519,13 +4526,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 
				 			K(node_page_state(pgdat, NR_FILE_MAPPED)),
			
 
				 			K(node_page_state(pgdat, NR_FILE_DIRTY)),
			
 
				 			K(node_page_state(pgdat, NR_WRITEBACK)),
			
 
				+			K(node_page_state(pgdat, NR_SHMEM)),
			
 
				 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
			
 
				 			K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
			
 
				 			K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
			
 
				 					* HPAGE_PMD_NR),
			
 
				 			K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
			
 
				 #endif
			
 
				-			K(node_page_state(pgdat, NR_SHMEM)),
			
 
				 			K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
			
 
				 			K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
			
 
				 			node_page_state(pgdat, NR_PAGES_SCANNED),
			
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -111,12 +111,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
				 	if (pvmw->pmd && !pvmw->pte)
			
 
				 		return not_found(pvmw);
			
 
				 
			
 
				-	/* Only for THP, seek to next pte entry makes sense */
			
 
				-	if (pvmw->pte) {
			
 
				-		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
			
 
				-			return not_found(pvmw);
			
 
				+	if (pvmw->pte)
			
 
				 		goto next_pte;
			
 
				-	}
			
 
				 
			
 
				 	if (unlikely(PageHuge(pvmw->page))) {
			
 
				 		/* when pud is not present, pte will be NULL */
			
@@ -165,9 +161,14 @@ restart:
 
				 	while (1) {
			
 
				 		if (check_pte(pvmw))
			
 
				 			return true;
			
 
				-next_pte:	do {
			
 
				+next_pte:
			
 
				+		/* Seeking to the next pte only makes sense for THP */
			
 
				+		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
			
 
				+			return not_found(pvmw);
			
 
				+		do {
			
 
				 			pvmw->address += PAGE_SIZE;
			
 
				-			if (pvmw->address >=
			
 
				+			if (pvmw->address >= pvmw->vma->vm_end ||
			
 
				+			    pvmw->address >=
			
 
				 					__vma_address(pvmw->page, pvmw->vma) +
			
 
				 					hpage_nr_pages(pvmw->page) * PAGE_SIZE)
			
 
				 				return not_found(pvmw);
			
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 
				 
			
 
				 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
			
 
				 
			
 
				-/*
			
 
				- * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
			
 
				- * workqueue, aiding in getting memory freed.
			
 
				- */
			
 
				-static struct workqueue_struct *lru_add_drain_wq;
			
 
				-
			
 
				-static int __init lru_init(void)
			
 
				-{
			
 
				-	lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
			
 
				-
			
 
				-	if (WARN(!lru_add_drain_wq,
			
 
				-		"Failed to create workqueue lru_add_drain_wq"))
			
 
				-		return -ENOMEM;
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-early_initcall(lru_init);
			
 
				-
			
 
				 void lru_add_drain_all(void)
			
 
				 {
			
 
				 	static DEFINE_MUTEX(lock);
			
 
				 	static struct cpumask has_work;
			
 
				 	int cpu;
			
 
				 
			
 
				+	/*
			
 
				+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
			
 
				+	 * initialized.
			
 
				+	 */
			
 
				+	if (WARN_ON(!mm_percpu_wq))
			
 
				+		return;
			
 
				+
			
 
				 	mutex_lock(&lock);
			
 
				 	get_online_cpus();
			
 
				 	cpumask_clear(&has_work);
			
@@ -707,7 +696,7 @@ void lru_add_drain_all(void)
 
				 		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
			
 
				 		    need_activate_page_drain(cpu)) {
			
 
				 			INIT_WORK(work, lru_add_drain_per_cpu);
			
 
				-			queue_work_on(cpu, lru_add_drain_wq, work);
			
 
				+			queue_work_on(cpu, mm_percpu_wq, work);
			
 
				 			cpumask_set_cpu(cpu, &has_work);
			
 
				 		}
			
 
				 	}
			
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type)
 
				 			struct page *page = map[i];
			
 
				 			if (page)
			
 
				 				__free_page(page);
			
 
				+			if (!(i % SWAP_CLUSTER_MAX))
			
 
				+				cond_resched();
			
 
				 		}
			
 
				 		vfree(map);
			
 
				 	}
			
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = {
 
				 #endif /* CONFIG_PROC_FS */
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				-static struct workqueue_struct *vmstat_wq;
			
 
				 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
			
 
				 int sysctl_stat_interval __read_mostly = HZ;
			
 
				 
			
@@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w)
 
				 		 * to occur in the future. Keep on running the
			
 
				 		 * update worker thread.
			
 
				 		 */
			
 
				-		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
			
 
				+		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
			
 
				 				this_cpu_ptr(&vmstat_work),
			
 
				 				round_jiffies_relative(sysctl_stat_interval));
			
 
				 	}
			
@@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w)
 
				 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
			
 
				 
			
 
				 		if (!delayed_work_pending(dw) && need_update(cpu))
			
 
				-			queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
			
 
				+			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
			
 
				 	}
			
 
				 	put_online_cpus();
			
 
				 
			
@@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void)
 
				 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			
 
				 			vmstat_update);
			
 
				 
			
 
				-	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
			
 
				 	schedule_delayed_work(&shepherd,
			
 
				 		round_jiffies_relative(sysctl_stat_interval));
			
 
				 }
			
@@ -1764,11 +1762,16 @@ static int vmstat_cpu_dead(unsigned int cpu)
 
				 
			
 
				 #endif
			
 
				 
			
 
				+struct workqueue_struct *mm_percpu_wq;
			
 
				+
			
 
				 void __init init_mm_internals(void)
			
 
				 {
			
 
				-#ifdef CONFIG_SMP
			
 
				-	int ret;
			
 
				+	int ret __maybe_unused;
			
 
				 
			
 
				+	mm_percpu_wq = alloc_workqueue("mm_percpu_wq",
			
 
				+				       WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
			
 
				+
			
 
				+#ifdef CONFIG_SMP
			
 
				 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
			
 
				 					NULL, vmstat_cpu_dead);
			
 
				 	if (ret < 0)