
Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "21 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (21 commits)
  userfaultfd: replace ENOSPC with ESRCH in case mm has gone during copy/zeropage
  zram: rework copy of compressor name in comp_algorithm_store()
  rmap: do not call mmu_notifier_invalidate_page() under ptl
  mm: fix list corruptions on shmem shrinklist
  mm/balloon_compaction.c: don't zero ballooned pages
  MAINTAINERS: copy virtio on balloon_compaction.c
  mm: fix KSM data corruption
  mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem
  mm: make tlb_flush_pending global
  mm: refactor TLB gathering API
  Revert "mm: numa: defer TLB flush for THP migration as long as possible"
  mm: migrate: fix barriers around tlb_flush_pending
  mm: migrate: prevent racy access to tlb_flush_pending
  fault-inject: fix wrong should_fail() decision in task context
  test_kmod: fix small memory leak on filesystem tests
  test_kmod: fix the lock in register_test_dev_kmod()
  test_kmod: fix bug which allows negative values on two config options
  test_kmod: fix spelling mistake: "EMTPY" -> "EMPTY"
  userfaultfd: hugetlbfs: remove superfluous page unlock in VM_SHARED case
  mm: ratelimit PFNs busy info message
  ...
Linus Torvalds 8 years ago
parent
commit
27df704d43

+ 1 - 0
MAINTAINERS

@@ -14004,6 +14004,7 @@ F:	drivers/block/virtio_blk.c
 F:	include/linux/virtio*.h
 F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
+F:	mm/balloon_compaction.c
 
 VIRTIO CRYPTO DRIVER
 M:	Gonglei <arei.gonglei@huawei.com>

+ 9 - 2
arch/arm/include/asm/tlb.h

@@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->fullmm = !(start | (end+1));
@@ -166,8 +167,14 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->range_start = start;
+		tlb->range_end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */

+ 6 - 2
arch/ia64/include/asm/tlb.h

@@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->max = ARRAY_SIZE(tlb->local);
@@ -185,8 +186,11 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
  * collected.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force)
+		tlb->need_flush = 1;
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
 	 * tlb->end_addr.

+ 11 - 6
arch/s390/include/asm/tlb.h

@@ -47,10 +47,9 @@ struct mmu_table_batch {
 extern void tlb_table_flush(struct mmu_gather *tlb);
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
-static inline void tlb_gather_mmu(struct mmu_gather *tlb,
-				  struct mm_struct *mm,
-				  unsigned long start,
-				  unsigned long end)
+static inline void
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -76,9 +75,15 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-static inline void tlb_finish_mmu(struct mmu_gather *tlb,
-				  unsigned long start, unsigned long end)
+static inline void
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 }
 

+ 5 - 3
arch/sh/include/asm/tlb.h

@@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -47,9 +48,10 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end, bool force)
 {
-	if (tlb->fullmm)
+	if (tlb->fullmm || force)
 		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */

+ 10 - 3
arch/um/include/asm/tlb.h

@@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -80,13 +81,19 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-/* tlb_finish_mmu
+/* arch_tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+		tlb->need_flush = 1;
+	}
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */

+ 2 - 2
drivers/block/zram/zram_drv.c

@@ -308,7 +308,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct zram *zram = dev_to_zram(dev);
-	char compressor[CRYPTO_MAX_ALG_NAME];
+	char compressor[ARRAY_SIZE(zram->compressor)];
 	size_t sz;
 
 	strlcpy(compressor, buf, sizeof(compressor));
@@ -327,7 +327,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		return -EBUSY;
 	}
 
-	strlcpy(zram->compressor, compressor, sizeof(compressor));
+	strcpy(zram->compressor, compressor);
 	up_write(&zram->init_lock);
 	return len;
 }
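
A hedged aside on the fix above: sizing the scratch buffer from the destination field (ARRAY_SIZE(zram->compressor)) means the truncating copy already fits the destination, so the final plain strcpy() cannot overflow. Below is a minimal userspace sketch of the same sizing rule, with snprintf() standing in for strlcpy(); DEST_SIZE and the input string are made-up stand-ins, not values from the driver:

/* Hypothetical sketch, not the kernel code: why the scratch buffer is sized
 * like the destination rather than CRYPTO_MAX_ALG_NAME. */
#include <stdio.h>
#include <string.h>

#define DEST_SIZE 16			/* stand-in for sizeof(zram->compressor) */

int main(void)
{
	char dest[DEST_SIZE];
	char scratch[DEST_SIZE];	/* sized like the destination, as in the fix */
	const char *input = "a-very-long-compressor-name-from-sysfs";

	/* snprintf, like strlcpy, always NUL-terminates and truncates to fit,
	 * so the later plain copy into dest can never overflow. */
	snprintf(scratch, sizeof(scratch), "%s", input);
	strcpy(dest, scratch);		/* safe: strlen(scratch) < DEST_SIZE */

	printf("stored: %s\n", dest);
	return 0;
}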

+ 4 - 4
fs/proc/meminfo.c

@@ -106,13 +106,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		    global_node_page_state(NR_FILE_MAPPED));
 	show_val_kb(m, "Shmem:          ", i.sharedram);
 	show_val_kb(m, "Slab:           ",
-		    global_page_state(NR_SLAB_RECLAIMABLE) +
-		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+		    global_node_page_state(NR_SLAB_RECLAIMABLE) +
+		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
 
 	show_val_kb(m, "SReclaimable:   ",
-		    global_page_state(NR_SLAB_RECLAIMABLE));
+		    global_node_page_state(NR_SLAB_RECLAIMABLE));
 	show_val_kb(m, "SUnreclaim:     ",
-		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
 	seq_printf(m, "KernelStack:    %8lu kB\n",
 		   global_page_state(NR_KERNEL_STACK_KB));
 	show_val_kb(m, "PageTables:     ",

+ 5 - 2
fs/proc/task_mmu.c

@@ -16,9 +16,10 @@
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
+#include <linux/uaccess.h>
 
 #include <asm/elf.h>
-#include <linux/uaccess.h>
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
@@ -1008,6 +1009,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	enum clear_refs_types type;
+	struct mmu_gather tlb;
 	int itype;
 	int rv;
 
@@ -1054,6 +1056,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		}
 
 		down_read(&mm->mmap_sem);
+		tlb_gather_mmu(&tlb, mm, 0, -1);
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			for (vma = mm->mmap; vma; vma = vma->vm_next) {
 				if (!(vma->vm_flags & VM_SOFTDIRTY))
@@ -1075,7 +1078,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
 		if (type == CLEAR_REFS_SOFT_DIRTY)
 			mmu_notifier_invalidate_range_end(mm, 0, -1);
-		flush_tlb_mm(mm);
+		tlb_finish_mmu(&tlb, 0, -1);
 		up_read(&mm->mmap_sem);
 out_mm:
 		mmput(mm);

+ 2 - 2
fs/userfaultfd.c

@@ -1600,7 +1600,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 				   uffdio_copy.len);
 		mmput(ctx->mm);
 	} else {
-		return -ENOSPC;
+		return -ESRCH;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
 		return -EFAULT;
@@ -1647,7 +1647,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 				     uffdio_zeropage.range.len);
 		mmput(ctx->mm);
 	} else {
-		return -ENOSPC;
+		return -ESRCH;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
 		return -EFAULT;
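
A hedged sketch of how a userfaultfd monitor might consume the new error code after this change; uffd, dst, src and len are placeholders for a descriptor and range the caller already set up, and the retry policy shown is illustrative, not the canonical one:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

/* Returns 0 on success, 1 to retry, -1 to stop servicing this mm. */
static int resolve_fault(int uffd, unsigned long dst, unsigned long src,
			 unsigned long len)
{
	struct uffdio_copy copy = {
		.dst  = dst,
		.src  = src,
		.len  = len,
		.mode = 0,
	};

	if (ioctl(uffd, UFFDIO_COPY, &copy) == 0)
		return 0;

	if (errno == ESRCH) {
		/* After this patch: the target mm is gone (process exited),
		 * not an out-of-space condition -- stop, do not retry. */
		fprintf(stderr, "target exited: %s\n", strerror(errno));
		return -1;
	}
	if (errno == EAGAIN)
		return 1;	/* transient; the caller may retry the copy */

	return -1;
}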

+ 4 - 3
include/asm-generic/tlb.h

@@ -112,10 +112,11 @@ struct mmu_gather {
 
 #define HAVE_GENERIC_MMU_GATHER
 
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end);
+void arch_tlb_gather_mmu(struct mmu_gather *tlb,
+	struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
-							unsigned long end);
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			 unsigned long start, unsigned long end, bool force);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
 

+ 38 - 26
include/linux/mm_types.h

@@ -487,14 +487,12 @@ struct mm_struct {
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 	/*
 	 * An operation with batched TLB flushing is going on. Anything that
 	 * can move process memory needs to flush the TLB when moving a
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
-	bool tlb_flush_pending;
-#endif
+	atomic_t tlb_flush_pending;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/* See flush_tlb_batched_pending() */
 	bool tlb_flush_batched;
@@ -522,46 +520,60 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return mm->cpu_vm_mask_var;
 }
 
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+struct mmu_gather;
+extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end);
+extern void tlb_finish_mmu(struct mmu_gather *tlb,
+				unsigned long start, unsigned long end);
+
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
- * The barriers below prevent the compiler from re-ordering the instructions
- * around the memory barriers that are already present in the code.
+ * The barriers are used to ensure the order between tlb_flush_pending updates,
+ * which happen while the lock is not taken, and the PTE updates, which happen
+ * while the lock is taken, are serialized.
  */
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
-	return mm->tlb_flush_pending;
+	return atomic_read(&mm->tlb_flush_pending) > 0;
+}
+
+/*
+ * Returns true if there are two above TLB batching threads in parallel.
+ */
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+	return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
+{
+	atomic_set(&mm->tlb_flush_pending, 0);
 }
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 {
-	mm->tlb_flush_pending = true;
+	atomic_inc(&mm->tlb_flush_pending);
 
 	/*
-	 * Guarantee that the tlb_flush_pending store does not leak into the
+	 * Guarantee that the tlb_flush_pending increase does not leak into the
 	 * critical section updating the page tables
 	 */
 	smp_mb__before_spinlock();
 }
+
 /* Clearing is done after a TLB flush, which also provides a barrier. */
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
-{
-	barrier();
-	mm->tlb_flush_pending = false;
-}
-#else
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
-	return false;
-}
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
+	/*
+	 * Guarantee that the tlb_flush_pending does not not leak into the
+	 * critical section, since we must order the PTE change and changes to
+	 * the pending TLB flush indication. We could have relied on TLB flush
+	 * as a memory barrier, but this behavior is not clearly documented.
+	 */
+	smp_mb__before_atomic();
+	atomic_dec(&mm->tlb_flush_pending);
 }
-#endif
 
 struct vm_fault;
 

+ 1 - 1
kernel/fork.c

@@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 	mmu_notifier_mm_init(mm);
-	clear_tlb_flush_pending(mm);
+	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
 #endif

+ 1 - 1
kernel/power/snapshot.c

@@ -1650,7 +1650,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
 {
 	unsigned long size;
 
-	size = global_page_state(NR_SLAB_RECLAIMABLE)
+	size = global_node_page_state(NR_SLAB_RECLAIMABLE)
 		+ global_node_page_state(NR_ACTIVE_ANON)
 		+ global_node_page_state(NR_INACTIVE_ANON)
 		+ global_node_page_state(NR_ACTIVE_FILE)

+ 5 - 3
lib/fault-inject.c

@@ -110,10 +110,12 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
 	if (in_task()) {
 		unsigned int fail_nth = READ_ONCE(current->fail_nth);
 
-		if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1))
-			goto fail;
+		if (fail_nth) {
+			if (!WRITE_ONCE(current->fail_nth, fail_nth - 1))
+				goto fail;
 
-		return false;
+			return false;
+		}
 	}
 
 	/* No need to check any other properties if the probability is 0 */
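
A userspace model of the corrected decision flow above (the names and the probability helper are stand-ins, not the kernel API): with fail_nth unset, execution now falls through to the usual probability checks instead of returning "no failure" unconditionally, while the Nth call still injects the fault:

#include <stdbool.h>
#include <stdio.h>

static bool probability_says_fail(void) { return false; }	/* placeholder */

static bool should_fail_fixed(unsigned int *fail_nth)
{
	if (*fail_nth) {
		if (--*fail_nth == 0)
			return true;	/* this is the Nth call: inject */
		return false;		/* still counting down */
	}
	/* fail_nth disabled: fall through to the usual checks */
	return probability_says_fail();
}

int main(void)
{
	unsigned int nth = 3;

	for (int i = 1; i <= 4; i++)
		printf("call %d -> %s\n", i,
		       should_fail_fixed(&nth) ? "fail" : "ok");
	return 0;
}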

+ 8 - 8
lib/test_kmod.c

@@ -485,7 +485,7 @@ static ssize_t config_show(struct device *dev,
 				config->test_driver);
 	else
 		len += snprintf(buf+len, PAGE_SIZE - len,
-				"driver:\tEMTPY\n");
+				"driver:\tEMPTY\n");
 
 	if (config->test_fs)
 		len += snprintf(buf+len, PAGE_SIZE - len,
@@ -493,7 +493,7 @@ static ssize_t config_show(struct device *dev,
 				config->test_fs);
 	else
 		len += snprintf(buf+len, PAGE_SIZE - len,
-				"fs:\tEMTPY\n");
+				"fs:\tEMPTY\n");
 
 	mutex_unlock(&test_dev->config_mutex);
 
@@ -746,11 +746,11 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev,
 						      strlen(test_str));
 		break;
 	case TEST_KMOD_FS_TYPE:
-		break;
 		kfree_const(config->test_fs);
 		config->test_driver = NULL;
 		copied = config_copy_test_fs(config, test_str,
 					     strlen(test_str));
+		break;
 	default:
 		mutex_unlock(&test_dev->config_mutex);
 		return -EINVAL;
@@ -880,10 +880,10 @@ static int test_dev_config_update_uint_sync(struct kmod_test_device *test_dev,
 					    int (*test_sync)(struct kmod_test_device *test_dev))
 {
 	int ret;
-	long new;
+	unsigned long new;
 	unsigned int old_val;
 
-	ret = kstrtol(buf, 10, &new);
+	ret = kstrtoul(buf, 10, &new);
 	if (ret)
 		return ret;
 
@@ -918,9 +918,9 @@ static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev,
 					     unsigned int max)
 {
 	int ret;
-	long new;
+	unsigned long new;
 
-	ret = kstrtol(buf, 10, &new);
+	ret = kstrtoul(buf, 10, &new);
 	if (ret)
 		return ret;
 
@@ -1146,7 +1146,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
 	struct kmod_test_device *test_dev = NULL;
 	int ret;
 
-	mutex_unlock(&reg_dev_mutex);
+	mutex_lock(&reg_dev_mutex);
 
 	/* int should suffice for number of devices, test for wrap */
 	if (unlikely(num_test_devs + 1) < 0) {
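
A small userspace illustration of the bug class closed by switching to kstrtoul (strtol stands in for kstrtol here; the assumption is that the kernel's unsigned parser rejects a leading '-', unlike userspace strtoul): a signed parse stored into an unsigned config field silently wraps to a huge value instead of being refused:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned int config_val;
	long new = strtol("-1", NULL, 10);	/* parses fine, new == -1 */

	config_val = new;			/* wraps to UINT_MAX */
	printf("stored %u\n", config_val);
	return 0;
}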

+ 1 - 1
mm/balloon_compaction.c

@@ -24,7 +24,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 {
 	unsigned long flags;
 	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
-				__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_ZERO);
+				       __GFP_NOMEMALLOC | __GFP_NORETRY);
 	if (!page)
 		return NULL;
 

+ 1 - 5
mm/debug.c

@@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 		"tlb_flush_pending %d\n"
-#endif
 		"def_flags: %#lx(%pGv)\n",
 
 		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
@@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
-		mm->tlb_flush_pending,
-#endif
+		atomic_read(&mm->tlb_flush_pending),
 		mm->def_flags, &mm->def_flags
 	);
 }

+ 7 - 0
mm/huge_memory.c

@@ -1495,6 +1495,13 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		goto clear_pmdnuma;
 	}
 
+	/*
+	 * The page_table_lock above provides a memory barrier
+	 * with change_protection_range.
+	 */
+	if (mm_tlb_flush_pending(vma->vm_mm))
+		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and access rights restored.

+ 1 - 1
mm/hugetlb.c

@@ -4062,9 +4062,9 @@ out:
 	return ret;
 out_release_unlock:
 	spin_unlock(ptl);
-out_release_nounlock:
 	if (vm_shared)
 		unlock_page(page);
+out_release_nounlock:
 	put_page(page);
 	goto out;
 }

+ 2 - 1
mm/ksm.c

@@ -1038,7 +1038,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 		goto out_unlock;
 
 	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
-	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) {
+	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) ||
+						mm_tlb_flush_pending(mm)) {
 		pte_t entry;
 
 		swapped = PageSwapCache(page);

+ 35 - 7
mm/memory.c

@@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 	return true;
 }
 
-/* tlb_gather_mmu
- *	Called to initialize an (on-stack) mmu_gather structure for page-table
- *	tear-down from @mm. The @fullmm argument is used when @mm is without
- *	users and we're going to destroy the full address space (exit/execve).
- */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
@@ -275,10 +271,14 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end, bool force)
 {
 	struct mmu_gather_batch *batch, *next;
 
+	if (force)
+		__tlb_adjust_range(tlb, start, end - start);
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
@@ -398,6 +398,34 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
+/* tlb_gather_mmu
+ *	Called to initialize an (on-stack) mmu_gather structure for page-table
+ *	tear-down from @mm. The @fullmm argument is used when @mm is without
+ *	users and we're going to destroy the full address space (exit/execve).
+ */
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
+{
+	arch_tlb_gather_mmu(tlb, mm, start, end);
+	inc_tlb_flush_pending(tlb->mm);
+}
+
+void tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
+{
+	/*
+	 * If there are parallel threads are doing PTE changes on same range
+	 * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
+	 * flush by batching, a thread has stable TLB entry can fail to flush
+	 * the TLB by observing pte_none|!pte_dirty, for example so flush TLB
+	 * forcefully if we detect parallel PTE batching threads.
+	 */
+	bool force = mm_tlb_flush_nested(tlb->mm);
+
+	arch_tlb_finish_mmu(tlb, start, end, force);
+	dec_tlb_flush_pending(tlb->mm);
+}
+
 /*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.

+ 0 - 6
mm/migrate.c

@@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		put_page(new_page);
 		goto out_fail;
 	}
-	/*
-	 * We are not sure a pending tlb flush here is for a huge page
-	 * mapping or not. Hence use the tlb range variant
-	 */
-	if (mm_tlb_flush_pending(mm))
-		flush_tlb_range(vma, mmun_start, mmun_end);
 
 	/* Prepare a page as a migration target */
 	__SetPageLocked(new_page);

+ 2 - 2
mm/mprotect.c

@@ -244,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
-	set_tlb_flush_pending(mm);
+	inc_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -256,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
 		flush_tlb_range(vma, start, end);
-	clear_tlb_flush_pending(mm);
+	dec_tlb_flush_pending(mm);
 
 	return pages;
 }
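
A minimal userspace model of the counter semantics used above (C11 atomics stand in for the kernel's atomic_t; this is an analogy, not kernel code): the count goes up before a batched PTE update, down after the flush, and a reader can tell whether one or several batches are in flight:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int tlb_flush_pending;	/* models mm->tlb_flush_pending */

static void inc_tlb_flush_pending(void) { atomic_fetch_add(&tlb_flush_pending, 1); }
static void dec_tlb_flush_pending(void) { atomic_fetch_sub(&tlb_flush_pending, 1); }
static bool mm_tlb_flush_pending(void)  { return atomic_load(&tlb_flush_pending) > 0; }
static bool mm_tlb_flush_nested(void)   { return atomic_load(&tlb_flush_pending) > 1; }

int main(void)
{
	inc_tlb_flush_pending();	/* thread A starts a batched PTE update */
	inc_tlb_flush_pending();	/* thread B starts another on the same mm */
	printf("pending=%d nested=%d\n",
	       mm_tlb_flush_pending(), mm_tlb_flush_nested());
	dec_tlb_flush_pending();	/* each side drops its count after flushing */
	dec_tlb_flush_pending();
	return 0;
}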

+ 6 - 5
mm/page_alloc.c

@@ -4458,8 +4458,9 @@ long si_mem_available(void)
 	 * Part of the reclaimable slab consists of items that are in use,
 	 * and cannot be freed. Cap this estimate at the low watermark.
 	 */
-	available += global_page_state(NR_SLAB_RECLAIMABLE) -
-		     min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+	available += global_node_page_state(NR_SLAB_RECLAIMABLE) -
+		     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
+			 wmark_low);
 
 	if (available < 0)
 		available = 0;
@@ -4602,8 +4603,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		global_node_page_state(NR_FILE_DIRTY),
 		global_node_page_state(NR_WRITEBACK),
 		global_node_page_state(NR_UNSTABLE_NFS),
-		global_page_state(NR_SLAB_RECLAIMABLE),
-		global_page_state(NR_SLAB_UNRECLAIMABLE),
+		global_node_page_state(NR_SLAB_RECLAIMABLE),
+		global_node_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_node_page_state(NR_FILE_MAPPED),
 		global_node_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
@@ -7668,7 +7669,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 	/* Make sure the range is really isolated. */
 	if (test_pages_isolated(outer_start, end, false)) {
-		pr_info("%s: [%lx, %lx) PFNs busy\n",
+		pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
 			__func__, outer_start, end);
 		ret = -EBUSY;
 		goto done;

+ 30 - 22
mm/rmap.c

@@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		.flags = PVMW_SYNC,
 	};
 	int *cleaned = arg;
+	bool invalidation_needed = false;
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		int ret = 0;
-		address = pvmw.address;
 		if (pvmw.pte) {
 			pte_t entry;
 			pte_t *pte = pvmw.pte;
@@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pte_dirty(*pte) && !pte_write(*pte))
 				continue;
 
-			flush_cache_page(vma, address, pte_pfn(*pte));
-			entry = ptep_clear_flush(vma, address, pte);
+			flush_cache_page(vma, pvmw.address, pte_pfn(*pte));
+			entry = ptep_clear_flush(vma, pvmw.address, pte);
 			entry = pte_wrprotect(entry);
 			entry = pte_mkclean(entry);
-			set_pte_at(vma->vm_mm, address, pte, entry);
+			set_pte_at(vma->vm_mm, pvmw.address, pte, entry);
 			ret = 1;
 		} else {
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
 				continue;
 
-			flush_cache_page(vma, address, page_to_pfn(page));
-			entry = pmdp_huge_clear_flush(vma, address, pmd);
+			flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+			entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd);
 			entry = pmd_wrprotect(entry);
 			entry = pmd_mkclean(entry);
-			set_pmd_at(vma->vm_mm, address, pmd, entry);
+			set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry);
 			ret = 1;
 #else
 			/* unexpected pmd-mapped page? */
@@ -926,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (ret) {
-			mmu_notifier_invalidate_page(vma->vm_mm, address);
 			(*cleaned)++;
+			invalidation_needed = true;
 		}
 	}
 
+	if (invalidation_needed) {
+		mmu_notifier_invalidate_range(vma->vm_mm, address,
+				address + (1UL << compound_order(page)));
+	}
+
 	return true;
 }
 
@@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	};
 	pte_t pteval;
 	struct page *subpage;
-	bool ret = true;
+	bool ret = true, invalidation_needed = false;
 	enum ttu_flags flags = (enum ttu_flags)arg;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
@@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		VM_BUG_ON_PAGE(!pvmw.pte, page);
 
 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-		address = pvmw.address;
-
 
 		if (!(flags & TTU_IGNORE_ACCESS)) {
-			if (ptep_clear_flush_young_notify(vma, address,
+			if (ptep_clear_flush_young_notify(vma, pvmw.address,
 						pvmw.pte)) {
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		/* Nuke the page table entry. */
-		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte));
 		if (should_defer_flush(mm, flags)) {
 			/*
 			 * We clear the PTE but do not flush so potentially
@@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * transition on a cached TLB entry is written through
 			 * and traps if the PTE is unmapped.
 			 */
-			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+			pteval = ptep_get_and_clear(mm, pvmw.address,
+						    pvmw.pte);
 
 			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
 		} else {
-			pteval = ptep_clear_flush(vma, address, pvmw.pte);
+			pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
 		}
 
 		/* Move the dirty bit to the page. Now the pte is gone. */
@@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			if (PageHuge(page)) {
 				int nr = 1 << compound_order(page);
 				hugetlb_count_sub(nr, mm);
-				set_huge_swap_pte_at(mm, address,
+				set_huge_swap_pte_at(mm, pvmw.address,
 						     pvmw.pte, pteval,
 						     vma_mmu_pagesize(vma));
 			} else {
 				dec_mm_counter(mm, mm_counter(page));
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 			}
 
 		} else if (pte_unused(pteval)) {
@@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else if (PageAnon(page)) {
 			swp_entry_t entry = { .val = page_private(subpage) };
 			pte_t swp_pte;
@@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				 * If the page was redirtied, it cannot be
 				 * discarded. Remap the page to page table.
 				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				SetPageSwapBacked(page);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			}
 
 			if (swap_duplicate(entry) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
 				break;
@@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else
 			dec_mm_counter(mm, mm_counter_file(page));
 discard:
 		page_remove_rmap(subpage, PageHuge(page));
 		put_page(page);
-		mmu_notifier_invalidate_page(mm, address);
+		invalidation_needed = true;
 	}
+
+	if (invalidation_needed)
+		mmu_notifier_invalidate_range(mm, address,
+				address + (1UL << compound_order(page)));
 	return ret;
 }
 

+ 10 - 2
mm/shmem.c

@@ -1022,7 +1022,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 			 */
 			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
 				spin_lock(&sbinfo->shrinklist_lock);
-				if (list_empty(&info->shrinklist)) {
+				/*
+				 * _careful to defend against unlocked access to
+				 * ->shrink_list in shmem_unused_huge_shrink()
+				 */
+				if (list_empty_careful(&info->shrinklist)) {
 					list_add_tail(&info->shrinklist,
 							&sbinfo->shrinklist);
 					sbinfo->shrinklist_len++;
@@ -1817,7 +1821,11 @@ alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 			 * to shrink under memory pressure.
 			 */
 			spin_lock(&sbinfo->shrinklist_lock);
-			if (list_empty(&info->shrinklist)) {
+			/*
+			 * _careful to defend against unlocked access to
+			 * ->shrink_list in shmem_unused_huge_shrink()
+			 */
+			if (list_empty_careful(&info->shrinklist)) {
 				list_add_tail(&info->shrinklist,
 						&sbinfo->shrinklist);
 				sbinfo->shrinklist_len++;

+ 1 - 1
mm/util.c

@@ -633,7 +633,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		 * which are reclaimable, under pressure.  The dentry
 		 * cache and most inode caches should fall into this
 		 */
-		free += global_page_state(NR_SLAB_RECLAIMABLE);
+		free += global_node_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
 		 * Leave reserved pages. The pages are not for anonymous pages.