@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
+#include <linux/gfp.h>

 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -35,7 +36,7 @@
  * necessary invalidation by clearing out the 'ctx_id' which
  * forces a TLB flush when the context is loaded.
  */
-void clear_asid_other(void)
+static void clear_asid_other(void)
 {
 	u16 asid;

@@ -185,8 +186,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
 	unsigned cpu = smp_processor_id();
 	u64 next_tlb_gen;
+	bool need_flush;
+	u16 new_asid;

 	/*
 	 * NB: The scheduler will call us with prev == next when switching
@@ -240,20 +244,41 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			   next->context.ctx_id);

 		/*
-		 * We don't currently support having a real mm loaded without
-		 * our cpu set in mm_cpumask(). We have all the bookkeeping
-		 * in place to figure out whether we would need to flush
-		 * if our cpu were cleared in mm_cpumask(), but we don't
-		 * currently use it.
+		 * Even in lazy TLB mode, the CPU should stay set in the
+		 * mm_cpumask. The TLB shootdown code can figure out from
+		 * cpu_tlbstate.is_lazy whether or not to send an IPI.
 		 */
 		if (WARN_ON_ONCE(real_prev != &init_mm &&
 				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
 			cpumask_set_cpu(cpu, mm_cpumask(next));

-		return;
+		/*
+		 * If the CPU is not in lazy TLB mode, we are just switching
+		 * from one thread in a process to another thread in the same
+		 * process. No TLB flush required.
+		 */
+		if (!was_lazy)
+			return;
+
+		/*
+		 * Read the tlb_gen to check whether a flush is needed.
+		 * If the TLB is up to date, just use it.
+		 * The barrier synchronizes with the tlb_gen increment in
+		 * the TLB shootdown code.
+		 */
+		smp_mb();
+		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
+				next_tlb_gen)
+			return;
+
+		/*
+		 * TLB contents went out of date while we were in lazy
+		 * mode. Fall through to the TLB switching code below.
+		 */
+		new_asid = prev_asid;
+		need_flush = true;
 	} else {
-		u16 new_asid;
-		bool need_flush;
 		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

 		/*
@@ -285,53 +310,60 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			sync_current_stack_to_mm(next);
 		}

-		/* Stop remote flushes for the previous mm */
-		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
-				real_prev != &init_mm);
-		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+		/*
+		 * Stop remote flushes for the previous mm.
+		 * Skip kernel threads; we never send init_mm TLB flushing IPIs,
+		 * but the bitmap manipulation can cause cache line contention.
+		 */
+		if (real_prev != &init_mm) {
+			VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
+						mm_cpumask(real_prev)));
+			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+		}

 		/*
 		 * Start remote flushes and then read tlb_gen.
 		 */
-		cpumask_set_cpu(cpu, mm_cpumask(next));
+		if (next != &init_mm)
+			cpumask_set_cpu(cpu, mm_cpumask(next));
 		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

 		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+	}

-		if (need_flush) {
-			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-			load_new_mm_cr3(next->pgd, new_asid, true);
-
-			/*
-			 * NB: This gets called via leave_mm() in the idle path
-			 * where RCU functions differently. Tracing normally
-			 * uses RCU, so we need to use the _rcuidle variant.
-			 *
-			 * (There is no good reason for this. The idle code should
-			 * be rearranged to call this before rcu_idle_enter().)
-			 */
-			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-		} else {
-			/* The new ASID is already up to date. */
-			load_new_mm_cr3(next->pgd, new_asid, false);
-
-			/* See above wrt _rcuidle. */
-			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
-		}
+	if (need_flush) {
+		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+		load_new_mm_cr3(next->pgd, new_asid, true);

 		/*
-		 * Record last user mm's context id, so we can avoid
-		 * flushing branch buffer with IBPB if we switch back
-		 * to the same user.
+		 * NB: This gets called via leave_mm() in the idle path
+		 * where RCU functions differently. Tracing normally
+		 * uses RCU, so we need to use the _rcuidle variant.
+		 *
+		 * (There is no good reason for this. The idle code should
+		 * be rearranged to call this before rcu_idle_enter().)
 		 */
-		if (next != &init_mm)
-			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+	} else {
+		/* The new ASID is already up to date. */
+		load_new_mm_cr3(next->pgd, new_asid, false);

-		this_cpu_write(cpu_tlbstate.loaded_mm, next);
-		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+		/* See above wrt _rcuidle. */
+		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}

+	/*
+	 * Record last user mm's context id, so we can avoid
+	 * flushing branch buffer with IBPB if we switch back
+	 * to the same user.
+	 */
+	if (next != &init_mm)
+		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+
+	this_cpu_write(cpu_tlbstate.loaded_mm, next);
+	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+
 	load_mm_cr4(next);
 	switch_ldt(real_prev, next);
 }
@@ -354,20 +386,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
 		return;

-	if (tlb_defer_switch_to_init_mm()) {
-		/*
-		 * There's a significant optimization that may be possible
-		 * here. We have accurate enough TLB flush tracking that we
-		 * don't need to maintain coherence of TLB per se when we're
-		 * lazy. We do, however, need to maintain coherence of
-		 * paging-structure caches. We could, in principle, leave our
-		 * old mm loaded and only switch to init_mm when
-		 * tlb_remove_page() happens.
-		 */
-		this_cpu_write(cpu_tlbstate.is_lazy, true);
-	} else {
-		switch_mm(NULL, &init_mm, NULL);
-	}
+	this_cpu_write(cpu_tlbstate.is_lazy, true);
 }

 /*
@@ -454,6 +473,9 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 		 * paging-structure cache to avoid speculatively reading
 		 * garbage into our TLB. Since switching to init_mm is barely
 		 * slower than a minimal flush, just switch to init_mm.
+		 *
+		 * This should be rare, since native_flush_tlb_others()
+		 * skips IPIs to CPUs in lazy TLB mode.
 		 */
 		switch_mm_irqs_off(NULL, &init_mm, NULL);
 		return;
@@ -560,6 +582,9 @@ static void flush_tlb_func_remote(void *info)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info)
 {
+	cpumask_var_t lazymask;
+	unsigned int cpu;
+
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
 	if (info->end == TLB_FLUSH_ALL)
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
@@ -583,8 +608,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 		 * that UV should be updated so that smp_call_function_many(),
 		 * etc, are optimal on UV.
 		 */
-		unsigned int cpu;
-
 		cpu = smp_processor_id();
 		cpumask = uv_flush_tlb_others(cpumask, info);
 		if (cpumask)
@@ -592,8 +615,29 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 					       (void *)info, 1);
 		return;
 	}
-	smp_call_function_many(cpumask, flush_tlb_func_remote,
+
+	/*
+	 * A temporary cpumask is used in order to skip sending IPIs
+	 * to CPUs in lazy TLB state, while keeping them in mm_cpumask(mm).
+	 * If the allocation fails, simply IPI every CPU in mm_cpumask.
+	 */
+	if (!alloc_cpumask_var(&lazymask, GFP_ATOMIC)) {
+		smp_call_function_many(cpumask, flush_tlb_func_remote,
 			       (void *)info, 1);
+		return;
+	}
+
+	cpumask_copy(lazymask, cpumask);
+
+	for_each_cpu(cpu, lazymask) {
+		if (per_cpu(cpu_tlbstate.is_lazy, cpu))
+			cpumask_clear_cpu(cpu, lazymask);
+	}
+
+	smp_call_function_many(lazymask, flush_tlb_func_remote,
+			       (void *)info, 1);
+
+	free_cpumask_var(lazymask);
 }

 /*
@@ -646,6 +690,68 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	put_cpu();
 }

+void tlb_flush_remove_tables_local(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm &&
+			this_cpu_read(cpu_tlbstate.is_lazy)) {
+		/*
+		 * We're in lazy mode. We need to at least flush our
+		 * paging-structure cache to avoid speculatively reading
+		 * garbage into our TLB. Since switching to init_mm is barely
+		 * slower than a minimal flush, just switch to init_mm.
+		 */
+		switch_mm_irqs_off(NULL, &init_mm, NULL);
+	}
+}
+
+static void mm_fill_lazy_tlb_cpu_mask(struct mm_struct *mm,
+				      struct cpumask *lazy_cpus)
+{
+	int cpu;
+
+	for_each_cpu(cpu, mm_cpumask(mm)) {
+		if (per_cpu(cpu_tlbstate.is_lazy, cpu))
+			cpumask_set_cpu(cpu, lazy_cpus);
+	}
+}
+
+void tlb_flush_remove_tables(struct mm_struct *mm)
+{
+	int cpu = get_cpu();
+	cpumask_var_t lazy_cpus;
+
+	if (cpumask_any_but(mm_cpumask(mm), cpu) >= nr_cpu_ids) {
+		put_cpu();
+		return;
+	}
+
+	if (!zalloc_cpumask_var(&lazy_cpus, GFP_ATOMIC)) {
+		/*
+		 * If the cpumask allocation fails, do a brute force flush
+		 * on all the CPUs that have this mm loaded.
+		 */
+		smp_call_function_many(mm_cpumask(mm),
+				tlb_flush_remove_tables_local, (void *)mm, 1);
+		put_cpu();
+		return;
+	}
+
+	/*
+	 * CPUs with !is_lazy either received a TLB flush IPI while the user
+	 * pages in this address range were unmapped, or have context switched
+	 * and reloaded %CR3 since then.
+	 *
+	 * Shootdown IPIs at page table freeing time only need to be sent to
+	 * CPUs that may have out of date TLB contents.
+	 */
+	mm_fill_lazy_tlb_cpu_mask(mm, lazy_cpus);
+	smp_call_function_many(lazy_cpus,
+			tlb_flush_remove_tables_local, (void *)mm, 1);
+	free_cpumask_var(lazy_cpus);
+	put_cpu();
+}
+
 static void do_flush_tlb_all(void *info)
 {