7 years ago · caf9a82657
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -1,6 +1,4 @@
 
				 
			
 
				-<previous description obsolete, deleted>
			
 
				-
			
 
				 Virtual memory map with 4 level page tables:
			
 
				 
			
 
				 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
			
@@ -14,13 +12,15 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 
				 ... unused hole ...
			
 
				 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
			
 
				 ... unused hole ...
			
 
				+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
			
 
				 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
			
 
				 ... unused hole ...
			
 
				 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
			
 
				 ... unused hole ...
			
 
				 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
			
 
				-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
			
 
				-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
			
 
				+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
			
 
				+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
			
 
				+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
			
 
				 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
			
 
				 
			
 
				 Virtual memory map with 5 level page tables:
			
@@ -36,19 +36,22 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 
				 ... unused hole ...
			
 
				 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
			
 
				 ... unused hole ...
			
 
				+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
			
 
				 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
			
 
				 ... unused hole ...
			
 
				 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
			
 
				 ... unused hole ...
			
 
				 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
			
 
				-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
			
 
				-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
			
 
				+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
			
 
				+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
			
 
				+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
			
 
				 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
			
 
				 
			
 
				 Architecture defines a 64-bit virtual address. Implementations can support
			
 
				 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
			
 
				-through to the most-significant implemented bit are set to either all ones
			
 
				-or all zero. This causes hole between user space and kernel addresses.
			
 
				+through to the most-significant implemented bit are sign extended.
			
 
				+This causes a hole between user space and kernel addresses if you interpret them
			
 
				+as unsigned.
			
 
				 
			
 
				 The direct mapping covers all memory in the system up to the highest
			
 
				 memory address (this means in some cases it can also include PCI memory
			
@@ -58,9 +61,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 
				 the processes using the page fault handler, with init_top_pgt as
			
 
				 reference.
			
 
				 
			
 
				-Current X86-64 implementations support up to 46 bits of address space (64 TB),
			
 
				-which is our current limit. This expands into MBZ space in the page tables.
			
 
				-
			
 
				 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
			
 
				 memory window (this size is arbitrary, it can be raised later if needed).
			
 
				 The mappings are not part of any other kernel PGD and are only available
			
@@ -72,5 +72,3 @@ following fixmap section.
 
				 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
			
 
				 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
			
 
				 Their order is preserved but their base will be offset early at boot time.
			
 
				-
			
 
				--Andi Kleen, Jul 2004
			
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-static inline void arch_dup_mmap(struct mm_struct *oldmm,
			
 
				-				 struct mm_struct *mm)
			
 
				+static inline int arch_dup_mmap(struct mm_struct *oldmm,
			
 
				+				struct mm_struct *mm)
			
 
				 {
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 #ifndef CONFIG_PPC_BOOK3S_64
			
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 
				 /*
			
 
				  * Needed since we do not use the asm-generic/mm_hooks.h:
			
 
				  */
			
 
				-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
			
 
				+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
			
 
				 {
			
 
				 	uml_setup_stubs(mm);
			
 
				+	return 0;
			
 
				 }
			
 
				 extern void arch_exit_mmap(struct mm_struct *mm);
			
 
				 static inline void arch_unmap(struct mm_struct *mm,
			
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
 
				 	} \
			
 
				 } while (0)
			
 
				 
			
 
				-static inline void arch_dup_mmap(struct mm_struct *oldmm,
			
 
				-				 struct mm_struct *mm)
			
 
				+static inline int arch_dup_mmap(struct mm_struct *oldmm,
			
 
				+				struct mm_struct *mm)
			
 
				 {
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 static inline void arch_unmap(struct mm_struct *mm,
			
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -926,7 +926,8 @@ config MAXSMP
 
				 config NR_CPUS
			
 
				 	int "Maximum number of CPUs" if SMP && !MAXSMP
			
 
				 	range 2 8 if SMP && X86_32 && !X86_BIGSMP
			
 
				-	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
			
 
				+	range 2 64 if SMP && X86_32 && X86_BIGSMP
			
 
				+	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
			
 
				 	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
			
 
				 	default "1" if !SMP
			
 
				 	default "8192" if MAXSMP
			
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -942,9 +942,9 @@ ENTRY(debug)
 
				 
			
 
				 	/* Are we currently on the SYSENTER stack? */
			
 
				 	movl	PER_CPU_VAR(cpu_entry_area), %ecx
			
 
				-	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
			
 
				-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
			
 
				-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
			
 
				+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
			
 
				+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
			
 
				+	cmpl	$SIZEOF_entry_stack, %ecx
			
 
				 	jb	.Ldebug_from_sysenter_stack
			
 
				 
			
 
				 	TRACE_IRQS_OFF
			
@@ -986,9 +986,9 @@ ENTRY(nmi)
 
				 
			
 
				 	/* Are we currently on the SYSENTER stack? */
			
 
				 	movl	PER_CPU_VAR(cpu_entry_area), %ecx
			
 
				-	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
			
 
				-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
			
 
				-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
			
 
				+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
			
 
				+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
			
 
				+	cmpl	$SIZEOF_entry_stack, %ecx
			
 
				 	jb	.Lnmi_from_sysenter_stack
			
 
				 
			
 
				 	/* Not on SYSENTER stack. */
			
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -158,8 +158,8 @@ END(native_usergs_sysret64)
 
				 	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
			
 
				 
			
 
				 /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
			
 
				-#define RSP_SCRATCH	CPU_ENTRY_AREA_SYSENTER_stack + \
			
 
				-			SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
			
 
				+#define RSP_SCRATCH	CPU_ENTRY_AREA_entry_stack + \
			
 
				+			SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
			
 
				 
			
 
				 ENTRY(entry_SYSCALL_64_trampoline)
			
 
				 	UNWIND_HINT_EMPTY
			
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
 
				 #include <asm/unistd.h>
			
 
				 #include <asm/fixmap.h>
			
 
				 #include <asm/traps.h>
			
 
				+#include <asm/paravirt.h>
			
 
				 
			
 
				 #define CREATE_TRACE_POINTS
			
 
				 #include "vsyscall_trace.h"
			
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
				 
			
 
				 	WARN_ON_ONCE(address != regs->ip);
			
 
				 
			
 
				+	/* This should be unreachable in NATIVE mode. */
			
 
				+	if (WARN_ON(vsyscall_mode == NATIVE))
			
 
				+		return false;
			
 
				+
			
 
				 	if (vsyscall_mode == NONE) {
			
 
				 		warn_bad_vsyscall(KERN_INFO, regs,
			
 
				 				  "vsyscall attempted with vsyscall=none");
			
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
 
				 	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * The VSYSCALL page is the only user-accessible page in the kernel address
			
 
				+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
			
 
				+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
			
 
				+ * are enabled.
			
 
				+ *
			
 
				+ * Some day we may create a "minimal" vsyscall mode in which we emulate
			
 
				+ * vsyscalls but leave the page not present.  If so, we skip calling
			
 
				+ * this.
			
 
				+ */
			
 
				+static void __init set_vsyscall_pgtable_user_bits(void)
			
 
				+{
			
 
				+	pgd_t *pgd;
			
 
				+	p4d_t *p4d;
			
 
				+	pud_t *pud;
			
 
				+	pmd_t *pmd;
			
 
				+
			
 
				+	pgd = pgd_offset_k(VSYSCALL_ADDR);
			
 
				+	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
			
 
				+	p4d = p4d_offset(pgd, VSYSCALL_ADDR);
			
 
				+#if CONFIG_PGTABLE_LEVELS >= 5
			
 
				+	p4d->p4d |= _PAGE_USER;
			
 
				+#endif
			
 
				+	pud = pud_offset(p4d, VSYSCALL_ADDR);
			
 
				+	set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
			
 
				+	pmd = pmd_offset(pud, VSYSCALL_ADDR);
			
 
				+	set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
			
 
				+}
			
 
				+
			
 
				 void __init map_vsyscall(void)
			
 
				 {
			
 
				 	extern char __vsyscall_page;
			
 
				 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
			
 
				 
			
 
				-	if (vsyscall_mode != NONE)
			
 
				+	if (vsyscall_mode != NONE) {
			
 
				 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
			
 
				 			     vsyscall_mode == NATIVE
			
 
				 			     ? PAGE_KERNEL_VSYSCALL
			
 
				 			     : PAGE_KERNEL_VVAR);
			
 
				+		set_vsyscall_pgtable_user_bits();
			
 
				+	}
			
 
				 
			
 
				 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
			
 
				 		     (unsigned long)VSYSCALL_ADDR);
			
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,68 @@
 
				+/* SPDX-License-Identifier: GPL-2.0 */
			
 
				+
			
 
				+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
			
 
				+#define _ASM_X86_CPU_ENTRY_AREA_H
			
 
				+
			
 
				+#include <linux/percpu-defs.h>
			
 
				+#include <asm/processor.h>
			
 
				+
			
 
				+/*
			
 
				+ * cpu_entry_area is a percpu region that contains things needed by the CPU
			
 
				+ * and early entry/exit code.  Real types aren't used for all fields here
			
 
				+ * to avoid circular header dependencies.
			
 
				+ *
			
 
				+ * Every field is a virtual alias of some other allocated backing store.
			
 
				+ * There is no direct allocation of a struct cpu_entry_area.
			
 
				+ */
			
 
				+struct cpu_entry_area {
			
 
				+	char gdt[PAGE_SIZE];
			
 
				+
			
 
				+	/*
			
 
				+	 * The GDT is just below entry_stack and thus serves (on x86_64) as
			
 
				+	 * a read-only guard page.
			
 
				+	 */
			
 
				+	struct entry_stack_page entry_stack_page;
			
 
				+
			
 
				+	/*
			
 
				+	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
			
 
				+	 * we need task switches to work, and task switches write to the TSS.
			
 
				+	 */
			
 
				+	struct tss_struct tss;
			
 
				+
			
 
				+	char entry_trampoline[PAGE_SIZE];
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+	/*
			
 
				+	 * Exception stacks used for IST entries.
			
 
				+	 *
			
 
				+	 * In the future, this should have a separate slot for each stack
			
 
				+	 * with guard pages between them.
			
 
				+	 */
			
 
				+	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
			
 
				+#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
			
 
				+
			
 
				+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
			
 
				+
			
 
				+extern void setup_cpu_entry_areas(void);
			
 
				+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
			
 
				+
			
 
				+#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
			
 
				+#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
			
 
				+
			
 
				+#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
			
 
				+
			
 
				+#define CPU_ENTRY_AREA_MAP_SIZE			\
			
 
				+	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
			
 
				+
			
 
				+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
			
 
				+
			
 
				+static inline struct entry_stack *cpu_entry_stack(int cpu)
			
 
				+{
			
 
				+	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
			
 
				+}
			
 
				+
			
 
				+#endif
			
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
 
				 #include <asm/mmu.h>
			
 
				 #include <asm/fixmap.h>
			
 
				 #include <asm/irq_vectors.h>
			
 
				+#include <asm/cpu_entry_area.h>
			
 
				 
			
 
				 #include <linux/smp.h>
			
 
				 #include <linux/percpu.h>
			
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
 
				 #ifndef _ASM_X86_ESPFIX_H
			
 
				 #define _ASM_X86_ESPFIX_H
			
 
				 
			
 
				-#ifdef CONFIG_X86_64
			
 
				+#ifdef CONFIG_X86_ESPFIX64
			
 
				 
			
 
				 #include <asm/percpu.h>
			
 
				 
			
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
				 
			
 
				 extern void init_espfix_bsp(void);
			
 
				 extern void init_espfix_ap(int cpu);
			
 
				-
			
 
				-#endif /* CONFIG_X86_64 */
			
 
				+#else
			
 
				+static inline void init_espfix_ap(int cpu) { }
			
 
				+#endif
			
 
				 
			
 
				 #endif /* _ASM_X86_ESPFIX_H */
			
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,46 +44,6 @@ extern unsigned long __FIXADDR_TOP;
 
				 			 PAGE_SIZE)
			
 
				 #endif
			
 
				 
			
 
				-/*
			
 
				- * cpu_entry_area is a percpu region in the fixmap that contains things
			
 
				- * needed by the CPU and early entry/exit code.  Real types aren't used
			
 
				- * for all fields here to avoid circular header dependencies.
			
 
				- *
			
 
				- * Every field is a virtual alias of some other allocated backing store.
			
 
				- * There is no direct allocation of a struct cpu_entry_area.
			
 
				- */
			
 
				-struct cpu_entry_area {
			
 
				-	char gdt[PAGE_SIZE];
			
 
				-
			
 
				-	/*
			
 
				-	 * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
			
 
				-	 * a a read-only guard page.
			
 
				-	 */
			
 
				-	struct SYSENTER_stack_page SYSENTER_stack_page;
			
 
				-
			
 
				-	/*
			
 
				-	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
			
 
				-	 * we need task switches to work, and task switches write to the TSS.
			
 
				-	 */
			
 
				-	struct tss_struct tss;
			
 
				-
			
 
				-	char entry_trampoline[PAGE_SIZE];
			
 
				-
			
 
				-#ifdef CONFIG_X86_64
			
 
				-	/*
			
 
				-	 * Exception stacks used for IST entries.
			
 
				-	 *
			
 
				-	 * In the future, this should have a separate slot for each stack
			
 
				-	 * with guard pages between them.
			
 
				-	 */
			
 
				-	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
			
 
				-#endif
			
 
				-};
			
 
				-
			
 
				-#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
			
 
				-
			
 
				-extern void setup_cpu_entry_areas(void);
			
 
				-
			
 
				 /*
			
 
				  * Here we define all the compile-time 'special' virtual
			
 
				  * addresses. The point is to have a constant address at
			
@@ -123,7 +83,6 @@ enum fixed_addresses {
 
				 	FIX_IO_APIC_BASE_0,
			
 
				 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
			
 
				 #endif
			
 
				-	FIX_RO_IDT,	/* Virtual mapping for read-only IDT */
			
 
				 #ifdef CONFIG_X86_32
			
 
				 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
			
 
				 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
			
@@ -139,9 +98,6 @@ enum fixed_addresses {
 
				 #ifdef	CONFIG_X86_INTEL_MID
			
 
				 	FIX_LNW_VRTC,
			
 
				 #endif
			
 
				-	/* Fixmap entries to remap the GDTs, one per processor. */
			
 
				-	FIX_CPU_ENTRY_AREA_TOP,
			
 
				-	FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
			
 
				 
			
 
				 #ifdef CONFIG_ACPI_APEI_GHES
			
 
				 	/* Used for GHES mapping from assorted contexts */
			
@@ -182,7 +138,7 @@ enum fixed_addresses {
 
				 extern void reserve_top_address(unsigned long reserve);
			
 
				 
			
 
				 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
			
 
				-#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
			
 
				+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
			
 
				 
			
 
				 extern int fixmaps_set;
			
 
				 
			
@@ -230,30 +186,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
 
				 void __early_set_fixmap(enum fixed_addresses idx,
			
 
				 			phys_addr_t phys, pgprot_t flags);
			
 
				 
			
 
				-static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
			
 
				-{
			
 
				-	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
			
 
				-
			
 
				-	return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
			
 
				-}
			
 
				-
			
 
				-#define __get_cpu_entry_area_offset_index(cpu, offset) ({		\
			
 
				-	BUILD_BUG_ON(offset % PAGE_SIZE != 0);				\
			
 
				-	__get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);	\
			
 
				-	})
			
 
				-
			
 
				-#define get_cpu_entry_area_index(cpu, field)				\
			
 
				-	__get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
			
 
				-
			
 
				-static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
			
 
				-{
			
 
				-	return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
			
 
				-}
			
 
				-
			
 
				-static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
			
 
				-{
			
 
				-	return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
			
 
				-}
			
 
				-
			
 
				 #endif /* !__ASSEMBLY__ */
			
 
				 #endif /* _ASM_X86_FIXMAP_H */
			
--- a/arch/x86/include/asm/invpcid.h
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
 
				+/* SPDX-License-Identifier: GPL-2.0 */
			
 
				+#ifndef _ASM_X86_INVPCID
			
 
				+#define _ASM_X86_INVPCID
			
 
				+
			
 
				+static inline void __invpcid(unsigned long pcid, unsigned long addr,
			
 
				+			     unsigned long type)
			
 
				+{
			
 
				+	struct { u64 d[2]; } desc = { { pcid, addr } };
			
 
				+
			
 
				+	/*
			
 
				+	 * The memory clobber is because the whole point is to invalidate
			
 
				+	 * stale TLB entries and, especially if we're flushing global
			
 
				+	 * mappings, we don't want the compiler to reorder any subsequent
			
 
				+	 * memory accesses before the TLB flush.
			
 
				+	 *
			
 
				+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
			
 
				+	 * invpcid (%rcx), %rax in long mode.
			
 
				+	 */
			
 
				+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
			
 
				+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
			
 
				+}
			
 
				+
			
 
				+#define INVPCID_TYPE_INDIV_ADDR		0
			
 
				+#define INVPCID_TYPE_SINGLE_CTXT	1
			
 
				+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
			
 
				+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
			
 
				+
			
 
				+/* Flush all mappings for a given pcid and addr, not including globals. */
			
 
				+static inline void invpcid_flush_one(unsigned long pcid,
			
 
				+				     unsigned long addr)
			
 
				+{
			
 
				+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
			
 
				+}
			
 
				+
			
 
				+/* Flush all mappings for a given PCID, not including globals. */
			
 
				+static inline void invpcid_flush_single_context(unsigned long pcid)
			
 
				+{
			
 
				+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
			
 
				+}
			
 
				+
			
 
				+/* Flush all mappings, including globals, for all PCIDs. */
			
 
				+static inline void invpcid_flush_all(void)
			
 
				+{
			
 
				+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
			
 
				+}
			
 
				+
			
 
				+/* Flush all mappings for all PCIDs except globals. */
			
 
				+static inline void invpcid_flush_all_nonglobals(void)
			
 
				+{
			
 
				+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
			
 
				+}
			
 
				+
			
 
				+#endif /* _ASM_X86_INVPCID */
			
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
 
				 #define _ASM_X86_MMU_H
			
 
				 
			
 
				 #include <linux/spinlock.h>
			
 
				+#include <linux/rwsem.h>
			
 
				 #include <linux/mutex.h>
			
 
				 #include <linux/atomic.h>
			
 
				 
			
@@ -27,7 +28,8 @@ typedef struct {
 
				 	atomic64_t tlb_gen;
			
 
				 
			
 
				 #ifdef CONFIG_MODIFY_LDT_SYSCALL
			
 
				-	struct ldt_struct *ldt;
			
 
				+	struct rw_semaphore	ldt_usr_sem;
			
 
				+	struct ldt_struct	*ldt;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef CONFIG_X86_64
			
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -57,11 +57,17 @@ struct ldt_struct {
 
				 /*
			
 
				  * Used for LDT copy/destruction.
			
 
				  */
			
 
				-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
			
 
				+static inline void init_new_context_ldt(struct mm_struct *mm)
			
 
				+{
			
 
				+	mm->context.ldt = NULL;
			
 
				+	init_rwsem(&mm->context.ldt_usr_sem);
			
 
				+}
			
 
				+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
			
 
				 void destroy_context_ldt(struct mm_struct *mm);
			
 
				 #else	/* CONFIG_MODIFY_LDT_SYSCALL */
			
 
				-static inline int init_new_context_ldt(struct task_struct *tsk,
			
 
				-				       struct mm_struct *mm)
			
 
				+static inline void init_new_context_ldt(struct mm_struct *mm) { }
			
 
				+static inline int ldt_dup_context(struct mm_struct *oldmm,
			
 
				+				  struct mm_struct *mm)
			
 
				 {
			
 
				 	return 0;
			
 
				 }
			
@@ -132,18 +138,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
				 static inline int init_new_context(struct task_struct *tsk,
			
 
				 				   struct mm_struct *mm)
			
 
				 {
			
 
				+	mutex_init(&mm->context.lock);
			
 
				+
			
 
				 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
			
 
				 	atomic64_set(&mm->context.tlb_gen, 0);
			
 
				 
			
 
				-	#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
			
 
				+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
			
 
				 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
			
 
				 		/* pkey 0 is the default and always allocated */
			
 
				 		mm->context.pkey_allocation_map = 0x1;
			
 
				 		/* -1 means unallocated or invalid */
			
 
				 		mm->context.execute_only_pkey = -1;
			
 
				 	}
			
 
				-	#endif
			
 
				-	return init_new_context_ldt(tsk, mm);
			
 
				+#endif
			
 
				+	init_new_context_ldt(mm);
			
 
				+	return 0;
			
 
				 }
			
 
				 static inline void destroy_context(struct mm_struct *mm)
			
 
				 {
			
@@ -176,10 +185,10 @@ do {						\
 
				 } while (0)
			
 
				 #endif
			
 
				 
			
 
				-static inline void arch_dup_mmap(struct mm_struct *oldmm,
			
 
				-				 struct mm_struct *mm)
			
 
				+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
			
 
				 {
			
 
				 	paravirt_arch_dup_mmap(oldmm, mm);
			
 
				+	return ldt_dup_context(oldmm, mm);
			
 
				 }
			
 
				 
			
 
				 static inline void arch_exit_mmap(struct mm_struct *mm)
			
@@ -281,33 +290,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 
				 	return __pkru_allows_pkey(vma_pkey(vma), write);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
			
 
				- * bits.  This serves two purposes.  It prevents a nasty situation in
			
 
				- * which PCID-unaware code saves CR3, loads some other value (with PCID
			
 
				- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
			
 
				- * the saved ASID was nonzero.  It also means that any bugs involving
			
 
				- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
			
 
				- * deterministically.
			
 
				- */
			
 
				-
			
 
				-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
			
 
				-{
			
 
				-	if (static_cpu_has(X86_FEATURE_PCID)) {
			
 
				-		VM_WARN_ON_ONCE(asid > 4094);
			
 
				-		return __sme_pa(mm->pgd) | (asid + 1);
			
 
				-	} else {
			
 
				-		VM_WARN_ON_ONCE(asid != 0);
			
 
				-		return __sme_pa(mm->pgd);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
			
 
				-{
			
 
				-	VM_WARN_ON_ONCE(asid > 4094);
			
 
				-	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * This can be used from process context to figure out what the value of
			
 
				  * CR3 is without needing to do a (slow) __read_cr3().
			
@@ -317,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
 
				  */
			
 
				 static inline unsigned long __get_current_cr3_fast(void)
			
 
				 {
			
 
				-	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
			
 
				+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
			
 
				 		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
			
 
				 
			
 
				 	/* For now, be very restrictive about when this can be called. */
			
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 
				 #define LAST_PKMAP 1024
			
 
				 #endif
			
 
				 
			
 
				-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))	\
			
 
				-		    & PMD_MASK)
			
 
				+/*
			
 
				+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
			
 
				+ * to avoid include recursion hell
			
 
				+ */
			
 
				+#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
			
 
				+
			
 
				+#define CPU_ENTRY_AREA_BASE				\
			
 
				+	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
			
 
				+
			
 
				+#define PKMAP_BASE		\
			
 
				+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
			
 
				 
			
 
				 #ifdef CONFIG_HIGHMEM
			
 
				 # define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
			
 
				 #else
			
 
				-# define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
			
 
				+# define VMALLOC_END	(CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
			
 
				 #endif
			
 
				 
			
 
				 #define MODULES_VADDR	VMALLOC_START
			
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
 
				 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
			
 
				 
			
 
				 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
			
 
				-#define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
			
 
				+#define MAXMEM			_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
			
 
				+
			
 
				 #ifdef CONFIG_X86_5LEVEL
			
 
				-#define VMALLOC_SIZE_TB _AC(16384, UL)
			
 
				-#define __VMALLOC_BASE	_AC(0xff92000000000000, UL)
			
 
				-#define __VMEMMAP_BASE	_AC(0xffd4000000000000, UL)
			
 
				+# define VMALLOC_SIZE_TB	_AC(16384, UL)
			
 
				+# define __VMALLOC_BASE		_AC(0xff92000000000000, UL)
			
 
				+# define __VMEMMAP_BASE		_AC(0xffd4000000000000, UL)
			
 
				 #else
			
 
				-#define VMALLOC_SIZE_TB	_AC(32, UL)
			
 
				-#define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
			
 
				-#define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
			
 
				+# define VMALLOC_SIZE_TB	_AC(32, UL)
			
 
				+# define __VMALLOC_BASE		_AC(0xffffc90000000000, UL)
			
 
				+# define __VMEMMAP_BASE		_AC(0xffffea0000000000, UL)
			
 
				 #endif
			
 
				+
			
 
				 #ifdef CONFIG_RANDOMIZE_MEMORY
			
 
				-#define VMALLOC_START	vmalloc_base
			
 
				-#define VMEMMAP_START	vmemmap_base
			
 
				+# define VMALLOC_START		vmalloc_base
			
 
				+# define VMEMMAP_START		vmemmap_base
			
 
				 #else
			
 
				-#define VMALLOC_START	__VMALLOC_BASE
			
 
				-#define VMEMMAP_START	__VMEMMAP_BASE
			
 
				+# define VMALLOC_START		__VMALLOC_BASE
			
 
				+# define VMEMMAP_START		__VMEMMAP_BASE
			
 
				 #endif /* CONFIG_RANDOMIZE_MEMORY */
			
 
				-#define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
			
 
				-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
			
 
				+
			
 
				+#define VMALLOC_END		(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
			
 
				+
			
 
				+#define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
			
 
				 /* The module sections ends with the start of the fixmap */
			
 
				-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
			
 
				-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
			
 
				-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
			
 
				-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
			
 
				-#define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
			
 
				-#define EFI_VA_END	 (-68 * (_AC(1, UL) << 30))
			
 
				+#define MODULES_END		__fix_to_virt(__end_of_fixed_addresses + 1)
			
 
				+#define MODULES_LEN		(MODULES_END - MODULES_VADDR)
			
 
				+
			
 
				+#define ESPFIX_PGD_ENTRY	_AC(-2, UL)
			
 
				+#define ESPFIX_BASE_ADDR	(ESPFIX_PGD_ENTRY << P4D_SHIFT)
			
 
				+
			
 
				+#define CPU_ENTRY_AREA_PGD	_AC(-3, UL)
			
 
				+#define CPU_ENTRY_AREA_BASE	(CPU_ENTRY_AREA_PGD << P4D_SHIFT)
			
 
				+
			
 
				+#define EFI_VA_START		( -4 * (_AC(1, UL) << 30))
			
 
				+#define EFI_VA_END		(-68 * (_AC(1, UL) << 30))
			
 
				 
			
 
				 #define EARLY_DYNAMIC_PAGE_TABLES	64
			
 
				 
			
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -337,12 +337,12 @@ struct x86_hw_tss {
 
				 #define IO_BITMAP_OFFSET		(offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
			
 
				 #define INVALID_IO_BITMAP_OFFSET	0x8000
			
 
				 
			
 
				-struct SYSENTER_stack {
			
 
				+struct entry_stack {
			
 
				 	unsigned long		words[64];
			
 
				 };
			
 
				 
			
 
				-struct SYSENTER_stack_page {
			
 
				-	struct SYSENTER_stack stack;
			
 
				+struct entry_stack_page {
			
 
				+	struct entry_stack stack;
			
 
				 } __aligned(PAGE_SIZE);
			
 
				 
			
 
				 struct tss_struct {
			
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,7 +16,7 @@ enum stack_type {
 
				 	STACK_TYPE_TASK,
			
 
				 	STACK_TYPE_IRQ,
			
 
				 	STACK_TYPE_SOFTIRQ,
			
 
				-	STACK_TYPE_SYSENTER,
			
 
				+	STACK_TYPE_ENTRY,
			
 
				 	STACK_TYPE_EXCEPTION,
			
 
				 	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
			
 
				 };
			
@@ -29,7 +29,7 @@ struct stack_info {
 
				 bool in_task_stack(unsigned long *stack, struct task_struct *task,
			
 
				 		   struct stack_info *info);
			
 
				 
			
 
				-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
			
 
				+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
			
 
				 
			
 
				 int get_stack_info(unsigned long *stack, struct task_struct *task,
			
 
				 		   struct stack_info *info, unsigned long *visit_mask);
			
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,66 @@
 
				 #include <asm/cpufeature.h>
			
 
				 #include <asm/special_insns.h>
			
 
				 #include <asm/smp.h>
			
 
				+#include <asm/invpcid.h>
			
 
				 
			
 
				-static inline void __invpcid(unsigned long pcid, unsigned long addr,
			
 
				-			     unsigned long type)
			
 
				+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
			
 
				 {
			
 
				-	struct { u64 d[2]; } desc = { { pcid, addr } };
			
 
				-
			
 
				 	/*
			
 
				-	 * The memory clobber is because the whole point is to invalidate
			
 
				-	 * stale TLB entries and, especially if we're flushing global
			
 
				-	 * mappings, we don't want the compiler to reorder any subsequent
			
 
				-	 * memory accesses before the TLB flush.
			
 
				-	 *
			
 
				-	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
			
 
				-	 * invpcid (%rcx), %rax in long mode.
			
 
				+	 * Bump the generation count.  This also serves as a full barrier
			
 
				+	 * that synchronizes with switch_mm(): callers are required to order
			
 
				+	 * their read of mm_cpumask after their writes to the paging
			
 
				+	 * structures.
			
 
				 	 */
			
 
				-	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
			
 
				-		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
			
 
				+	return atomic64_inc_return(&mm->context.tlb_gen);
			
 
				 }
			
 
				 
			
 
				-#define INVPCID_TYPE_INDIV_ADDR		0
			
 
				-#define INVPCID_TYPE_SINGLE_CTXT	1
			
 
				-#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
			
 
				-#define INVPCID_TYPE_ALL_NON_GLOBAL	3
			
 
				+/* There are 12 bits of space for ASIDS in CR3 */
			
 
				+#define CR3_HW_ASID_BITS		12
			
 
				+/*
			
 
				+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
			
 
				+ * user/kernel switches
			
 
				+ */
			
 
				+#define PTI_CONSUMED_ASID_BITS		0
			
 
				 
			
 
				-/* Flush all mappings for a given pcid and addr, not including globals. */
			
 
				-static inline void invpcid_flush_one(unsigned long pcid,
			
 
				-				     unsigned long addr)
			
 
				-{
			
 
				-	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
			
 
				-}
			
 
				+#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
			
 
				+/*
			
 
				+ * ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid.  -1 below to account
			
 
				+ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
			
 
				+ * use by non-PCID-aware users.
			
 
				+ */
			
 
				+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
			
 
				 
			
 
				-/* Flush all mappings for a given PCID, not including globals. */
			
 
				-static inline void invpcid_flush_single_context(unsigned long pcid)
			
 
				+static inline u16 kern_pcid(u16 asid)
			
 
				 {
			
 
				-	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
			
 
				+	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
			
 
				+	/*
			
 
				+	 * If PCID is on, ASID-aware code paths put the ASID+1 into the
			
 
				+	 * PCID bits.  This serves two purposes.  It prevents a nasty
			
 
				+	 * situation in which PCID-unaware code saves CR3, loads some other
			
 
				+	 * value (with PCID == 0), and then restores CR3, thus corrupting
			
 
				+	 * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
			
 
				+	 * that any bugs involving loading a PCID-enabled CR3 with
			
 
				+	 * CR4.PCIDE off will trigger deterministically.
			
 
				+	 */
			
 
				+	return asid + 1;
			
 
				 }
			
 
				 
			
 
				-/* Flush all mappings, including globals, for all PCIDs. */
			
 
				-static inline void invpcid_flush_all(void)
			
 
				+struct pgd_t;
			
 
				+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
			
 
				 {
			
 
				-	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
			
 
				+	if (static_cpu_has(X86_FEATURE_PCID)) {
			
 
				+		return __sme_pa(pgd) | kern_pcid(asid);
			
 
				+	} else {
			
 
				+		VM_WARN_ON_ONCE(asid != 0);
			
 
				+		return __sme_pa(pgd);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				-/* Flush all mappings for all PCIDs except globals. */
			
 
				-static inline void invpcid_flush_all_nonglobals(void)
			
 
				+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
			
 
				 {
			
 
				-	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
			
 
				-}
			
 
				-
			
 
				-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
			
 
				-{
			
 
				-	u64 new_tlb_gen;
			
 
				-
			
 
				-	/*
			
 
				-	 * Bump the generation count.  This also serves as a full barrier
			
 
				-	 * that synchronizes with switch_mm(): callers are required to order
			
 
				-	 * their read of mm_cpumask after their writes to the paging
			
 
				-	 * structures.
			
 
				-	 */
			
 
				-	smp_mb__before_atomic();
			
 
				-	new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
			
 
				-	smp_mb__after_atomic();
			
 
				-
			
 
				-	return new_tlb_gen;
			
 
				+	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
			
 
				+	VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
			
 
				+	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
			
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_PARAVIRT
			
@@ -237,6 +233,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
				 
			
 
				 extern void initialize_tlbstate_and_flush(void);
			
 
				 
			
 
				+/*
			
 
				+ * flush the entire current user mapping
			
 
				+ */
			
 
				 static inline void __native_flush_tlb(void)
			
 
				 {
			
 
				 	/*
			
@@ -249,20 +248,12 @@ static inline void __native_flush_tlb(void)
 
				 	preempt_enable();
			
 
				 }
			
 
				 
			
 
				-static inline void __native_flush_tlb_global_irq_disabled(void)
			
 
				-{
			
 
				-	unsigned long cr4;
			
 
				-
			
 
				-	cr4 = this_cpu_read(cpu_tlbstate.cr4);
			
 
				-	/* clear PGE */
			
 
				-	native_write_cr4(cr4 & ~X86_CR4_PGE);
			
 
				-	/* write old PGE again and flush TLBs */
			
 
				-	native_write_cr4(cr4);
			
 
				-}
			
 
				-
			
 
				+/*
			
 
				+ * flush everything
			
 
				+ */
			
 
				 static inline void __native_flush_tlb_global(void)
			
 
				 {
			
 
				-	unsigned long flags;
			
 
				+	unsigned long cr4, flags;
			
 
				 
			
 
				 	if (static_cpu_has(X86_FEATURE_INVPCID)) {
			
 
				 		/*
			
@@ -280,22 +271,36 @@ static inline void __native_flush_tlb_global(void)
 
				 	 */
			
 
				 	raw_local_irq_save(flags);
			
 
				 
			
 
				-	__native_flush_tlb_global_irq_disabled();
			
 
				+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
			
 
				+	/* toggle PGE */
			
 
				+	native_write_cr4(cr4 ^ X86_CR4_PGE);
			
 
				+	/* write old PGE again and flush TLBs */
			
 
				+	native_write_cr4(cr4);
			
 
				 
			
 
				 	raw_local_irq_restore(flags);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * flush one page in the user mapping
			
 
				+ */
			
 
				 static inline void __native_flush_tlb_single(unsigned long addr)
			
 
				 {
			
 
				 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * flush everything
			
 
				+ */
			
 
				 static inline void __flush_tlb_all(void)
			
 
				 {
			
 
				-	if (boot_cpu_has(X86_FEATURE_PGE))
			
 
				+	if (boot_cpu_has(X86_FEATURE_PGE)) {
			
 
				 		__flush_tlb_global();
			
 
				-	else
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
			
 
				+		 */
			
 
				 		__flush_tlb();
			
 
				+	}
			
 
				 
			
 
				 	/*
			
 
				 	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
			
@@ -306,6 +311,9 @@ static inline void __flush_tlb_all(void)
 
				 	 */
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * flush one page in the kernel mapping
			
 
				+ */
			
 
				 static inline void __flush_tlb_one(unsigned long addr)
			
 
				 {
			
 
				 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
			
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -97,6 +97,6 @@ void common(void) {
 
				 	/* Layout info for cpu_entry_area */
			
 
				 	OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
			
 
				 	OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
			
 
				-	OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
			
 
				-	DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
			
 
				+	OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
			
 
				+	DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
			
 
				 }
			
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -48,7 +48,7 @@ void foo(void)
 
				 
			
 
				 	/* Offset from the sysenter stack to tss.sp0 */
			
 
				 	DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
			
 
				-	       offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
			
 
				+	       offsetofend(struct cpu_entry_area, entry_stack_page.stack));
			
 
				 
			
 
				 #ifdef CONFIG_CC_STACKPROTECTOR
			
 
				 	BLANK();
			
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -506,102 +506,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
 
				 	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
			
 
				 	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
			
 
				 };
			
 
				-
			
 
				-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
			
 
				-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
			
 
				-#endif
			
 
				-
			
 
				-static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
			
 
				-				   SYSENTER_stack_storage);
			
 
				-
			
 
				-static void __init
			
 
				-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
			
 
				-{
			
 
				-	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
			
 
				-		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
			
 
				-}
			
 
				-
			
 
				-/* Setup the fixmap mappings only once per-processor */
			
 
				-static void __init setup_cpu_entry_area(int cpu)
			
 
				-{
			
 
				-#ifdef CONFIG_X86_64
			
 
				-	extern char _entry_trampoline[];
			
 
				-
			
 
				-	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
			
 
				-	pgprot_t gdt_prot = PAGE_KERNEL_RO;
			
 
				-	pgprot_t tss_prot = PAGE_KERNEL_RO;
			
 
				-#else
			
 
				-	/*
			
 
				-	 * On native 32-bit systems, the GDT cannot be read-only because
			
 
				-	 * our double fault handler uses a task gate, and entering through
			
 
				-	 * a task gate needs to change an available TSS to busy.  If the
			
 
				-	 * GDT is read-only, that will triple fault.  The TSS cannot be
			
 
				-	 * read-only because the CPU writes to it on task switches.
			
 
				-	 *
			
 
				-	 * On Xen PV, the GDT must be read-only because the hypervisor
			
 
				-	 * requires it.
			
 
				-	 */
			
 
				-	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
			
 
				-		PAGE_KERNEL_RO : PAGE_KERNEL;
			
 
				-	pgprot_t tss_prot = PAGE_KERNEL;
			
 
				-#endif
			
 
				-
			
 
				-	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
			
 
				-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
			
 
				-				per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
			
 
				-				PAGE_KERNEL);
			
 
				-
			
 
				-	/*
			
 
				-	 * The Intel SDM says (Volume 3, 7.2.1):
			
 
				-	 *
			
 
				-	 *  Avoid placing a page boundary in the part of the TSS that the
			
 
				-	 *  processor reads during a task switch (the first 104 bytes). The
			
 
				-	 *  processor may not correctly perform address translations if a
			
 
				-	 *  boundary occurs in this area. During a task switch, the processor
			
 
				-	 *  reads and writes into the first 104 bytes of each TSS (using
			
 
				-	 *  contiguous physical addresses beginning with the physical address
			
 
				-	 *  of the first byte of the TSS). So, after TSS access begins, if
			
 
				-	 *  part of the 104 bytes is not physically contiguous, the processor
			
 
				-	 *  will access incorrect information without generating a page-fault
			
 
				-	 *  exception.
			
 
				-	 *
			
 
				-	 * There are also a lot of errata involving the TSS spanning a page
			
 
				-	 * boundary.  Assert that we're not doing that.
			
 
				-	 */
			
 
				-	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
			
 
				-		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
			
 
				-	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
			
 
				-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
			
 
				-				&per_cpu(cpu_tss_rw, cpu),
			
 
				-				sizeof(struct tss_struct) / PAGE_SIZE,
			
 
				-				tss_prot);
			
 
				-
			
 
				-#ifdef CONFIG_X86_32
			
 
				-	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
			
 
				 #endif
			
 
				 
			
 
				-#ifdef CONFIG_X86_64
			
 
				-	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
			
 
				-	BUILD_BUG_ON(sizeof(exception_stacks) !=
			
 
				-		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
			
 
				-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
			
 
				-				&per_cpu(exception_stacks, cpu),
			
 
				-				sizeof(exception_stacks) / PAGE_SIZE,
			
 
				-				PAGE_KERNEL);
			
 
				-
			
 
				-	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
			
 
				-		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-void __init setup_cpu_entry_areas(void)
			
 
				-{
			
 
				-	unsigned int cpu;
			
 
				-
			
 
				-	for_each_possible_cpu(cpu)
			
 
				-		setup_cpu_entry_area(cpu);
			
 
				-}
			
 
				-
			
 
				 /* Load the original GDT from the per-cpu structure */
			
 
				 void load_direct_gdt(int cpu)
			
 
				 {
			
@@ -1348,7 +1254,7 @@ void enable_sep_cpu(void)
 
				 
			
 
				 	tss->x86_tss.ss1 = __KERNEL_CS;
			
 
				 	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
			
 
				-	wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
			
 
				+	wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
			
 
				 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
			
 
				 
			
 
				 	put_cpu();
			
@@ -1465,7 +1371,7 @@ void syscall_init(void)
 
				 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
			
 
				 	 */
			
 
				 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
			
 
				-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
			
 
				+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
			
 
				 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
			
 
				 #else
			
 
				 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
			
@@ -1680,7 +1586,7 @@ void cpu_init(void)
 
				 	 */
			
 
				 	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
			
 
				 	load_TR_desc();
			
 
				-	load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
			
 
				+	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
			
 
				 
			
 
				 	load_mm_ldt(&init_mm);
			
 
				 
			
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 
				 }
			
 
				 #else
			
 
				 
			
 
				-/*
			
 
				- * Flush global tlb. We only do this in x86_64 where paging has been enabled
			
 
				- * already and PGE should be enabled as well.
			
 
				- */
			
 
				-static inline void flush_tlb_early(void)
			
 
				-{
			
 
				-	__native_flush_tlb_global_irq_disabled();
			
 
				-}
			
 
				-
			
 
				 static inline void print_ucode(struct ucode_cpu_info *uci)
			
 
				 {
			
 
				 	struct microcode_intel *mc;
			
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 
				 	if (rev != mc->hdr.rev)
			
 
				 		return -1;
			
 
				 
			
 
				-#ifdef CONFIG_X86_64
			
 
				-	/* Flush global tlb. This is precaution. */
			
 
				-	flush_tlb_early();
			
 
				-#endif
			
 
				 	uci->cpu_sig.rev = rev;
			
 
				 
			
 
				 	if (early)
			
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
 
				 #include <linux/nmi.h>
			
 
				 #include <linux/sysfs.h>
			
 
				 
			
 
				+#include <asm/cpu_entry_area.h>
			
 
				 #include <asm/stacktrace.h>
			
 
				 #include <asm/unwind.h>
			
 
				 
			
@@ -43,9 +44,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
			
 
				+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
			
 
				 {
			
 
				-	struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
			
 
				+	struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
			
 
				 
			
 
				 	void *begin = ss;
			
 
				 	void *end = ss + 1;
			
@@ -53,7 +54,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
 
				 	if ((void *)stack < begin || (void *)stack >= end)
			
 
				 		return false;
			
 
				 
			
 
				-	info->type	= STACK_TYPE_SYSENTER;
			
 
				+	info->type	= STACK_TYPE_ENTRY;
			
 
				 	info->begin	= begin;
			
 
				 	info->end	= end;
			
 
				 	info->next_sp	= NULL;
			
@@ -111,13 +112,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
				 	 * - task stack
			
 
				 	 * - interrupt stack
			
 
				 	 * - HW exception stacks (double fault, nmi, debug, mce)
			
 
				-	 * - SYSENTER stack
			
 
				+	 * - entry stack
			
 
				 	 *
			
 
				 	 * x86-32 can have up to four stacks:
			
 
				 	 * - task stack
			
 
				 	 * - softirq stack
			
 
				 	 * - hardirq stack
			
 
				-	 * - SYSENTER stack
			
 
				+	 * - entry stack
			
 
				 	 */
			
 
				 	for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
			
 
				 		const char *stack_name;
			
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,8 +26,8 @@ const char *stack_type_name(enum stack_type type)
 
				 	if (type == STACK_TYPE_SOFTIRQ)
			
 
				 		return "SOFTIRQ";
			
 
				 
			
 
				-	if (type == STACK_TYPE_SYSENTER)
			
 
				-		return "SYSENTER";
			
 
				+	if (type == STACK_TYPE_ENTRY)
			
 
				+		return "ENTRY_TRAMPOLINE";
			
 
				 
			
 
				 	return NULL;
			
 
				 }
			
@@ -96,7 +96,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 
				 	if (task != current)
			
 
				 		goto unknown;
			
 
				 
			
 
				-	if (in_sysenter_stack(stack, info))
			
 
				+	if (in_entry_stack(stack, info))
			
 
				 		goto recursion_check;
			
 
				 
			
 
				 	if (in_hardirq_stack(stack, info))
			
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,8 +37,14 @@ const char *stack_type_name(enum stack_type type)
 
				 	if (type == STACK_TYPE_IRQ)
			
 
				 		return "IRQ";
			
 
				 
			
 
				-	if (type == STACK_TYPE_SYSENTER)
			
 
				-		return "SYSENTER";
			
 
				+	if (type == STACK_TYPE_ENTRY) {
			
 
				+		/*
			
 
				+		 * On 64-bit, we have a generic entry stack that we
			
 
				+		 * use for all the kernel entry points, including
			
 
				+		 * SYSENTER.
			
 
				+		 */
			
 
				+		return "ENTRY_TRAMPOLINE";
			
 
				+	}
			
 
				 
			
 
				 	if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
			
 
				 		return exception_stack_names[type - STACK_TYPE_EXCEPTION];
			
@@ -118,7 +124,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 
				 	if (in_irq_stack(stack, info))
			
 
				 		goto recursion_check;
			
 
				 
			
 
				-	if (in_sysenter_stack(stack, info))
			
 
				+	if (in_entry_stack(stack, info))
			
 
				 		goto recursion_check;
			
 
				 
			
 
				 	goto unknown;
			
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
 
				  * Copyright (C) 2002 Andi Kleen
			
 
				  *
			
 
				  * This handles calls from both 32bit and 64bit mode.
			
 
				+ *
			
 
				+ * Lock order:
			
 
				+ *	context.ldt_usr_sem
			
 
				+ *	  mmap_sem
			
 
				+ *	    context.lock
			
 
				  */
			
 
				 
			
 
				 #include <linux/errno.h>
			
@@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-/* context.lock is held for us, so we don't need any locking. */
			
 
				+/* context.lock is held by the task which issued the smp function call */
			
 
				 static void flush_ldt(void *__mm)
			
 
				 {
			
 
				 	struct mm_struct *mm = __mm;
			
@@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
 
				 	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
			
 
				 }
			
 
				 
			
 
				-/* context.lock is held */
			
 
				-static void install_ldt(struct mm_struct *current_mm,
			
 
				-			struct ldt_struct *ldt)
			
 
				+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
			
 
				 {
			
 
				+	mutex_lock(&mm->context.lock);
			
 
				+
			
 
				 	/* Synchronizes with READ_ONCE in load_mm_ldt. */
			
 
				-	smp_store_release(&current_mm->context.ldt, ldt);
			
 
				+	smp_store_release(&mm->context.ldt, ldt);
			
 
				 
			
 
				-	/* Activate the LDT for all CPUs using current_mm. */
			
 
				-	on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
			
 
				+	/* Activate the LDT for all CPUs using current's mm. */
			
 
				+	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
			
 
				+
			
 
				+	mutex_unlock(&mm->context.lock);
			
 
				 }
			
 
				 
			
 
				 static void free_ldt_struct(struct ldt_struct *ldt)
			
@@ -124,27 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * we do not have to muck with descriptors here, that is
			
 
				- * done in switch_mm() as needed.
			
 
				+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
			
 
				+ * the new task is not running, so nothing can be installed.
			
 
				  */
			
 
				-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
			
 
				+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
			
 
				 {
			
 
				 	struct ldt_struct *new_ldt;
			
 
				-	struct mm_struct *old_mm;
			
 
				 	int retval = 0;
			
 
				 
			
 
				-	mutex_init(&mm->context.lock);
			
 
				-	old_mm = current->mm;
			
 
				-	if (!old_mm) {
			
 
				-		mm->context.ldt = NULL;
			
 
				+	if (!old_mm)
			
 
				 		return 0;
			
 
				-	}
			
 
				 
			
 
				 	mutex_lock(&old_mm->context.lock);
			
 
				-	if (!old_mm->context.ldt) {
			
 
				-		mm->context.ldt = NULL;
			
 
				+	if (!old_mm->context.ldt)
			
 
				 		goto out_unlock;
			
 
				-	}
			
 
				 
			
 
				 	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
			
 
				 	if (!new_ldt) {
			
@@ -180,7 +180,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
 
				 	unsigned long entries_size;
			
 
				 	int retval;
			
 
				 
			
 
				-	mutex_lock(&mm->context.lock);
			
 
				+	down_read(&mm->context.ldt_usr_sem);
			
 
				 
			
 
				 	if (!mm->context.ldt) {
			
 
				 		retval = 0;
			
@@ -209,7 +209,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
 
				 	retval = bytecount;
			
 
				 
			
 
				 out_unlock:
			
 
				-	mutex_unlock(&mm->context.lock);
			
 
				+	up_read(&mm->context.ldt_usr_sem);
			
 
				 	return retval;
			
 
				 }
			
 
				 
			
@@ -269,7 +269,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 
				 			ldt.avl = 0;
			
 
				 	}
			
 
				 
			
 
				-	mutex_lock(&mm->context.lock);
			
 
				+	if (down_write_killable(&mm->context.ldt_usr_sem))
			
 
				+		return -EINTR;
			
 
				 
			
 
				 	old_ldt       = mm->context.ldt;
			
 
				 	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
			
@@ -291,7 +292,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 
				 	error = 0;
			
 
				 
			
 
				 out_unlock:
			
 
				-	mutex_unlock(&mm->context.lock);
			
 
				+	up_write(&mm->context.ldt_usr_sem);
			
 
				 out:
			
 
				 	return error;
			
 
				 }
			
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
 
				 	initial_code = (unsigned long)start_secondary;
			
 
				 	initial_stack  = idle->thread.sp;
			
 
				 
			
 
				-	/*
			
 
				-	 * Enable the espfix hack for this CPU
			
 
				-	*/
			
 
				-#ifdef CONFIG_X86_ESPFIX64
			
 
				+	/* Enable the espfix hack for this CPU */
			
 
				 	init_espfix_ap(cpu);
			
 
				-#endif
			
 
				 
			
 
				 	/* So we see what's up */
			
 
				 	announce_cpu(cpu, apicid);
			
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
 
				 #include <asm/traps.h>
			
 
				 #include <asm/desc.h>
			
 
				 #include <asm/fpu/internal.h>
			
 
				+#include <asm/cpu_entry_area.h>
			
 
				 #include <asm/mce.h>
			
 
				 #include <asm/fixmap.h>
			
 
				 #include <asm/mach_traps.h>
			
@@ -951,8 +952,9 @@ void __init trap_init(void)
 
				 	 * "sidt" instruction will not leak the location of the kernel, and
			
 
				 	 * to defend the IDT against arbitrary memory write vulnerabilities.
			
 
				 	 * It will be reloaded in cpu_init() */
			
 
				-	__set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
			
 
				-	idt_descr.address = fix_to_virt(FIX_RO_IDT);
			
 
				+	cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
			
 
				+		    PAGE_KERNEL_RO);
			
 
				+	idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
			
 
				 
			
 
				 	/*
			
 
				 	 * Should be a barrier for any external CPU state:
			
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o	= -pg
 
				 endif
			
 
				 
			
 
				 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
			
 
				-	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o
			
 
				+	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
			
 
				 
			
 
				 # Make sure __phys_addr has no stackprotector
			
 
				 nostackp := $(call cc-option, -fno-stack-protector)
			
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,139 @@
 
				+// SPDX-License-Identifier: GPL-2.0
			
 
				+
			
 
				+#include <linux/spinlock.h>
			
 
				+#include <linux/percpu.h>
			
 
				+
			
 
				+#include <asm/cpu_entry_area.h>
			
 
				+#include <asm/pgtable.h>
			
 
				+#include <asm/fixmap.h>
			
 
				+#include <asm/desc.h>
			
 
				+
			
 
				+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
			
 
				+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
			
 
				+#endif
			
 
				+
			
 
				+struct cpu_entry_area *get_cpu_entry_area(int cpu)
			
 
				+{
			
 
				+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
			
 
				+	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
			
 
				+
			
 
				+	return (struct cpu_entry_area *) va;
			
 
				+}
			
 
				+EXPORT_SYMBOL(get_cpu_entry_area);
			
 
				+
			
 
				+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
			
 
				+{
			
 
				+	unsigned long va = (unsigned long) cea_vaddr;
			
 
				+
			
 
				+	set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
			
 
				+}
			
 
				+
			
 
				+static void __init
			
 
				+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
			
 
				+{
			
 
				+	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
			
 
				+		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
			
 
				+}
			
 
				+
			
 
				+/* Set up the cpu_entry_area mappings only once per processor */
			
 
				+static void __init setup_cpu_entry_area(int cpu)
			
 
				+{
			
 
				+#ifdef CONFIG_X86_64
			
 
				+	extern char _entry_trampoline[];
			
 
				+
			
 
				+	/* On 64-bit systems, we map the GDT and TSS read-only in the cpu_entry_area. */
			
 
				+	pgprot_t gdt_prot = PAGE_KERNEL_RO;
			
 
				+	pgprot_t tss_prot = PAGE_KERNEL_RO;
			
 
				+#else
			
 
				+	/*
			
 
				+	 * On native 32-bit systems, the GDT cannot be read-only because
			
 
				+	 * our double fault handler uses a task gate, and entering through
			
 
				+	 * a task gate needs to change an available TSS to busy.  If the
			
 
				+	 * GDT is read-only, that will triple fault.  The TSS cannot be
			
 
				+	 * read-only because the CPU writes to it on task switches.
			
 
				+	 *
			
 
				+	 * On Xen PV, the GDT must be read-only because the hypervisor
			
 
				+	 * requires it.
			
 
				+	 */
			
 
				+	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
			
 
				+		PAGE_KERNEL_RO : PAGE_KERNEL;
			
 
				+	pgprot_t tss_prot = PAGE_KERNEL;
			
 
				+#endif
			
 
				+
			
 
				+	cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
			
 
				+		    gdt_prot);
			
 
				+
			
 
				+	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
			
 
				+			     per_cpu_ptr(&entry_stack_storage, cpu), 1,
			
 
				+			     PAGE_KERNEL);
			
 
				+
			
 
				+	/*
			
 
				+	 * The Intel SDM says (Volume 3, 7.2.1):
			
 
				+	 *
			
 
				+	 *  Avoid placing a page boundary in the part of the TSS that the
			
 
				+	 *  processor reads during a task switch (the first 104 bytes). The
			
 
				+	 *  processor may not correctly perform address translations if a
			
 
				+	 *  boundary occurs in this area. During a task switch, the processor
			
 
				+	 *  reads and writes into the first 104 bytes of each TSS (using
			
 
				+	 *  contiguous physical addresses beginning with the physical address
			
 
				+	 *  of the first byte of the TSS). So, after TSS access begins, if
			
 
				+	 *  part of the 104 bytes is not physically contiguous, the processor
			
 
				+	 *  will access incorrect information without generating a page-fault
			
 
				+	 *  exception.
			
 
				+	 *
			
 
				+	 * There are also a lot of errata involving the TSS spanning a page
			
 
				+	 * boundary.  Assert that we're not doing that.
			
 
				+	 */
			
 
				+	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
			
 
				+		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
			
 
				+	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
			
 
				+	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
			
 
				+			     &per_cpu(cpu_tss_rw, cpu),
			
 
				+			     sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
			
 
				+
			
 
				+#ifdef CONFIG_X86_32
			
 
				+	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
			
 
				+#endif
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
			
 
				+	BUILD_BUG_ON(sizeof(exception_stacks) !=
			
 
				+		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
			
 
				+	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
			
 
				+			     &per_cpu(exception_stacks, cpu),
			
 
				+			     sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
			
 
				+
			
 
				+	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
			
 
				+		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+static __init void setup_cpu_entry_area_ptes(void)
			
 
				+{
			
 
				+#ifdef CONFIG_X86_32
			
 
				+	unsigned long start, end;
			
 
				+
			
 
				+	BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
			
 
				+	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
			
 
				+
			
 
				+	start = CPU_ENTRY_AREA_BASE;
			
 
				+	end = start + CPU_ENTRY_AREA_MAP_SIZE;
			
 
				+
			
 
				+	/* Careful here: start + PMD_SIZE might wrap around */
			
 
				+	for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
			
 
				+		populate_extra_pte(start);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+void __init setup_cpu_entry_areas(void)
			
 
				+{
			
 
				+	unsigned int cpu;
			
 
				+
			
 
				+	setup_cpu_entry_area_ptes();
			
 
				+
			
 
				+	for_each_possible_cpu(cpu)
			
 
				+		setup_cpu_entry_area(cpu);
			
 
				+}
			
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,10 +44,12 @@ struct addr_marker {
 
				 	unsigned long max_lines;
			
 
				 };
			
 
				 
			
 
				-/* indices for address_markers; keep sync'd w/ address_markers below */
			
 
				+/* Address space markers hints */
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+
			
 
				 enum address_markers_idx {
			
 
				 	USER_SPACE_NR = 0,
			
 
				-#ifdef CONFIG_X86_64
			
 
				 	KERNEL_SPACE_NR,
			
 
				 	LOW_KERNEL_NR,
			
 
				 	VMALLOC_START_NR,
			
@@ -56,56 +58,74 @@ enum address_markers_idx {
 
				 	KASAN_SHADOW_START_NR,
			
 
				 	KASAN_SHADOW_END_NR,
			
 
				 #endif
			
 
				-# ifdef CONFIG_X86_ESPFIX64
			
 
				+	CPU_ENTRY_AREA_NR,
			
 
				+#ifdef CONFIG_X86_ESPFIX64
			
 
				 	ESPFIX_START_NR,
			
 
				-# endif
			
 
				+#endif
			
 
				+#ifdef CONFIG_EFI
			
 
				+	EFI_END_NR,
			
 
				+#endif
			
 
				 	HIGH_KERNEL_NR,
			
 
				 	MODULES_VADDR_NR,
			
 
				 	MODULES_END_NR,
			
 
				-#else
			
 
				+	FIXADDR_START_NR,
			
 
				+	END_OF_SPACE_NR,
			
 
				+};
			
 
				+
			
 
				+static struct addr_marker address_markers[] = {
			
 
				+	[USER_SPACE_NR]		= { 0,			"User Space" },
			
 
				+	[KERNEL_SPACE_NR]	= { (1UL << 63),	"Kernel Space" },
			
 
				+	[LOW_KERNEL_NR]		= { 0UL,		"Low Kernel Mapping" },
			
 
				+	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
			
 
				+	[VMEMMAP_START_NR]	= { 0UL,		"Vmemmap" },
			
 
				+#ifdef CONFIG_KASAN
			
 
				+	[KASAN_SHADOW_START_NR]	= { KASAN_SHADOW_START,	"KASAN shadow" },
			
 
				+	[KASAN_SHADOW_END_NR]	= { KASAN_SHADOW_END,	"KASAN shadow end" },
			
 
				+#endif
			
 
				+	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
			
 
				+#ifdef CONFIG_X86_ESPFIX64
			
 
				+	[ESPFIX_START_NR]	= { ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
			
 
				+#endif
			
 
				+#ifdef CONFIG_EFI
			
 
				+	[EFI_END_NR]		= { EFI_VA_END,		"EFI Runtime Services" },
			
 
				+#endif
			
 
				+	[HIGH_KERNEL_NR]	= { __START_KERNEL_map,	"High Kernel Mapping" },
			
 
				+	[MODULES_VADDR_NR]	= { MODULES_VADDR,	"Modules" },
			
 
				+	[MODULES_END_NR]	= { MODULES_END,	"End Modules" },
			
 
				+	[FIXADDR_START_NR]	= { FIXADDR_START,	"Fixmap Area" },
			
 
				+	[END_OF_SPACE_NR]	= { -1,			NULL }
			
 
				+};
			
 
				+
			
 
				+#else /* CONFIG_X86_64 */
			
 
				+
			
 
				+enum address_markers_idx {
			
 
				+	USER_SPACE_NR = 0,
			
 
				 	KERNEL_SPACE_NR,
			
 
				 	VMALLOC_START_NR,
			
 
				 	VMALLOC_END_NR,
			
 
				-# ifdef CONFIG_HIGHMEM
			
 
				+#ifdef CONFIG_HIGHMEM
			
 
				 	PKMAP_BASE_NR,
			
 
				-# endif
			
 
				-	FIXADDR_START_NR,
			
 
				 #endif
			
 
				+	CPU_ENTRY_AREA_NR,
			
 
				+	FIXADDR_START_NR,
			
 
				+	END_OF_SPACE_NR,
			
 
				 };
			
 
				 
			
 
				-/* Address space markers hints */
			
 
				 static struct addr_marker address_markers[] = {
			
 
				-	{ 0, "User Space" },
			
 
				-#ifdef CONFIG_X86_64
			
 
				-	{ 0x8000000000000000UL, "Kernel Space" },
			
 
				-	{ 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
			
 
				-	{ 0/* VMALLOC_START */, "vmalloc() Area" },
			
 
				-	{ 0/* VMEMMAP_START */, "Vmemmap" },
			
 
				-#ifdef CONFIG_KASAN
			
 
				-	{ KASAN_SHADOW_START,	"KASAN shadow" },
			
 
				-	{ KASAN_SHADOW_END,	"KASAN shadow end" },
			
 
				+	[USER_SPACE_NR]		= { 0,			"User Space" },
			
 
				+	[KERNEL_SPACE_NR]	= { PAGE_OFFSET,	"Kernel Mapping" },
			
 
				+	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
			
 
				+	[VMALLOC_END_NR]	= { 0UL,		"vmalloc() End" },
			
 
				+#ifdef CONFIG_HIGHMEM
			
 
				+	[PKMAP_BASE_NR]		= { 0UL,		"Persistent kmap() Area" },
			
 
				 #endif
			
 
				-# ifdef CONFIG_X86_ESPFIX64
			
 
				-	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
			
 
				-# endif
			
 
				-# ifdef CONFIG_EFI
			
 
				-	{ EFI_VA_END,		"EFI Runtime Services" },
			
 
				-# endif
			
 
				-	{ __START_KERNEL_map,   "High Kernel Mapping" },
			
 
				-	{ MODULES_VADDR,        "Modules" },
			
 
				-	{ MODULES_END,          "End Modules" },
			
 
				-#else
			
 
				-	{ PAGE_OFFSET,          "Kernel Mapping" },
			
 
				-	{ 0/* VMALLOC_START */, "vmalloc() Area" },
			
 
				-	{ 0/*VMALLOC_END*/,     "vmalloc() End" },
			
 
				-# ifdef CONFIG_HIGHMEM
			
 
				-	{ 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
			
 
				-# endif
			
 
				-	{ 0/*FIXADDR_START*/,   "Fixmap Area" },
			
 
				-#endif
			
 
				-	{ -1, NULL }		/* End of list */
			
 
				+	[CPU_ENTRY_AREA_NR]	= { 0UL,		"CPU entry area" },
			
 
				+	[FIXADDR_START_NR]	= { 0UL,		"Fixmap area" },
			
 
				+	[END_OF_SPACE_NR]	= { -1,			NULL }
			
 
				 };
			
 
				 
			
 
				+#endif /* !CONFIG_X86_64 */
			
 
				+
			
 
				 /* Multipliers for offsets within the PTEs */
			
 
				 #define PTE_LEVEL_MULT (PAGE_SIZE)
			
 
				 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
			
@@ -140,7 +160,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
 
				 	static const char * const level_name[] =
			
 
				 		{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
			
 
				 
			
 
				-	if (!pgprot_val(prot)) {
			
 
				+	if (!(pr & _PAGE_PRESENT)) {
			
 
				 		/* Not present */
			
 
				 		pt_dump_cont_printf(m, dmsg, "                              ");
			
 
				 	} else {
			
@@ -525,8 +545,8 @@ static int __init pt_dump_init(void)
 
				 	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
			
 
				 # endif
			
 
				 	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
			
 
				+	address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
			
 
				 #endif
			
 
				-
			
 
				 	return 0;
			
 
				 }
			
 
				 __initcall(pt_dump_init);
			
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
 
				 #include <asm/setup.h>
			
 
				 #include <asm/set_memory.h>
			
 
				 #include <asm/page_types.h>
			
 
				+#include <asm/cpu_entry_area.h>
			
 
				 #include <asm/init.h>
			
 
				 
			
 
				 #include "mm_internal.h"
			
@@ -766,6 +767,7 @@ void __init mem_init(void)
 
				 	mem_init_print_info(NULL);
			
 
				 	printk(KERN_INFO "virtual kernel memory layout:\n"
			
 
				 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
			
 
				+		"  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
			
 
				 #ifdef CONFIG_HIGHMEM
			
 
				 		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
			
 
				 #endif
			
@@ -777,6 +779,10 @@ void __init mem_init(void)
 
				 		FIXADDR_START, FIXADDR_TOP,
			
 
				 		(FIXADDR_TOP - FIXADDR_START) >> 10,
			
 
				 
			
 
				+		CPU_ENTRY_AREA_BASE,
			
 
				+		CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
			
 
				+		CPU_ENTRY_AREA_MAP_SIZE >> 10,
			
 
				+
			
 
				 #ifdef CONFIG_HIGHMEM
			
 
				 		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
			
 
				 		(LAST_PKMAP*PAGE_SIZE) >> 10,
			
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
 
				 #include <asm/tlbflush.h>
			
 
				 #include <asm/sections.h>
			
 
				 #include <asm/pgtable.h>
			
 
				+#include <asm/cpu_entry_area.h>
			
 
				 
			
 
				 extern struct range pfn_mapped[E820_MAX_ENTRIES];
			
 
				 
			
@@ -322,31 +323,33 @@ void __init kasan_init(void)
 
				 		map_range(&pfn_mapped[i]);
			
 
				 	}
			
 
				 
			
 
				-	kasan_populate_zero_shadow(
			
 
				-		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
			
 
				-		kasan_mem_to_shadow((void *)__START_KERNEL_map));
			
 
				-
			
 
				-	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
			
 
				-			      (unsigned long)kasan_mem_to_shadow(_end),
			
 
				-			      early_pfn_to_nid(__pa(_stext)));
			
 
				-
			
 
				-	shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
			
 
				+	shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
			
 
				 	shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
			
 
				 	shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
			
 
				 						PAGE_SIZE);
			
 
				 
			
 
				-	shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
			
 
				+	shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
			
 
				+					CPU_ENTRY_AREA_MAP_SIZE);
			
 
				 	shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
			
 
				 	shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
			
 
				 					PAGE_SIZE);
			
 
				 
			
 
				-	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
			
 
				-				   shadow_cpu_entry_begin);
			
 
				+	kasan_populate_zero_shadow(
			
 
				+		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
			
 
				+		shadow_cpu_entry_begin);
			
 
				 
			
 
				 	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
			
 
				 			      (unsigned long)shadow_cpu_entry_end, 0);
			
 
				 
			
 
				-	kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
			
 
				+	kasan_populate_zero_shadow(shadow_cpu_entry_end,
			
 
				+				kasan_mem_to_shadow((void *)__START_KERNEL_map));
			
 
				+
			
 
				+	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
			
 
				+			      (unsigned long)kasan_mem_to_shadow(_end),
			
 
				+			      early_pfn_to_nid(__pa(_stext)));
			
 
				+
			
 
				+	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
			
 
				+				(void *)KASAN_SHADOW_END);
			
 
				 
			
 
				 	load_cr3(init_top_pgt);
			
 
				 	__flush_tlb_all();
			
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
 
				 #include <linux/pagemap.h>
			
 
				 #include <linux/spinlock.h>
			
 
				 
			
 
				+#include <asm/cpu_entry_area.h>
			
 
				 #include <asm/pgtable.h>
			
 
				 #include <asm/pgalloc.h>
			
 
				 #include <asm/fixmap.h>
			
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
				 	 * isn't free.
			
 
				 	 */
			
 
				 #ifdef CONFIG_DEBUG_VM
			
 
				-	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
			
 
				+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
			
 
				 		/*
			
 
				 		 * If we were to BUG here, we'd be very likely to kill
			
 
				 		 * the system so hard that we don't see the call trace.
			
@@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
				 		if (need_flush) {
			
 
				 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
			
 
				 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
			
 
				-			write_cr3(build_cr3(next, new_asid));
			
 
				+			write_cr3(build_cr3(next->pgd, new_asid));
			
 
				 
			
 
				 			/*
			
 
				 			 * NB: This gets called via leave_mm() in the idle path
			
@@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
				 			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
			
 
				 		} else {
			
 
				 			/* The new ASID is already up to date. */
			
 
				-			write_cr3(build_cr3_noflush(next, new_asid));
			
 
				+			write_cr3(build_cr3_noflush(next->pgd, new_asid));
			
 
				 
			
 
				 			/* See above wrt _rcuidle. */
			
 
				 			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
			
@@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void)
 
				 		!(cr4_read_shadow() & X86_CR4_PCIDE));
			
 
				 
			
 
				 	/* Force ASID 0 and force a TLB flush. */
			
 
				-	write_cr3(build_cr3(mm, 0));
			
 
				+	write_cr3(build_cr3(mm->pgd, 0));
			
 
				 
			
 
				 	/* Reinitialize tlbstate. */
			
 
				 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
			
@@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info)
 
				 
			
 
				 	/* flush range by one by one 'invlpg' */
			
 
				 	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
			
 
				-		__flush_tlb_single(addr);
			
 
				+		__flush_tlb_one(addr);
			
 
				 }
			
 
				 
			
 
				 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
			
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
 
				 		local_flush_tlb();
			
 
				 		stat->d_alltlb++;
			
 
				 	} else {
			
 
				-		__flush_tlb_one(msg->address);
			
 
				+		__flush_tlb_single(msg->address);
			
 
				 		stat->d_onetlb++;
			
 
				 	}
			
 
				 	stat->d_requestee++;
			
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2273,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
				 
			
 
				 	switch (idx) {
			
 
				 	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
			
 
				-	case FIX_RO_IDT:
			
 
				 #ifdef CONFIG_X86_32
			
 
				 	case FIX_WP_TEST:
			
 
				 # ifdef CONFIG_HIGHMEM
			
@@ -2284,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
				 #endif
			
 
				 	case FIX_TEXT_POKE0:
			
 
				 	case FIX_TEXT_POKE1:
			
 
				-	case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
			
 
				 		/* All local page mappings */
			
 
				 		pte = pfn_pte(phys, prot);
			
 
				 		break;
			
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -7,9 +7,10 @@
 
				 #ifndef _ASM_GENERIC_MM_HOOKS_H
			
 
				 #define _ASM_GENERIC_MM_HOOKS_H
			
 
				 
			
 
				-static inline void arch_dup_mmap(struct mm_struct *oldmm,
			
 
				-				 struct mm_struct *mm)
			
 
				+static inline int arch_dup_mmap(struct mm_struct *oldmm,
			
 
				+				struct mm_struct *mm)
			
 
				 {
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 static inline void arch_exit_mmap(struct mm_struct *mm)
			
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 
				 struct file;
			
 
				 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
			
 
				 			unsigned long size, pgprot_t *vma_prot);
			
 
				+/*
			
 
				+ * Empty stub used when CONFIG_X86_ESPFIX64 is not set, so that
			
 
				+ * init_espfix_bsp() can be called without an #ifdef at the call site.
			
 
				+ */
			
 
				+#ifndef CONFIG_X86_ESPFIX64
			
 
				+static inline void init_espfix_bsp(void) { }
			
 
				+#endif
			
 
				+
			
 
				 #endif /* !__ASSEMBLY__ */
			
 
				 
			
 
				 #ifndef io_remap_pfn_range
			
--- a/init/main.c
+++ b/init/main.c
@@ -504,6 +504,8 @@ static void __init mm_init(void)
 
				 	pgtable_init();
			
 
				 	vmalloc_init();
			
 
				 	ioremap_huge_init();
			
 
				+	/* Should be run before the first non-init thread is created */
			
 
				+	init_espfix_bsp();
			
 
				 }
			
 
				 
			
 
				 asmlinkage __visible void __init start_kernel(void)
			
@@ -678,10 +680,6 @@ asmlinkage __visible void __init start_kernel(void)
 
				 #ifdef CONFIG_X86
			
 
				 	if (efi_enabled(EFI_RUNTIME_SERVICES))
			
 
				 		efi_enter_virtual_mode();
			
 
				-#endif
			
 
				-#ifdef CONFIG_X86_ESPFIX64
			
 
				-	/* Should be run before the first non-init thread is created */
			
 
				-	init_espfix_bsp();
			
 
				 #endif
			
 
				 	thread_stack_cache_init();
			
 
				 	cred_init();
			
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 
				 			goto out;
			
 
				 	}
			
 
				 	/* a new mm has just been created */
			
 
				-	arch_dup_mmap(oldmm, mm);
			
 
				-	retval = 0;
			
 
				+	retval = arch_dup_mmap(oldmm, mm);
			
 
				 out:
			
 
				 	up_write(&mm->mmap_sem);
			
 
				 	flush_tlb_mm(oldmm);
			
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -627,13 +627,10 @@ static void do_multicpu_tests(void)
 
				 static int finish_exec_test(void)
			
 
				 {
			
 
				 	/*
			
 
				-	 * In a sensible world, this would be check_invalid_segment(0, 1);
			
 
				-	 * For better or for worse, though, the LDT is inherited across exec.
			
 
				-	 * We can probably change this safely, but for now we test it.
			
 
				+	 * Older kernel versions did inherit the LDT on exec() which is
			
 
				+	 * wrong because exec() starts from a clean state.
			
 
				 	 */
			
 
				-	check_valid_segment(0, 1,
			
 
				-			    AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
			
 
				-			    42, true);
			
 
				+	check_invalid_segment(0, 1);
			
 
				 
			
 
				 	return nerrs ? 1 : 0;
			
 
				 }