7 years ago · 35277995e1
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2758,8 +2758,6 @@
 
				 	norandmaps	Don't use address space randomization.  Equivalent to
			
 
				 			echo 0 > /proc/sys/kernel/randomize_va_space
			
 
				 
			
 
				-	noreplace-paravirt	[X86,IA-64,PV_OPS] Don't patch paravirt_ops
			
 
				-
			
 
				 	noreplace-smp	[X86-32,SMP] Don't replace SMP instructions
			
 
				 			with UP alternatives
			
 
				 
			
--- a/Documentation/speculation.txt
+++ b/Documentation/speculation.txt
@@ -0,0 +1,90 @@
 
				+This document explains potential effects of speculation, and how undesirable
			
 
				+effects can be mitigated portably using common APIs.
			
 
				+
			
 
				+===========
			
 
				+Speculation
			
 
				+===========
			
 
				+
			
 
				+To improve performance and minimize average latencies, many contemporary CPUs
			
 
				+employ speculative execution techniques such as branch prediction, performing
			
 
				+work which may be discarded at a later stage.
			
 
				+
			
 
				+Typically speculative execution cannot be observed from architectural state,
			
 
				+such as the contents of registers. However, in some cases it is possible to
			
 
				+observe its impact on microarchitectural state, such as the presence or
			
 
				+absence of data in caches. Such state may form side-channels which can be
			
 
				+observed to extract secret information.
			
 
				+
			
 
				+For example, in the presence of branch prediction, it is possible for bounds
			
 
				+checks to be ignored by code which is speculatively executed. Consider the
			
 
				+following code:
			
 
				+
			
 
				+	int load_array(int *array, unsigned int index)
			
 
				+	{
			
 
				+		if (index >= MAX_ARRAY_ELEMS)
			
 
				+			return 0;
			
 
				+		else
			
 
				+			return array[index];
			
 
				+	}
			
 
				+
			
 
				+Which, on arm64, may be compiled to an assembly sequence such as:
			
 
				+
			
 
				+	CMP	<index>, #MAX_ARRAY_ELEMS
			
 
				+	B.LO	less
			
 
				+	MOV	<returnval>, #0
			
 
				+	RET
			
 
				+  less:
			
 
				+	LDR	<returnval>, [<array>, <index>]
			
 
				+	RET
			
 
				+
			
 
				+It is possible that a CPU mis-predicts the conditional branch, and
			
 
				+speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
			
 
				+value will subsequently be discarded, but the speculated load may affect
			
 
				+microarchitectural state which can be subsequently measured.
			
 
				+
			
 
				+More complex sequences involving multiple dependent memory accesses may
			
 
				+result in sensitive information being leaked. Consider the following
			
 
				+code, building on the prior example:
			
 
				+
			
 
				+	int load_dependent_arrays(int *arr1, int *arr2, int index)
			
 
				+	{
			
 
				+		int val1, val2;
			
 
				+
			
 
				+		val1 = load_array(arr1, index);
			
 
				+		val2 = load_array(arr2, val1);
			
 
				+
			
 
				+		return val2;
			
 
				+	}
			
 
				+
			
 
				+Under speculation, the first call to load_array() may return the value
			
 
				+of an out-of-bounds address, while the second call will influence
			
 
				+microarchitectural state dependent on this value. This may provide an
			
 
				+arbitrary read primitive.
			
 
				+
			
 
				+====================================
			
 
				+Mitigating speculation side-channels
			
 
				+====================================
			
 
				+
			
 
				+The kernel provides a generic API to ensure that bounds checks are
			
 
				+respected even under speculation. Architectures which are affected by
			
 
				+speculation-based side-channels are expected to implement these
			
 
				+primitives.
			
 
				+
			
 
				+The array_index_nospec() helper in <linux/nospec.h> can be used to
			
 
				+prevent information from being leaked via side-channels.
			
 
				+
			
 
				+A call to array_index_nospec(index, size) returns a sanitized index
			
 
				+value that is bounded to [0, size) even under CPU speculation
			
 
				+conditions.
			
 
				+
			
 
				+This can be used to protect the earlier load_array() example:
			
 
				+
			
 
				+	int load_array(int *array, unsigned int index)
			
 
				+	{
			
 
				+		if (index >= MAX_ARRAY_ELEMS)
			
 
				+			return 0;
			
 
				+		else {
			
 
				+			index = array_index_nospec(index, MAX_ARRAY_ELEMS);
			
 
				+			return array[index];
			
 
				+		}
			
 
				+	}
			
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -21,6 +21,7 @@
 
				 #include <linux/export.h>
			
 
				 #include <linux/context_tracking.h>
			
 
				 #include <linux/user-return-notifier.h>
			
 
				+#include <linux/nospec.h>
			
 
				 #include <linux/uprobes.h>
			
 
				 #include <linux/livepatch.h>
			
 
				 #include <linux/syscalls.h>
			
@@ -206,7 +207,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 
				 	 * special case only applies after poking regs and before the
			
 
				 	 * very next return to user mode.
			
 
				 	 */
			
 
				-	current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
			
 
				+	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
			
 
				 #endif
			
 
				 
			
 
				 	user_enter_irqoff();
			
@@ -282,7 +283,8 @@ __visible void do_syscall_64(struct pt_regs *regs)
 
				 	 * regs->orig_ax, which changes the behavior of some syscalls.
			
 
				 	 */
			
 
				 	if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
			
 
				-		regs->ax = sys_call_table[nr & __SYSCALL_MASK](
			
 
				+		nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
			
 
				+		regs->ax = sys_call_table[nr](
			
 
				 			regs->di, regs->si, regs->dx,
			
 
				 			regs->r10, regs->r8, regs->r9);
			
 
				 	}
			
@@ -304,7 +306,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 
				 	unsigned int nr = (unsigned int)regs->orig_ax;
			
 
				 
			
 
				 #ifdef CONFIG_IA32_EMULATION
			
 
				-	current->thread.status |= TS_COMPAT;
			
 
				+	ti->status |= TS_COMPAT;
			
 
				 #endif
			
 
				 
			
 
				 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
			
@@ -318,6 +320,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 
				 	}
			
 
				 
			
 
				 	if (likely(nr < IA32_NR_syscalls)) {
			
 
				+		nr = array_index_nospec(nr, IA32_NR_syscalls);
			
 
				 		/*
			
 
				 		 * It's possible that a 32-bit syscall implementation
			
 
				 		 * takes a 64-bit parameter but nonetheless assumes that
			
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -236,91 +236,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 
				 	pushq	%r9				/* pt_regs->r9 */
			
 
				 	pushq	%r10				/* pt_regs->r10 */
			
 
				 	pushq	%r11				/* pt_regs->r11 */
			
 
				-	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
			
 
				-	UNWIND_HINT_REGS extra=0
			
 
				-
			
 
				-	TRACE_IRQS_OFF
			
 
				-
			
 
				-	/*
			
 
				-	 * If we need to do entry work or if we guess we'll need to do
			
 
				-	 * exit work, go straight to the slow path.
			
 
				-	 */
			
 
				-	movq	PER_CPU_VAR(current_task), %r11
			
 
				-	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
			
 
				-	jnz	entry_SYSCALL64_slow_path
			
 
				-
			
 
				-entry_SYSCALL_64_fastpath:
			
 
				-	/*
			
 
				-	 * Easy case: enable interrupts and issue the syscall.  If the syscall
			
 
				-	 * needs pt_regs, we'll call a stub that disables interrupts again
			
 
				-	 * and jumps to the slow path.
			
 
				-	 */
			
 
				-	TRACE_IRQS_ON
			
 
				-	ENABLE_INTERRUPTS(CLBR_NONE)
			
 
				-#if __SYSCALL_MASK == ~0
			
 
				-	cmpq	$__NR_syscall_max, %rax
			
 
				-#else
			
 
				-	andl	$__SYSCALL_MASK, %eax
			
 
				-	cmpl	$__NR_syscall_max, %eax
			
 
				-#endif
			
 
				-	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
			
 
				-	movq	%r10, %rcx
			
 
				-
			
 
				-	/*
			
 
				-	 * This call instruction is handled specially in stub_ptregs_64.
			
 
				-	 * It might end up jumping to the slow path.  If it jumps, RAX
			
 
				-	 * and all argument registers are clobbered.
			
 
				-	 */
			
 
				-#ifdef CONFIG_RETPOLINE
			
 
				-	movq	sys_call_table(, %rax, 8), %rax
			
 
				-	call	__x86_indirect_thunk_rax
			
 
				-#else
			
 
				-	call	*sys_call_table(, %rax, 8)
			
 
				-#endif
			
 
				-.Lentry_SYSCALL_64_after_fastpath_call:
			
 
				-
			
 
				-	movq	%rax, RAX(%rsp)
			
 
				-1:
			
 
				+	pushq	%rbx				/* pt_regs->rbx */
			
 
				+	pushq	%rbp				/* pt_regs->rbp */
			
 
				+	pushq	%r12				/* pt_regs->r12 */
			
 
				+	pushq	%r13				/* pt_regs->r13 */
			
 
				+	pushq	%r14				/* pt_regs->r14 */
			
 
				+	pushq	%r15				/* pt_regs->r15 */
			
 
				+	UNWIND_HINT_REGS
			
 
				 
			
 
				-	/*
			
 
				-	 * If we get here, then we know that pt_regs is clean for SYSRET64.
			
 
				-	 * If we see that no exit work is required (which we are required
			
 
				-	 * to check with IRQs off), then we can go straight to SYSRET64.
			
 
				-	 */
			
 
				-	DISABLE_INTERRUPTS(CLBR_ANY)
			
 
				 	TRACE_IRQS_OFF
			
 
				-	movq	PER_CPU_VAR(current_task), %r11
			
 
				-	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
			
 
				-	jnz	1f
			
 
				-
			
 
				-	LOCKDEP_SYS_EXIT
			
 
				-	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
			
 
				-	movq	RIP(%rsp), %rcx
			
 
				-	movq	EFLAGS(%rsp), %r11
			
 
				-	addq	$6*8, %rsp	/* skip extra regs -- they were preserved */
			
 
				-	UNWIND_HINT_EMPTY
			
 
				-	jmp	.Lpop_c_regs_except_rcx_r11_and_sysret
			
 
				 
			
 
				-1:
			
 
				-	/*
			
 
				-	 * The fast path looked good when we started, but something changed
			
 
				-	 * along the way and we need to switch to the slow path.  Calling
			
 
				-	 * raise(3) will trigger this, for example.  IRQs are off.
			
 
				-	 */
			
 
				-	TRACE_IRQS_ON
			
 
				-	ENABLE_INTERRUPTS(CLBR_ANY)
			
 
				-	SAVE_EXTRA_REGS
			
 
				-	movq	%rsp, %rdi
			
 
				-	call	syscall_return_slowpath	/* returns with IRQs disabled */
			
 
				-	jmp	return_from_SYSCALL_64
			
 
				-
			
 
				-entry_SYSCALL64_slow_path:
			
 
				 	/* IRQs are off. */
			
 
				-	SAVE_EXTRA_REGS
			
 
				 	movq	%rsp, %rdi
			
 
				 	call	do_syscall_64		/* returns with IRQs disabled */
			
 
				 
			
 
				-return_from_SYSCALL_64:
			
 
				 	TRACE_IRQS_IRETQ		/* we're about to change IF */
			
 
				 
			
 
				 	/*
			
@@ -393,7 +322,6 @@ syscall_return_via_sysret:
 
				 	/* rcx and r11 are already restored (see code above) */
			
 
				 	UNWIND_HINT_EMPTY
			
 
				 	POP_EXTRA_REGS
			
 
				-.Lpop_c_regs_except_rcx_r11_and_sysret:
			
 
				 	popq	%rsi	/* skip r11 */
			
 
				 	popq	%r10
			
 
				 	popq	%r9
			
@@ -424,47 +352,6 @@ syscall_return_via_sysret:
 
				 	USERGS_SYSRET64
			
 
				 END(entry_SYSCALL_64)
			
 
				 
			
 
				-ENTRY(stub_ptregs_64)
			
 
				-	/*
			
 
				-	 * Syscalls marked as needing ptregs land here.
			
 
				-	 * If we are on the fast path, we need to save the extra regs,
			
 
				-	 * which we achieve by trying again on the slow path.  If we are on
			
 
				-	 * the slow path, the extra regs are already saved.
			
 
				-	 *
			
 
				-	 * RAX stores a pointer to the C function implementing the syscall.
			
 
				-	 * IRQs are on.
			
 
				-	 */
			
 
				-	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
			
 
				-	jne	1f
			
 
				-
			
 
				-	/*
			
 
				-	 * Called from fast path -- disable IRQs again, pop return address
			
 
				-	 * and jump to slow path
			
 
				-	 */
			
 
				-	DISABLE_INTERRUPTS(CLBR_ANY)
			
 
				-	TRACE_IRQS_OFF
			
 
				-	popq	%rax
			
 
				-	UNWIND_HINT_REGS extra=0
			
 
				-	jmp	entry_SYSCALL64_slow_path
			
 
				-
			
 
				-1:
			
 
				-	JMP_NOSPEC %rax				/* Called from C */
			
 
				-END(stub_ptregs_64)
			
 
				-
			
 
				-.macro ptregs_stub func
			
 
				-ENTRY(ptregs_\func)
			
 
				-	UNWIND_HINT_FUNC
			
 
				-	leaq	\func(%rip), %rax
			
 
				-	jmp	stub_ptregs_64
			
 
				-END(ptregs_\func)
			
 
				-.endm
			
 
				-
			
 
				-/* Instantiate ptregs_stub for each ptregs-using syscall */
			
 
				-#define __SYSCALL_64_QUAL_(sym)
			
 
				-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
			
 
				-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
			
 
				-#include <asm/syscalls_64.h>
			
 
				-
			
 
				 /*
			
 
				  * %rdi: prev task
			
 
				  * %rsi: next task
			
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -7,14 +7,11 @@
 
				 #include <asm/asm-offsets.h>
			
 
				 #include <asm/syscall.h>
			
 
				 
			
 
				-#define __SYSCALL_64_QUAL_(sym) sym
			
 
				-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
			
 
				-
			
 
				-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
			
 
				+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
			
 
				 #include <asm/syscalls_64.h>
			
 
				 #undef __SYSCALL_64
			
 
				 
			
 
				-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
			
 
				+#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
			
 
				 
			
 
				 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
			
 
				 
			
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -24,6 +24,34 @@
 
				 #define wmb()	asm volatile("sfence" ::: "memory")
			
 
				 #endif
			
 
				 
			
 
				+/**
			
 
				+ * array_index_mask_nospec() - generate a mask that is ~0UL when the
			
 
				+ * 	bounds check succeeds and 0 otherwise
			
 
				+ * @index: array element index
			
 
				+ * @size: number of elements in array
			
 
				+ *
			
 
				+ * Returns:
			
 
				+ *     0 - (index < size)
			
 
				+ */
			
 
				+static inline unsigned long array_index_mask_nospec(unsigned long index,
			
 
				+		unsigned long size)
			
 
				+{
			
 
				+	unsigned long mask;
			
 
				+
			
 
				+	asm ("cmp %1,%2; sbb %0,%0;"
			
 
				+			:"=r" (mask)
			
 
				+			:"r"(size),"r" (index)
			
 
				+			:"cc");
			
 
				+	return mask;
			
 
				+}
			
 
				+
			
 
				+/* Override the default implementation from linux/nospec.h. */
			
 
				+#define array_index_mask_nospec array_index_mask_nospec
			
 
				+
			
 
				+/* Prevent speculative execution past this barrier. */
			
 
				+#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
			
 
				+					   "lfence", X86_FEATURE_LFENCE_RDTSC)
			
 
				+
			
 
				 #ifdef CONFIG_X86_PPRO_FENCE
			
 
				 #define dma_rmb()	rmb()
			
 
				 #else
			
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -137,8 +137,10 @@ enum fixed_addresses {
 
				 
			
 
				 extern void reserve_top_address(unsigned long reserve);
			
 
				 
			
 
				-#define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
			
 
				-#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
			
 
				+#define FIXADDR_SIZE		(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
			
 
				+#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
			
 
				+#define FIXADDR_TOT_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
			
 
				+#define FIXADDR_TOT_START	(FIXADDR_TOP - FIXADDR_TOT_SIZE)
			
 
				 
			
 
				 extern int fixmaps_set;
			
 
				 
			
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
 
				 	 * that some other imaginary CPU is updating continuously with a
			
 
				 	 * time stamp.
			
 
				 	 */
			
 
				-	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
			
 
				-			  "lfence", X86_FEATURE_LFENCE_RDTSC);
			
 
				+	barrier_nospec();
			
 
				 	return rdtsc();
			
 
				 }
			
 
				 
			
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -150,7 +150,7 @@ extern char __indirect_thunk_end[];
 
				  * On VMEXIT we must ensure that no RSB predictions learned in the guest
			
 
				  * can be followed in the host, by overwriting the RSB completely. Both
			
 
				  * retpoline and IBRS mitigations for Spectre v2 need this; only on future
			
 
				- * CPUs with IBRS_ATT *might* it be avoided.
			
 
				+ * CPUs with IBRS_ALL *might* it be avoided.
			
 
				  */
			
 
				 static inline void vmexit_fill_RSB(void)
			
 
				 {
			
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 
				  */
			
 
				 #define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
			
 
				 
			
 
				-#define CPU_ENTRY_AREA_BASE				\
			
 
				-	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
			
 
				+#define CPU_ENTRY_AREA_BASE						\
			
 
				+	((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1))   \
			
 
				+	 & PMD_MASK)
			
 
				 
			
 
				 #define PKMAP_BASE		\
			
 
				 	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
			
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -460,8 +460,6 @@ struct thread_struct {
 
				 	unsigned short		gsindex;
			
 
				 #endif
			
 
				 
			
 
				-	u32			status;		/* thread synchronous flags */
			
 
				-
			
 
				 #ifdef CONFIG_X86_64
			
 
				 	unsigned long		fsbase;
			
 
				 	unsigned long		gsbase;
			
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
 
				 	 * TS_COMPAT is set for 32-bit syscall entries and then
			
 
				 	 * remains set until we return to user mode.
			
 
				 	 */
			
 
				-	if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
			
 
				+	if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
			
 
				 		/*
			
 
				 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
			
 
				 		 * and will match correctly in comparisons.
			
@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
				 					 unsigned long *args)
			
 
				 {
			
 
				 # ifdef CONFIG_IA32_EMULATION
			
 
				-	if (task->thread.status & TS_COMPAT)
			
 
				+	if (task->thread_info.status & TS_COMPAT)
			
 
				 		switch (i) {
			
 
				 		case 0:
			
 
				 			if (!n--) break;
			
@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
				 					 const unsigned long *args)
			
 
				 {
			
 
				 # ifdef CONFIG_IA32_EMULATION
			
 
				-	if (task->thread.status & TS_COMPAT)
			
 
				+	if (task->thread_info.status & TS_COMPAT)
			
 
				 		switch (i) {
			
 
				 		case 0:
			
 
				 			if (!n--) break;
			
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -55,6 +55,7 @@ struct task_struct;
 
				 
			
 
				 struct thread_info {
			
 
				 	unsigned long		flags;		/* low level flags */
			
 
				+	u32			status;		/* thread synchronous flags */
			
 
				 };
			
 
				 
			
 
				 #define INIT_THREAD_INFO(tsk)			\
			
@@ -219,7 +220,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 
				 #define in_ia32_syscall() true
			
 
				 #else
			
 
				 #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
			
 
				-			   current->thread.status & TS_COMPAT)
			
 
				+			   current_thread_info()->status & TS_COMPAT)
			
 
				 #endif
			
 
				 
			
 
				 /*
			
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -174,6 +174,8 @@ struct tlb_state {
 
				 	struct mm_struct *loaded_mm;
			
 
				 	u16 loaded_mm_asid;
			
 
				 	u16 next_asid;
			
 
				+	/* last user mm's ctx id */
			
 
				+	u64 last_ctx_id;
			
 
				 
			
 
				 	/*
			
 
				 	 * We can be in one of several states:
			
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -124,6 +124,11 @@ extern int __get_user_bad(void);
 
				 
			
 
				 #define __uaccess_begin() stac()
			
 
				 #define __uaccess_end()   clac()
			
 
				+#define __uaccess_begin_nospec()	\
			
 
				+({					\
			
 
				+	stac();				\
			
 
				+	barrier_nospec();		\
			
 
				+})
			
 
				 
			
 
				 /*
			
 
				  * This is a type: either unsigned long, if the argument fits into
			
@@ -445,7 +450,7 @@ do {									\
 
				 ({									\
			
 
				 	int __gu_err;							\
			
 
				 	__inttype(*(ptr)) __gu_val;					\
			
 
				-	__uaccess_begin();						\
			
 
				+	__uaccess_begin_nospec();					\
			
 
				 	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
			
 
				 	__uaccess_end();						\
			
 
				 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
			
@@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; };
 
				 	__uaccess_begin();						\
			
 
				 	barrier();
			
 
				 
			
 
				+#define uaccess_try_nospec do {						\
			
 
				+	current->thread.uaccess_err = 0;				\
			
 
				+	__uaccess_begin_nospec();					\
			
 
				+
			
 
				 #define uaccess_catch(err)						\
			
 
				 	__uaccess_end();						\
			
 
				 	(err) |= (current->thread.uaccess_err ? -EFAULT : 0);		\
			
@@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; };
 
				  *	get_user_ex(...);
			
 
				  * } get_user_catch(err)
			
 
				  */
			
 
				-#define get_user_try		uaccess_try
			
 
				+#define get_user_try		uaccess_try_nospec
			
 
				 #define get_user_catch(err)	uaccess_catch(err)
			
 
				 
			
 
				 #define get_user_ex(x, ptr)	do {					\
			
@@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void)
 
				 	__typeof__(ptr) __uval = (uval);				\
			
 
				 	__typeof__(*(ptr)) __old = (old);				\
			
 
				 	__typeof__(*(ptr)) __new = (new);				\
			
 
				-	__uaccess_begin();						\
			
 
				+	__uaccess_begin_nospec();					\
			
 
				 	switch (size) {							\
			
 
				 	case 1:								\
			
 
				 	{								\
			
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 
				 		switch (n) {
			
 
				 		case 1:
			
 
				 			ret = 0;
			
 
				-			__uaccess_begin();
			
 
				+			__uaccess_begin_nospec();
			
 
				 			__get_user_asm_nozero(*(u8 *)to, from, ret,
			
 
				 					      "b", "b", "=q", 1);
			
 
				 			__uaccess_end();
			
 
				 			return ret;
			
 
				 		case 2:
			
 
				 			ret = 0;
			
 
				-			__uaccess_begin();
			
 
				+			__uaccess_begin_nospec();
			
 
				 			__get_user_asm_nozero(*(u16 *)to, from, ret,
			
 
				 					      "w", "w", "=r", 2);
			
 
				 			__uaccess_end();
			
 
				 			return ret;
			
 
				 		case 4:
			
 
				 			ret = 0;
			
 
				-			__uaccess_begin();
			
 
				+			__uaccess_begin_nospec();
			
 
				 			__get_user_asm_nozero(*(u32 *)to, from, ret,
			
 
				 					      "l", "k", "=r", 4);
			
 
				 			__uaccess_end();
			
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
 
				 		return copy_user_generic(dst, (__force void *)src, size);
			
 
				 	switch (size) {
			
 
				 	case 1:
			
 
				-		__uaccess_begin();
			
 
				+		__uaccess_begin_nospec();
			
 
				 		__get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
			
 
				 			      ret, "b", "b", "=q", 1);
			
 
				 		__uaccess_end();
			
 
				 		return ret;
			
 
				 	case 2:
			
 
				-		__uaccess_begin();
			
 
				+		__uaccess_begin_nospec();
			
 
				 		__get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
			
 
				 			      ret, "w", "w", "=r", 2);
			
 
				 		__uaccess_end();
			
 
				 		return ret;
			
 
				 	case 4:
			
 
				-		__uaccess_begin();
			
 
				+		__uaccess_begin_nospec();
			
 
				 		__get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
			
 
				 			      ret, "l", "k", "=r", 4);
			
 
				 		__uaccess_end();
			
 
				 		return ret;
			
 
				 	case 8:
			
 
				-		__uaccess_begin();
			
 
				+		__uaccess_begin_nospec();
			
 
				 		__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
			
 
				 			      ret, "q", "", "=r", 8);
			
 
				 		__uaccess_end();
			
 
				 		return ret;
			
 
				 	case 10:
			
 
				-		__uaccess_begin();
			
 
				+		__uaccess_begin_nospec();
			
 
				 		__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
			
 
				 			       ret, "q", "", "=r", 10);
			
 
				 		if (likely(!ret))
			
@@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
 
				 		__uaccess_end();
			
 
				 		return ret;
			
 
				 	case 16:
			
 
				-		__uaccess_begin();
			
 
				+		__uaccess_begin_nospec();
			
 
				 		__get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
			
 
				 			       ret, "q", "", "=r", 16);
			
 
				 		if (likely(!ret))
			
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str)
 
				 }
			
 
				 __setup("noreplace-smp", setup_noreplace_smp);
			
 
				 
			
 
				-#ifdef CONFIG_PARAVIRT
			
 
				-static int __initdata_or_module noreplace_paravirt = 0;
			
 
				-
			
 
				-static int __init setup_noreplace_paravirt(char *str)
			
 
				-{
			
 
				-	noreplace_paravirt = 1;
			
 
				-	return 1;
			
 
				-}
			
 
				-__setup("noreplace-paravirt", setup_noreplace_paravirt);
			
 
				-#endif
			
 
				-
			
 
				 #define DPRINTK(fmt, args...)						\
			
 
				 do {									\
			
 
				 	if (debug_alternative)						\
			
@@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 
				 	struct paravirt_patch_site *p;
			
 
				 	char insnbuf[MAX_PATCH_LEN];
			
 
				 
			
 
				-	if (noreplace_paravirt)
			
 
				-		return;
			
 
				-
			
 
				 	for (p = start; p < end; p++) {
			
 
				 		unsigned int used;
			
 
				 
			
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -103,7 +103,7 @@ bool retpoline_module_ok(bool has_retpoline)
 
				 	if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
			
 
				 		return true;
			
 
				 
			
 
				-	pr_err("System may be vunerable to spectre v2\n");
			
 
				+	pr_err("System may be vulnerable to spectre v2\n");
			
 
				 	spectre_v2_bad_module = true;
			
 
				 	return false;
			
 
				 }
			
@@ -119,13 +119,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
 
				 static void __init spec2_print_if_insecure(const char *reason)
			
 
				 {
			
 
				 	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
			
 
				-		pr_info("%s\n", reason);
			
 
				+		pr_info("%s selected on command line.\n", reason);
			
 
				 }
			
 
				 
			
 
				 static void __init spec2_print_if_secure(const char *reason)
			
 
				 {
			
 
				 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
			
 
				-		pr_info("%s\n", reason);
			
 
				+		pr_info("%s selected on command line.\n", reason);
			
 
				 }
			
 
				 
			
 
				 static inline bool retp_compiler(void)
			
@@ -140,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
 
				 	return len == arglen && !strncmp(arg, opt, len);
			
 
				 }
			
 
				 
			
 
				+static const struct {
			
 
				+	const char *option;
			
 
				+	enum spectre_v2_mitigation_cmd cmd;
			
 
				+	bool secure;
			
 
				+} mitigation_options[] = {
			
 
				+	{ "off",               SPECTRE_V2_CMD_NONE,              false },
			
 
				+	{ "on",                SPECTRE_V2_CMD_FORCE,             true },
			
 
				+	{ "retpoline",         SPECTRE_V2_CMD_RETPOLINE,         false },
			
 
				+	{ "retpoline,amd",     SPECTRE_V2_CMD_RETPOLINE_AMD,     false },
			
 
				+	{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
			
 
				+	{ "auto",              SPECTRE_V2_CMD_AUTO,              false },
			
 
				+};
			
 
				+
			
 
				 static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
			
 
				 {
			
 
				 	char arg[20];
			
 
				-	int ret;
			
 
				-
			
 
				-	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
			
 
				-				  sizeof(arg));
			
 
				-	if (ret > 0)  {
			
 
				-		if (match_option(arg, ret, "off")) {
			
 
				-			goto disable;
			
 
				-		} else if (match_option(arg, ret, "on")) {
			
 
				-			spec2_print_if_secure("force enabled on command line.");
			
 
				-			return SPECTRE_V2_CMD_FORCE;
			
 
				-		} else if (match_option(arg, ret, "retpoline")) {
			
 
				-			spec2_print_if_insecure("retpoline selected on command line.");
			
 
				-			return SPECTRE_V2_CMD_RETPOLINE;
			
 
				-		} else if (match_option(arg, ret, "retpoline,amd")) {
			
 
				-			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
			
 
				-				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
			
 
				-				return SPECTRE_V2_CMD_AUTO;
			
 
				-			}
			
 
				-			spec2_print_if_insecure("AMD retpoline selected on command line.");
			
 
				-			return SPECTRE_V2_CMD_RETPOLINE_AMD;
			
 
				-		} else if (match_option(arg, ret, "retpoline,generic")) {
			
 
				-			spec2_print_if_insecure("generic retpoline selected on command line.");
			
 
				-			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
			
 
				-		} else if (match_option(arg, ret, "auto")) {
			
 
				+	int ret, i;
			
 
				+	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
			
 
				+
			
 
				+	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
			
 
				+		return SPECTRE_V2_CMD_NONE;
			
 
				+	else {
			
 
				+		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
			
 
				+					  sizeof(arg));
			
 
				+		if (ret < 0)
			
 
				 			return SPECTRE_V2_CMD_AUTO;
			
 
				+
			
 
				+		for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
			
 
				+			if (!match_option(arg, ret, mitigation_options[i].option))
			
 
				+				continue;
			
 
				+			cmd = mitigation_options[i].cmd;
			
 
				+			break;
			
 
				 		}
			
 
				+
			
 
				+		if (i >= ARRAY_SIZE(mitigation_options)) {
			
 
				+			pr_err("unknown option (%s). Switching to AUTO select\n",
			
 
				+			       arg);	/* i is past the table here */
			
 
				+			return SPECTRE_V2_CMD_AUTO;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
			
 
				+	     cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
			
 
				+	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
			
 
				+	    !IS_ENABLED(CONFIG_RETPOLINE)) {
			
 
				+		pr_err("%s selected but not compiled in. Switching to AUTO select\n",
			
 
				+		       mitigation_options[i].option);
			
 
				+		return SPECTRE_V2_CMD_AUTO;
			
 
				 	}
			
 
				 
			
 
				-	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
			
 
				+	if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
			
 
				+	    boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
			
 
				+		pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
			
 
				 		return SPECTRE_V2_CMD_AUTO;
			
 
				-disable:
			
 
				-	spec2_print_if_insecure("disabled on command line.");
			
 
				-	return SPECTRE_V2_CMD_NONE;
			
 
				+	}
			
 
				+
			
 
				+	if (mitigation_options[i].secure)
			
 
				+		spec2_print_if_secure(mitigation_options[i].option);
			
 
				+	else
			
 
				+		spec2_print_if_insecure(mitigation_options[i].option);
			
 
				+
			
 
				+	return cmd;
			
 
				 }
			
 
				 
			
 
				 /* Check for Skylake-like CPUs (for RSB handling) */
			
@@ -213,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void)
 
				 		return;
			
 
				 
			
 
				 	case SPECTRE_V2_CMD_FORCE:
			
 
				-		/* FALLTRHU */
			
 
				 	case SPECTRE_V2_CMD_AUTO:
			
 
				-		goto retpoline_auto;
			
 
				-
			
 
				+		if (IS_ENABLED(CONFIG_RETPOLINE))
			
 
				+			goto retpoline_auto;
			
 
				+		break;
			
 
				 	case SPECTRE_V2_CMD_RETPOLINE_AMD:
			
 
				 		if (IS_ENABLED(CONFIG_RETPOLINE))
			
 
				 			goto retpoline_amd;
			
@@ -297,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
 
				 {
			
 
				 	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
			
 
				 		return sprintf(buf, "Not affected\n");
			
 
				-	return sprintf(buf, "Vulnerable\n");
			
 
				+	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
			
 
				 }
			
 
				 
			
 
				 ssize_t cpu_show_spectre_v2(struct device *dev,
			
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -750,6 +750,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void init_speculation_control(struct cpuinfo_x86 *c)
			
 
				+{
			
 
				+	/*
			
 
				+	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
			
 
				+	 * and they also have a different bit for STIBP support. Also,
			
 
				+	 * a hypervisor might have set the individual AMD bits even on
			
 
				+	 * Intel CPUs, for finer-grained selection of what's available.
			
 
				+	 *
			
 
				+	 * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
			
 
				+	 * features, which are visible in /proc/cpuinfo and used by the
			
 
				+	 * kernel. So set those accordingly from the Intel bits.
			
 
				+	 */
			
 
				+	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
			
 
				+		set_cpu_cap(c, X86_FEATURE_IBRS);
			
 
				+		set_cpu_cap(c, X86_FEATURE_IBPB);
			
 
				+	}
			
 
				+	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
			
 
				+		set_cpu_cap(c, X86_FEATURE_STIBP);
			
 
				+}
			
 
				+
			
 
				 void get_cpu_cap(struct cpuinfo_x86 *c)
			
 
				 {
			
 
				 	u32 eax, ebx, ecx, edx;
			
@@ -844,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 
				 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
			
 
				 
			
 
				 	init_scattered_cpuid_features(c);
			
 
				+	init_speculation_control(c);
			
 
				 
			
 
				 	/*
			
 
				 	 * Clear/Set all flags overridden by options, after probe.
			
@@ -879,7 +900,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
			
 
				+static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
			
 
				 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CEDARVIEW,	X86_FEATURE_ANY },
			
 
				 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_CLOVERVIEW,	X86_FEATURE_ANY },
			
 
				 	{ X86_VENDOR_INTEL,	6, INTEL_FAM6_ATOM_LINCROFT,	X86_FEATURE_ANY },
			
@@ -892,7 +913,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
 
				 	{}
			
 
				 };
			
 
				 
			
 
				-static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
			
 
				+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
			
 
				 	{ X86_VENDOR_AMD },
			
 
				 	{}
			
 
				 };
			
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -175,28 +175,17 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 
				 	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
			
 
				 		c->microcode = intel_get_microcode_revision();
			
 
				 
			
 
				-	/*
			
 
				-	 * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
			
 
				-	 * and they also have a different bit for STIBP support. Also,
			
 
				-	 * a hypervisor might have set the individual AMD bits even on
			
 
				-	 * Intel CPUs, for finer-grained selection of what's available.
			
 
				-	 */
			
 
				-	if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
			
 
				-		set_cpu_cap(c, X86_FEATURE_IBRS);
			
 
				-		set_cpu_cap(c, X86_FEATURE_IBPB);
			
 
				-	}
			
 
				-	if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
			
 
				-		set_cpu_cap(c, X86_FEATURE_STIBP);
			
 
				-
			
 
				 	/* Now if any of them are set, check the blacklist and clear the lot */
			
 
				-	if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
			
 
				+	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
			
 
				+	     cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
			
 
				+	     cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
			
 
				 	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
			
 
				 		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
			
 
				-		clear_cpu_cap(c, X86_FEATURE_IBRS);
			
 
				-		clear_cpu_cap(c, X86_FEATURE_IBPB);
			
 
				-		clear_cpu_cap(c, X86_FEATURE_STIBP);
			
 
				-		clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
			
 
				-		clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
			
 
				+		setup_clear_cpu_cap(X86_FEATURE_IBRS);
			
 
				+		setup_clear_cpu_cap(X86_FEATURE_IBPB);
			
 
				+		setup_clear_cpu_cap(X86_FEATURE_STIBP);
			
 
				+		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
			
 
				+		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
			
 
				 	}
			
 
				 
			
 
				 	/*
			
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -557,7 +557,7 @@ static void __set_personality_x32(void)
 
				 	 * Pretend to come from a x32 execve.
			
 
				 	 */
			
 
				 	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
			
 
				-	current->thread.status &= ~TS_COMPAT;
			
 
				+	current_thread_info()->status &= ~TS_COMPAT;
			
 
				 #endif
			
 
				 }
			
 
				 
			
@@ -571,7 +571,7 @@ static void __set_personality_ia32(void)
 
				 	current->personality |= force_personality32;
			
 
				 	/* Prepare the first "return" to user space */
			
 
				 	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
			
 
				-	current->thread.status |= TS_COMPAT;
			
 
				+	current_thread_info()->status |= TS_COMPAT;
			
 
				 #endif
			
 
				 }
			
 
				 
			
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 
				 		 */
			
 
				 		regs->orig_ax = value;
			
 
				 		if (syscall_get_nr(child, regs) >= 0)
			
 
				-			child->thread.status |= TS_I386_REGS_POKED;
			
 
				+			child->thread_info.status |= TS_I386_REGS_POKED;
			
 
				 		break;
			
 
				 
			
 
				 	case offsetof(struct user32, regs.eflags):
			
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 
				 	 * than the tracee.
			
 
				 	 */
			
 
				 #ifdef CONFIG_IA32_EMULATION
			
 
				-	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
			
 
				+	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
			
 
				 		return __NR_ia32_restart_syscall;
			
 
				 #endif
			
 
				 #ifdef CONFIG_X86_X32_ABI
			
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void)
 
				 
			
 
				 #define F(x) bit(X86_FEATURE_##x)
			
 
				 
			
 
				-/* These are scattered features in cpufeatures.h. */
			
 
				-#define KVM_CPUID_BIT_AVX512_4VNNIW     2
			
 
				-#define KVM_CPUID_BIT_AVX512_4FMAPS     3
			
 
				+/* For scattered features from cpufeatures.h; we currently expose none */
			
 
				 #define KF(x) bit(KVM_CPUID_BIT_##x)
			
 
				 
			
 
				 int kvm_update_cpuid(struct kvm_vcpu *vcpu)
			
@@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
				 		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
			
 
				 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
			
 
				 
			
 
				+	/* cpuid 0x80000008.ebx */
			
 
				+	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
			
 
				+		F(IBPB) | F(IBRS);
			
 
				+
			
 
				 	/* cpuid 0xC0000001.edx */
			
 
				 	const u32 kvm_cpuid_C000_0001_edx_x86_features =
			
 
				 		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
			
@@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
				 
			
 
				 	/* cpuid 7.0.edx*/
			
 
				 	const u32 kvm_cpuid_7_0_edx_x86_features =
			
 
				-		KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
			
 
				+		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
			
 
				+		F(ARCH_CAPABILITIES);
			
 
				 
			
 
				 	/* all calls to cpuid_count() should be made on the same cpu */
			
 
				 	get_cpu();
			
@@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
				 			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
			
 
				 				entry->ecx &= ~F(PKU);
			
 
				 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
			
 
				-			entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
			
 
				+			cpuid_mask(&entry->edx, CPUID_7_EDX);
			
 
				 		} else {
			
 
				 			entry->ebx = 0;
			
 
				 			entry->ecx = 0;
			
@@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
				 		if (!g_phys_as)
			
 
				 			g_phys_as = phys_as;
			
 
				 		entry->eax = g_phys_as | (virt_as << 8);
			
 
				-		entry->ebx = entry->edx = 0;
			
 
				+		entry->edx = 0;
			
 
				+		/* IBRS and IBPB aren't necessarily present in hardware cpuid */
			
 
				+		if (boot_cpu_has(X86_FEATURE_IBPB))
			
 
				+			entry->ebx |= F(IBPB);
			
 
				+		if (boot_cpu_has(X86_FEATURE_IBRS))
			
 
				+			entry->ebx |= F(IBRS);
			
 
				+		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
			
 
				+		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
			
 
				 		break;
			
 
				 	}
			
 
				 	case 0x80000019:
			
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
 
				 	[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
			
 
				 	[CPUID_7_ECX]         = {         7, 0, CPUID_ECX},
			
 
				 	[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
			
 
				+	[CPUID_7_EDX]         = {         7, 0, CPUID_EDX},
			
 
				 };
			
 
				 
			
 
				 static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
			
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -184,6 +184,8 @@ struct vcpu_svm {
 
				 		u64 gs_base;
			
 
				 	} host;
			
 
				 
			
 
				+	u64 spec_ctrl;
			
 
				+
			
 
				 	u32 *msrpm;
			
 
				 
			
 
				 	ulong nmi_iret_rip;
			
@@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs {
 
				 	{ .index = MSR_CSTAR,				.always = true  },
			
 
				 	{ .index = MSR_SYSCALL_MASK,			.always = true  },
			
 
				 #endif
			
 
				+	{ .index = MSR_IA32_SPEC_CTRL,			.always = false },
			
 
				+	{ .index = MSR_IA32_PRED_CMD,			.always = false },
			
 
				 	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
			
 
				 	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
			
 
				 	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
			
@@ -529,6 +533,7 @@ struct svm_cpu_data {
 
				 	struct kvm_ldttss_desc *tss_desc;
			
 
				 
			
 
				 	struct page *save_area;
			
 
				+	struct vmcb *current_vmcb;
			
 
				 };
			
 
				 
			
 
				 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
			
@@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index)
 
				 	return false;
			
 
				 }
			
 
				 
			
 
				+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
			
 
				+{
			
 
				+	u8 bit_write;
			
 
				+	unsigned long tmp;
			
 
				+	u32 offset;
			
 
				+	u32 *msrpm;
			
 
				+
			
 
				+	msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
			
 
				+				      to_svm(vcpu)->msrpm;
			
 
				+
			
 
				+	offset    = svm_msrpm_offset(msr);
			
 
				+	bit_write = 2 * (msr & 0x0f) + 1;
			
 
				+	tmp       = msrpm[offset];
			
 
				+
			
 
				+	BUG_ON(offset == MSR_INVALID);
			
 
				+
			
 
				+	return !!test_bit(bit_write,  &tmp);
			
 
				+}
			
 
				+
			
 
				 static void set_msr_interception(u32 *msrpm, unsigned msr,
			
 
				 				 int read, int write)
			
 
				 {
			
@@ -1582,6 +1606,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
				 	u32 dummy;
			
 
				 	u32 eax = 1;
			
 
				 
			
 
				+	svm->spec_ctrl = 0;
			
 
				+
			
 
				 	if (!init_event) {
			
 
				 		svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
			
 
				 					   MSR_IA32_APICBASE_ENABLE;
			
@@ -1703,11 +1729,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
				 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
			
 
				 	kvm_vcpu_uninit(vcpu);
			
 
				 	kmem_cache_free(kvm_vcpu_cache, svm);
			
 
				+	/*
			
 
				+	 * The vmcb page can be recycled, causing a false negative in
			
 
				+	 * svm_vcpu_load(). So do a full IBPB now.
			
 
				+	 */
			
 
				+	indirect_branch_prediction_barrier();
			
 
				 }
			
 
				 
			
 
				 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
			
 
				 {
			
 
				 	struct vcpu_svm *svm = to_svm(vcpu);
			
 
				+	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
			
 
				 	int i;
			
 
				 
			
 
				 	if (unlikely(cpu != vcpu->cpu)) {
			
@@ -1736,6 +1768,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
				 	if (static_cpu_has(X86_FEATURE_RDTSCP))
			
 
				 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
			
 
				 
			
 
				+	if (sd->current_vmcb != svm->vmcb) {
			
 
				+		sd->current_vmcb = svm->vmcb;
			
 
				+		indirect_branch_prediction_barrier();
			
 
				+	}
			
 
				 	avic_vcpu_load(vcpu, cpu);
			
 
				 }
			
 
				 
			
@@ -3593,6 +3629,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
				 	case MSR_VM_CR:
			
 
				 		msr_info->data = svm->nested.vm_cr_msr;
			
 
				 		break;
			
 
				+	case MSR_IA32_SPEC_CTRL:
			
 
				+		if (!msr_info->host_initiated &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
			
 
				+			return 1;
			
 
				+
			
 
				+		msr_info->data = svm->spec_ctrl;
			
 
				+		break;
			
 
				 	case MSR_IA32_UCODE_REV:
			
 
				 		msr_info->data = 0x01000065;
			
 
				 		break;
			
@@ -3684,6 +3727,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
				 	case MSR_IA32_TSC:
			
 
				 		kvm_write_tsc(vcpu, msr);
			
 
				 		break;
			
 
				+	case MSR_IA32_SPEC_CTRL:
			
 
				+		if (!msr->host_initiated &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
			
 
				+			return 1;
			
 
				+
			
 
				+		/* The STIBP bit doesn't fault even if it's not advertised */
			
 
				+		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
			
 
				+			return 1;
			
 
				+
			
 
				+		svm->spec_ctrl = data;
			
 
				+
			
 
				+		if (!data)
			
 
				+			break;
			
 
				+
			
 
				+		/*
			
 
				+		 * For non-nested:
			
 
				+		 * When it's written (to non-zero) for the first time, pass
			
 
				+		 * it through.
			
 
				+		 *
			
 
				+		 * For nested:
			
 
				+		 * The handling of the MSR bitmap for L2 guests is done in
			
 
				+		 * nested_svm_vmrun_msrpm.
			
 
				+		 * We update the L1 MSR bit as well since it will end up
			
 
				+		 * touching the MSR anyway now.
			
 
				+		 */
			
 
				+		set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
			
 
				+		break;
			
 
				+	case MSR_IA32_PRED_CMD:
			
 
				+		if (!msr->host_initiated &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
			
 
				+			return 1;
			
 
				+
			
 
				+		if (data & ~PRED_CMD_IBPB)
			
 
				+			return 1;
			
 
				+
			
 
				+		if (!data)
			
 
				+			break;
			
 
				+
			
 
				+		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
			
 
				+		if (is_guest_mode(vcpu))
			
 
				+			break;
			
 
				+		set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
			
 
				+		break;
			
 
				 	case MSR_STAR:
			
 
				 		svm->vmcb->save.star = data;
			
 
				 		break;
			
@@ -4936,6 +5022,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
				 
			
 
				 	local_irq_enable();
			
 
				 
			
 
				+	/*
			
 
				+	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
			
 
				+	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
			
 
				+	 * is no need to worry about the conditional branch over the wrmsr
			
 
				+	 * being speculatively taken.
			
 
				+	 */
			
 
				+	if (svm->spec_ctrl)
			
 
				+		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
			
 
				+
			
 
				 	asm volatile (
			
 
				 		"push %%" _ASM_BP "; \n\t"
			
 
				 		"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
			
@@ -5028,6 +5123,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
				 #endif
			
 
				 		);
			
 
				 
			
 
				+	/*
			
 
				+	 * We do not use IBRS in the kernel. If this vCPU has used the
			
 
				+	 * SPEC_CTRL MSR it may have left it on; save the value and
			
 
				+	 * turn it off. This is much more efficient than blindly adding
			
 
				+	 * it to the atomic save/restore list. Especially as the former
			
 
				+	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
			
 
				+	 *
			
 
				+	 * For non-nested case:
			
 
				+	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
			
 
				+	 * save it.
			
 
				+	 *
			
 
				+	 * For nested case:
			
 
				+	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
			
 
				+	 * save it.
			
 
				+	 */
			
 
				+	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
			
 
				+		rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
			
 
				+
			
 
				+	if (svm->spec_ctrl)
			
 
				+		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
			
 
				+
			
 
				 	/* Eliminate branch target predictions from guest mode */
			
 
				 	vmexit_fill_RSB();
			
 
				 
			
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -34,6 +34,7 @@
 
				 #include <linux/tboot.h>
			
 
				 #include <linux/hrtimer.h>
			
 
				 #include <linux/frame.h>
			
 
				+#include <linux/nospec.h>
			
 
				 #include "kvm_cache_regs.h"
			
 
				 #include "x86.h"
			
 
				 
			
@@ -111,6 +112,14 @@ static u64 __read_mostly host_xss;
 
				 static bool __read_mostly enable_pml = 1;
			
 
				 module_param_named(pml, enable_pml, bool, S_IRUGO);
			
 
				 
			
 
				+#define MSR_TYPE_R	1
			
 
				+#define MSR_TYPE_W	2
			
 
				+#define MSR_TYPE_RW	3
			
 
				+
			
 
				+#define MSR_BITMAP_MODE_X2APIC		1
			
 
				+#define MSR_BITMAP_MODE_X2APIC_APICV	2
			
 
				+#define MSR_BITMAP_MODE_LM		4
			
 
				+
			
 
				 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
			
 
				 
			
 
				 /* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
			
@@ -185,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO);
 
				 extern const ulong vmx_return;
			
 
				 
			
 
				 #define NR_AUTOLOAD_MSRS 8
			
 
				-#define VMCS02_POOL_SIZE 1
			
 
				 
			
 
				 struct vmcs {
			
 
				 	u32 revision_id;
			
@@ -210,6 +218,7 @@ struct loaded_vmcs {
 
				 	int soft_vnmi_blocked;
			
 
				 	ktime_t entry_time;
			
 
				 	s64 vnmi_blocked_time;
			
 
				+	unsigned long *msr_bitmap;
			
 
				 	struct list_head loaded_vmcss_on_cpu_link;
			
 
				 };
			
 
				 
			
@@ -226,7 +235,7 @@ struct shared_msr_entry {
 
				  * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
			
 
				  * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
			
 
				  * More than one of these structures may exist, if L1 runs multiple L2 guests.
			
 
				- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
			
 
				+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
			
 
				  * underlying hardware which will be used to run L2.
			
 
				  * This structure is packed to ensure that its layout is identical across
			
 
				  * machines (necessary for live migration).
			
@@ -409,13 +418,6 @@ struct __packed vmcs12 {
 
				  */
			
 
				 #define VMCS12_SIZE 0x1000
			
 
				 
			
 
				-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
			
 
				-struct vmcs02_list {
			
 
				-	struct list_head list;
			
 
				-	gpa_t vmptr;
			
 
				-	struct loaded_vmcs vmcs02;
			
 
				-};
			
 
				-
			
 
				 /*
			
 
				  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
			
 
				  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
			
@@ -440,15 +442,15 @@ struct nested_vmx {
 
				 	 */
			
 
				 	bool sync_shadow_vmcs;
			
 
				 
			
 
				-	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
			
 
				-	struct list_head vmcs02_pool;
			
 
				-	int vmcs02_num;
			
 
				 	bool change_vmcs01_virtual_x2apic_mode;
			
 
				 	/* L2 must run next, and mustn't decide to exit to L1. */
			
 
				 	bool nested_run_pending;
			
 
				+
			
 
				+	struct loaded_vmcs vmcs02;
			
 
				+
			
 
				 	/*
			
 
				-	 * Guest pages referred to in vmcs02 with host-physical pointers, so
			
 
				-	 * we must keep them pinned while L2 runs.
			
 
				+	 * Guest pages referred to in the vmcs02 with host-physical
			
 
				+	 * pointers, so we must keep them pinned while L2 runs.
			
 
				 	 */
			
 
				 	struct page *apic_access_page;
			
 
				 	struct page *virtual_apic_page;
			
@@ -457,8 +459,6 @@ struct nested_vmx {
 
				 	bool pi_pending;
			
 
				 	u16 posted_intr_nv;
			
 
				 
			
 
				-	unsigned long *msr_bitmap;
			
 
				-
			
 
				 	struct hrtimer preemption_timer;
			
 
				 	bool preemption_timer_expired;
			
 
				 
			
@@ -581,6 +581,7 @@ struct vcpu_vmx {
 
				 	struct kvm_vcpu       vcpu;
			
 
				 	unsigned long         host_rsp;
			
 
				 	u8                    fail;
			
 
				+	u8		      msr_bitmap_mode;
			
 
				 	u32                   exit_intr_info;
			
 
				 	u32                   idt_vectoring_info;
			
 
				 	ulong                 rflags;
			
@@ -592,6 +593,10 @@ struct vcpu_vmx {
 
				 	u64 		      msr_host_kernel_gs_base;
			
 
				 	u64 		      msr_guest_kernel_gs_base;
			
 
				 #endif
			
 
				+
			
 
				+	u64 		      arch_capabilities;
			
 
				+	u64 		      spec_ctrl;
			
 
				+
			
 
				 	u32 vm_entry_controls_shadow;
			
 
				 	u32 vm_exit_controls_shadow;
			
 
				 	u32 secondary_exec_control;
			
@@ -898,21 +903,18 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 
				 
			
 
				 static inline short vmcs_field_to_offset(unsigned long field)
			
 
				 {
			
 
				-	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
			
 
				+	const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
			
 
				+	unsigned short offset;
			
 
				 
			
 
				-	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
			
 
				+	BUILD_BUG_ON(size > SHRT_MAX);
			
 
				+	if (field >= size)
			
 
				 		return -ENOENT;
			
 
				 
			
 
				-	/*
			
 
				-	 * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
			
 
				-	 * generic mechanism.
			
 
				-	 */
			
 
				-	asm("lfence");
			
 
				-
			
 
				-	if (vmcs_field_to_offset_table[field] == 0)
			
 
				+	field = array_index_nospec(field, size);
			
 
				+	offset = vmcs_field_to_offset_table[field];
			
 
				+	if (offset == 0)
			
 
				 		return -ENOENT;
			
 
				-
			
 
				-	return vmcs_field_to_offset_table[field];
			
 
				+	return offset;
			
 
				 }
			
 
				 
			
 
				 static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
			
@@ -935,6 +937,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
 
				 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
			
 
				 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
			
 
				 					    u16 error_code);
			
 
				+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
			
 
				+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
			
 
				+							  u32 msr, int type);
			
 
				 
			
 
				 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
			
 
				 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
			
@@ -954,12 +959,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
 
				 enum {
			
 
				 	VMX_IO_BITMAP_A,
			
 
				 	VMX_IO_BITMAP_B,
			
 
				-	VMX_MSR_BITMAP_LEGACY,
			
 
				-	VMX_MSR_BITMAP_LONGMODE,
			
 
				-	VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
			
 
				-	VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
			
 
				-	VMX_MSR_BITMAP_LEGACY_X2APIC,
			
 
				-	VMX_MSR_BITMAP_LONGMODE_X2APIC,
			
 
				 	VMX_VMREAD_BITMAP,
			
 
				 	VMX_VMWRITE_BITMAP,
			
 
				 	VMX_BITMAP_NR
			
@@ -969,12 +968,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
 
				 
			
 
				 #define vmx_io_bitmap_a                      (vmx_bitmap[VMX_IO_BITMAP_A])
			
 
				 #define vmx_io_bitmap_b                      (vmx_bitmap[VMX_IO_BITMAP_B])
			
 
				-#define vmx_msr_bitmap_legacy                (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
			
 
				-#define vmx_msr_bitmap_longmode              (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
			
 
				-#define vmx_msr_bitmap_legacy_x2apic_apicv   (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
			
 
				-#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
			
 
				-#define vmx_msr_bitmap_legacy_x2apic         (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
			
 
				-#define vmx_msr_bitmap_longmode_x2apic       (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
			
 
				 #define vmx_vmread_bitmap                    (vmx_bitmap[VMX_VMREAD_BITMAP])
			
 
				 #define vmx_vmwrite_bitmap                   (vmx_bitmap[VMX_VMWRITE_BITMAP])
			
 
				 
			
@@ -1918,6 +1911,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
				 	vmcs_write32(EXCEPTION_BITMAP, eb);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Check if MSR is intercepted for currently loaded MSR bitmap.
			
 
				+ */
			
 
				+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
			
 
				+{
			
 
				+	unsigned long *msr_bitmap;
			
 
				+	int f = sizeof(unsigned long);
			
 
				+
			
 
				+	if (!cpu_has_vmx_msr_bitmap())
			
 
				+		return true;
			
 
				+
			
 
				+	msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
			
 
				+
			
 
				+	if (msr <= 0x1fff) {
			
 
				+		return !!test_bit(msr, msr_bitmap + 0x800 / f);
			
 
				+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
			
 
				+		msr &= 0x1fff;
			
 
				+		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
			
 
				+	}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Check if MSR is intercepted for L01 MSR bitmap.
			
 
				+ */
			
 
				+static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
			
 
				+{
			
 
				+	unsigned long *msr_bitmap;
			
 
				+	int f = sizeof(unsigned long);
			
 
				+
			
 
				+	if (!cpu_has_vmx_msr_bitmap())
			
 
				+		return true;
			
 
				+
			
 
				+	msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
			
 
				+
			
 
				+	if (msr <= 0x1fff) {
			
 
				+		return !!test_bit(msr, msr_bitmap + 0x800 / f);
			
 
				+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
			
 
				+		msr &= 0x1fff;
			
 
				+		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
			
 
				+	}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
			
 
				 		unsigned long entry, unsigned long exit)
			
 
				 {
			
@@ -2296,6 +2335,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
				 	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
			
 
				 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
			
 
				 		vmcs_load(vmx->loaded_vmcs->vmcs);
			
 
				+		indirect_branch_prediction_barrier();
			
 
				 	}
			
 
				 
			
 
				 	if (!already_loaded) {
			
@@ -2572,36 +2612,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 
				 	vmx->guest_msrs[from] = tmp;
			
 
				 }
			
 
				 
			
 
				-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
			
 
				-{
			
 
				-	unsigned long *msr_bitmap;
			
 
				-
			
 
				-	if (is_guest_mode(vcpu))
			
 
				-		msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
			
 
				-	else if (cpu_has_secondary_exec_ctrls() &&
			
 
				-		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
			
 
				-		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
			
 
				-		if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
			
 
				-			if (is_long_mode(vcpu))
			
 
				-				msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
			
 
				-			else
			
 
				-				msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
			
 
				-		} else {
			
 
				-			if (is_long_mode(vcpu))
			
 
				-				msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
			
 
				-			else
			
 
				-				msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
			
 
				-		}
			
 
				-	} else {
			
 
				-		if (is_long_mode(vcpu))
			
 
				-			msr_bitmap = vmx_msr_bitmap_longmode;
			
 
				-		else
			
 
				-			msr_bitmap = vmx_msr_bitmap_legacy;
			
 
				-	}
			
 
				-
			
 
				-	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Set up the vmcs to automatically save and restore system
			
 
				  * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
			
@@ -2642,7 +2652,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 
				 	vmx->save_nmsrs = save_nmsrs;
			
 
				 
			
 
				 	if (cpu_has_vmx_msr_bitmap())
			
 
				-		vmx_set_msr_bitmap(&vmx->vcpu);
			
 
				+		vmx_update_msr_bitmap(&vmx->vcpu);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -3276,6 +3286,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
				 	case MSR_IA32_TSC:
			
 
				 		msr_info->data = guest_read_tsc(vcpu);
			
 
				 		break;
			
 
				+	case MSR_IA32_SPEC_CTRL:
			
 
				+		if (!msr_info->host_initiated &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
			
 
				+			return 1;
			
 
				+
			
 
				+		msr_info->data = to_vmx(vcpu)->spec_ctrl;
			
 
				+		break;
			
 
				+	case MSR_IA32_ARCH_CAPABILITIES:
			
 
				+		if (!msr_info->host_initiated &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
			
 
				+			return 1;
			
 
				+		msr_info->data = to_vmx(vcpu)->arch_capabilities;
			
 
				+		break;
			
 
				 	case MSR_IA32_SYSENTER_CS:
			
 
				 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
			
 
				 		break;
			
@@ -3383,6 +3407,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
				 	case MSR_IA32_TSC:
			
 
				 		kvm_write_tsc(vcpu, msr_info);
			
 
				 		break;
			
 
				+	case MSR_IA32_SPEC_CTRL:
			
 
				+		if (!msr_info->host_initiated &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
			
 
				+			return 1;
			
 
				+
			
 
				+		/* The STIBP bit doesn't fault even if it's not advertised */
			
 
				+		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
			
 
				+			return 1;
			
 
				+
			
 
				+		vmx->spec_ctrl = data;
			
 
				+
			
 
				+		if (!data)
			
 
				+			break;
			
 
				+
			
 
				+		/*
			
 
				+		 * For non-nested:
			
 
				+		 * When it's written (to non-zero) for the first time, pass
			
 
				+		 * it through.
			
 
				+		 *
			
 
				+		 * For nested:
			
 
				+		 * The handling of the MSR bitmap for L2 guests is done in
			
 
				+		 * nested_vmx_merge_msr_bitmap. We should not touch the
			
 
				+		 * vmcs02.msr_bitmap here since it gets completely overwritten
			
 
				+		 * in the merging. We update the vmcs01 here for L1 as well
			
 
				+		 * since it will end up touching the MSR anyway now.
			
 
				+		 */
			
 
				+		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
			
 
				+					      MSR_IA32_SPEC_CTRL,
			
 
				+					      MSR_TYPE_RW);
			
 
				+		break;
			
 
				+	case MSR_IA32_PRED_CMD:
			
 
				+		if (!msr_info->host_initiated &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
			
 
				+		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
			
 
				+			return 1;
			
 
				+
			
 
				+		if (data & ~PRED_CMD_IBPB)
			
 
				+			return 1;
			
 
				+
			
 
				+		if (!data)
			
 
				+			break;
			
 
				+
			
 
				+		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
			
 
				+
			
 
				+		/*
			
 
				+		 * For non-nested:
			
 
				+		 * When it's written (to non-zero) for the first time, pass
			
 
				+		 * it through.
			
 
				+		 *
			
 
				+		 * For nested:
			
 
				+		 * The handling of the MSR bitmap for L2 guests is done in
			
 
				+		 * nested_vmx_merge_msr_bitmap. We should not touch the
			
 
				+		 * vmcs02.msr_bitmap here since it gets completely overwritten
			
 
				+		 * in the merging.
			
 
				+		 */
			
 
				+		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
			
 
				+					      MSR_TYPE_W);
			
 
				+		break;
			
 
				+	case MSR_IA32_ARCH_CAPABILITIES:
			
 
				+		if (!msr_info->host_initiated)
			
 
				+			return 1;
			
 
				+		vmx->arch_capabilities = data;
			
 
				+		break;
			
 
				 	case MSR_IA32_CR_PAT:
			
 
				 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
			
 
				 			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
			
@@ -3837,11 +3925,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
 
				 	return vmcs;
			
 
				 }
			
 
				 
			
 
				-static struct vmcs *alloc_vmcs(void)
			
 
				-{
			
 
				-	return alloc_vmcs_cpu(raw_smp_processor_id());
			
 
				-}
			
 
				-
			
 
				 static void free_vmcs(struct vmcs *vmcs)
			
 
				 {
			
 
				 	free_pages((unsigned long)vmcs, vmcs_config.order);
			
@@ -3857,9 +3940,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 
				 	loaded_vmcs_clear(loaded_vmcs);
			
 
				 	free_vmcs(loaded_vmcs->vmcs);
			
 
				 	loaded_vmcs->vmcs = NULL;
			
 
				+	if (loaded_vmcs->msr_bitmap)
			
 
				+		free_page((unsigned long)loaded_vmcs->msr_bitmap);
			
 
				 	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
			
 
				 }
			
 
				 
			
 
				+static struct vmcs *alloc_vmcs(void)
			
 
				+{
			
 
				+	return alloc_vmcs_cpu(raw_smp_processor_id());
			
 
				+}
			
 
				+
			
 
				+static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
			
 
				+{
			
 
				+	loaded_vmcs->vmcs = alloc_vmcs();
			
 
				+	if (!loaded_vmcs->vmcs)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	loaded_vmcs->shadow_vmcs = NULL;
			
 
				+	loaded_vmcs_init(loaded_vmcs);
			
 
				+
			
 
				+	if (cpu_has_vmx_msr_bitmap()) {
			
 
				+		loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
			
 
				+		if (!loaded_vmcs->msr_bitmap)
			
 
				+			goto out_vmcs;
			
 
				+		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
			
 
				+	}
			
 
				+	return 0;
			
 
				+
			
 
				+out_vmcs:
			
 
				+	free_loaded_vmcs(loaded_vmcs);
			
 
				+	return -ENOMEM;
			
 
				+}
			
 
				+
			
 
				 static void free_kvm_area(void)
			
 
				 {
			
 
				 	int cpu;
			
@@ -4918,10 +5030,8 @@ static void free_vpid(int vpid)
 
				 	spin_unlock(&vmx_vpid_lock);
			
 
				 }
			
 
				 
			
 
				-#define MSR_TYPE_R	1
			
 
				-#define MSR_TYPE_W	2
			
 
				-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
			
 
				-						u32 msr, int type)
			
 
				+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
			
 
				+							  u32 msr, int type)
			
 
				 {
			
 
				 	int f = sizeof(unsigned long);
			
 
				 
			
@@ -4955,6 +5065,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
			
 
				+							 u32 msr, int type)
			
 
				+{
			
 
				+	int f = sizeof(unsigned long);
			
 
				+
			
 
				+	if (!cpu_has_vmx_msr_bitmap())
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
			
 
				+	 * have the write-low and read-high bitmap offsets the wrong way round.
			
 
				+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
			
 
				+	 */
			
 
				+	if (msr <= 0x1fff) {
			
 
				+		if (type & MSR_TYPE_R)
			
 
				+			/* read-low */
			
 
				+			__set_bit(msr, msr_bitmap + 0x000 / f);
			
 
				+
			
 
				+		if (type & MSR_TYPE_W)
			
 
				+			/* write-low */
			
 
				+			__set_bit(msr, msr_bitmap + 0x800 / f);
			
 
				+
			
 
				+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
			
 
				+		msr &= 0x1fff;
			
 
				+		if (type & MSR_TYPE_R)
			
 
				+			/* read-high */
			
 
				+			__set_bit(msr, msr_bitmap + 0x400 / f);
			
 
				+
			
 
				+		if (type & MSR_TYPE_W)
			
 
				+			/* write-high */
			
 
				+			__set_bit(msr, msr_bitmap + 0xc00 / f);
			
 
				+
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
			
 
				+			     			      u32 msr, int type, bool value)
			
 
				+{
			
 
				+	if (value)
			
 
				+		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
			
 
				+	else
			
 
				+		vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * If a msr is allowed by L0, we should check whether it is allowed by L1.
			
 
				  * The corresponding bit will be cleared unless both of L0 and L1 allow it.
			
@@ -5001,30 +5155,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
			
 
				+static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				-	if (!longmode_only)
			
 
				-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
			
 
				-						msr, MSR_TYPE_R | MSR_TYPE_W);
			
 
				-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
			
 
				-						msr, MSR_TYPE_R | MSR_TYPE_W);
			
 
				+	u8 mode = 0;
			
 
				+
			
 
				+	if (cpu_has_secondary_exec_ctrls() &&
			
 
				+	    (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
			
 
				+	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
			
 
				+		mode |= MSR_BITMAP_MODE_X2APIC;
			
 
				+		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
			
 
				+			mode |= MSR_BITMAP_MODE_X2APIC_APICV;
			
 
				+	}
			
 
				+
			
 
				+	if (is_long_mode(vcpu))
			
 
				+		mode |= MSR_BITMAP_MODE_LM;
			
 
				+
			
 
				+	return mode;
			
 
				 }
			
 
				 
			
 
				-static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
			
 
				+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
			
 
				+
			
 
				+static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
			
 
				+					 u8 mode)
			
 
				 {
			
 
				-	if (apicv_active) {
			
 
				-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
			
 
				-				msr, type);
			
 
				-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
			
 
				-				msr, type);
			
 
				-	} else {
			
 
				-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
			
 
				-				msr, type);
			
 
				-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
			
 
				-				msr, type);
			
 
				+	int msr;
			
 
				+
			
 
				+	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
			
 
				+		unsigned word = msr / BITS_PER_LONG;
			
 
				+		msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
			
 
				+		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
			
 
				+	}
			
 
				+
			
 
				+	if (mode & MSR_BITMAP_MODE_X2APIC) {
			
 
				+		/*
			
 
				+		 * TPR reads and writes can be virtualized even if virtual interrupt
			
 
				+		 * delivery is not in use.
			
 
				+		 */
			
 
				+		vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
			
 
				+		if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
			
 
				+			vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
			
 
				+			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
			
 
				+			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
			
 
				+		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	struct vcpu_vmx *vmx = to_vmx(vcpu);
			
 
				+	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
			
 
				+	u8 mode = vmx_msr_bitmap_mode(vcpu);
			
 
				+	u8 changed = mode ^ vmx->msr_bitmap_mode;
			
 
				+
			
 
				+	if (!changed)
			
 
				+		return;
			
 
				+
			
 
				+	vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
			
 
				+				  !(mode & MSR_BITMAP_MODE_LM));
			
 
				+
			
 
				+	if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
			
 
				+		vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
			
 
				+
			
 
				+	vmx->msr_bitmap_mode = mode;
			
 
				+}
			
 
				+
			
 
				 static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				 	return enable_apicv;
			
@@ -5274,7 +5468,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 
				 	}
			
 
				 
			
 
				 	if (cpu_has_vmx_msr_bitmap())
			
 
				-		vmx_set_msr_bitmap(vcpu);
			
 
				+		vmx_update_msr_bitmap(vcpu);
			
 
				 }
			
 
				 
			
 
				 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
			
@@ -5461,7 +5655,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
				 		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
			
 
				 	}
			
 
				 	if (cpu_has_vmx_msr_bitmap())
			
 
				-		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
			
 
				+		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
			
 
				 
			
 
				 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
			
 
				 
			
@@ -5539,6 +5733,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
				 		++vmx->nmsrs;
			
 
				 	}
			
 
				 
			
 
				+	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
			
 
				+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
			
 
				 
			
 
				 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
			
 
				 
			
@@ -5567,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
				 	u64 cr0;
			
 
				 
			
 
				 	vmx->rmode.vm86_active = 0;
			
 
				+	vmx->spec_ctrl = 0;
			
 
				 
			
 
				 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
			
 
				 	kvm_set_cr8(vcpu, 0);
			
@@ -6744,7 +6941,7 @@ void vmx_enable_tdp(void)
 
				 
			
 
				 static __init int hardware_setup(void)
			
 
				 {
			
 
				-	int r = -ENOMEM, i, msr;
			
 
				+	int r = -ENOMEM, i;
			
 
				 
			
 
				 	rdmsrl_safe(MSR_EFER, &host_efer);
			
 
				 
			
@@ -6764,9 +6961,6 @@ static __init int hardware_setup(void)
 
				 
			
 
				 	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
			
 
				 
			
 
				-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
			
 
				-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
			
 
				-
			
 
				 	if (setup_vmcs_config(&vmcs_config) < 0) {
			
 
				 		r = -EIO;
			
 
				 		goto out;
			
@@ -6835,42 +7029,8 @@ static __init int hardware_setup(void)
 
				 		kvm_tsc_scaling_ratio_frac_bits = 48;
			
 
				 	}
			
 
				 
			
 
				-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
			
 
				-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
			
 
				-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
			
 
				-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
			
 
				-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
			
 
				-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
			
 
				-
			
 
				-	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
			
 
				-			vmx_msr_bitmap_legacy, PAGE_SIZE);
			
 
				-	memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
			
 
				-			vmx_msr_bitmap_longmode, PAGE_SIZE);
			
 
				-	memcpy(vmx_msr_bitmap_legacy_x2apic,
			
 
				-			vmx_msr_bitmap_legacy, PAGE_SIZE);
			
 
				-	memcpy(vmx_msr_bitmap_longmode_x2apic,
			
 
				-			vmx_msr_bitmap_longmode, PAGE_SIZE);
			
 
				-
			
 
				 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
			
 
				 
			
 
				-	for (msr = 0x800; msr <= 0x8ff; msr++) {
			
 
				-		if (msr == 0x839 /* TMCCT */)
			
 
				-			continue;
			
 
				-		vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * TPR reads and writes can be virtualized even if virtual interrupt
			
 
				-	 * delivery is not in use.
			
 
				-	 */
			
 
				-	vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
			
 
				-	vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
			
 
				-
			
 
				-	/* EOI */
			
 
				-	vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
			
 
				-	/* SELF-IPI */
			
 
				-	vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
			
 
				-
			
 
				 	if (enable_ept)
			
 
				 		vmx_enable_tdp();
			
 
				 	else
			
@@ -6973,94 +7133,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
 
				 	return handle_nop(vcpu);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
			
 
				- * We could reuse a single VMCS for all the L2 guests, but we also want the
			
 
				- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
			
 
				- * allows keeping them loaded on the processor, and in the future will allow
			
 
				- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
			
 
				- * every entry if they never change.
			
 
				- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
			
 
				- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
			
 
				- *
			
 
				- * The following functions allocate and free a vmcs02 in this pool.
			
 
				- */
			
 
				-
			
 
				-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
			
 
				-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
			
 
				-{
			
 
				-	struct vmcs02_list *item;
			
 
				-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
			
 
				-		if (item->vmptr == vmx->nested.current_vmptr) {
			
 
				-			list_move(&item->list, &vmx->nested.vmcs02_pool);
			
 
				-			return &item->vmcs02;
			
 
				-		}
			
 
				-
			
 
				-	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
			
 
				-		/* Recycle the least recently used VMCS. */
			
 
				-		item = list_last_entry(&vmx->nested.vmcs02_pool,
			
 
				-				       struct vmcs02_list, list);
			
 
				-		item->vmptr = vmx->nested.current_vmptr;
			
 
				-		list_move(&item->list, &vmx->nested.vmcs02_pool);
			
 
				-		return &item->vmcs02;
			
 
				-	}
			
 
				-
			
 
				-	/* Create a new VMCS */
			
 
				-	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
			
 
				-	if (!item)
			
 
				-		return NULL;
			
 
				-	item->vmcs02.vmcs = alloc_vmcs();
			
 
				-	item->vmcs02.shadow_vmcs = NULL;
			
 
				-	if (!item->vmcs02.vmcs) {
			
 
				-		kfree(item);
			
 
				-		return NULL;
			
 
				-	}
			
 
				-	loaded_vmcs_init(&item->vmcs02);
			
 
				-	item->vmptr = vmx->nested.current_vmptr;
			
 
				-	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
			
 
				-	vmx->nested.vmcs02_num++;
			
 
				-	return &item->vmcs02;
			
 
				-}
			
 
				-
			
 
				-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
			
 
				-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
			
 
				-{
			
 
				-	struct vmcs02_list *item;
			
 
				-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
			
 
				-		if (item->vmptr == vmptr) {
			
 
				-			free_loaded_vmcs(&item->vmcs02);
			
 
				-			list_del(&item->list);
			
 
				-			kfree(item);
			
 
				-			vmx->nested.vmcs02_num--;
			
 
				-			return;
			
 
				-		}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Free all VMCSs saved for this vcpu, except the one pointed by
			
 
				- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
			
 
				- * must be &vmx->vmcs01.
			
 
				- */
			
 
				-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
			
 
				-{
			
 
				-	struct vmcs02_list *item, *n;
			
 
				-
			
 
				-	WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
			
 
				-	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
			
 
				-		/*
			
 
				-		 * Something will leak if the above WARN triggers.  Better than
			
 
				-		 * a use-after-free.
			
 
				-		 */
			
 
				-		if (vmx->loaded_vmcs == &item->vmcs02)
			
 
				-			continue;
			
 
				-
			
 
				-		free_loaded_vmcs(&item->vmcs02);
			
 
				-		list_del(&item->list);
			
 
				-		kfree(item);
			
 
				-		vmx->nested.vmcs02_num--;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
			
 
				  * set the success or error code of an emulated VMX instruction, as specified
			
@@ -7241,13 +7313,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 
				 {
			
 
				 	struct vcpu_vmx *vmx = to_vmx(vcpu);
			
 
				 	struct vmcs *shadow_vmcs;
			
 
				+	int r;
			
 
				 
			
 
				-	if (cpu_has_vmx_msr_bitmap()) {
			
 
				-		vmx->nested.msr_bitmap =
			
 
				-				(unsigned long *)__get_free_page(GFP_KERNEL);
			
 
				-		if (!vmx->nested.msr_bitmap)
			
 
				-			goto out_msr_bitmap;
			
 
				-	}
			
 
				+	r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
			
 
				+	if (r < 0)
			
 
				+		goto out_vmcs02;
			
 
				 
			
 
				 	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
			
 
				 	if (!vmx->nested.cached_vmcs12)
			
@@ -7264,9 +7334,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 
				 		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
			
 
				 	}
			
 
				 
			
 
				-	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
			
 
				-	vmx->nested.vmcs02_num = 0;
			
 
				-
			
 
				 	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
			
 
				 		     HRTIMER_MODE_REL_PINNED);
			
 
				 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
			
@@ -7278,9 +7345,9 @@ out_shadow_vmcs:
 
				 	kfree(vmx->nested.cached_vmcs12);
			
 
				 
			
 
				 out_cached_vmcs12:
			
 
				-	free_page((unsigned long)vmx->nested.msr_bitmap);
			
 
				+	free_loaded_vmcs(&vmx->nested.vmcs02);
			
 
				 
			
 
				-out_msr_bitmap:
			
 
				+out_vmcs02:
			
 
				 	return -ENOMEM;
			
 
				 }
			
 
				 
			
@@ -7423,10 +7490,6 @@ static void free_nested(struct vcpu_vmx *vmx)
 
				 	free_vpid(vmx->nested.vpid02);
			
 
				 	vmx->nested.posted_intr_nv = -1;
			
 
				 	vmx->nested.current_vmptr = -1ull;
			
 
				-	if (vmx->nested.msr_bitmap) {
			
 
				-		free_page((unsigned long)vmx->nested.msr_bitmap);
			
 
				-		vmx->nested.msr_bitmap = NULL;
			
 
				-	}
			
 
				 	if (enable_shadow_vmcs) {
			
 
				 		vmx_disable_shadow_vmcs(vmx);
			
 
				 		vmcs_clear(vmx->vmcs01.shadow_vmcs);
			
@@ -7434,7 +7497,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 
				 		vmx->vmcs01.shadow_vmcs = NULL;
			
 
				 	}
			
 
				 	kfree(vmx->nested.cached_vmcs12);
			
 
				-	/* Unpin physical memory we referred to in current vmcs02 */
			
 
				+	/* Unpin physical memory we referred to in the vmcs02 */
			
 
				 	if (vmx->nested.apic_access_page) {
			
 
				 		kvm_release_page_dirty(vmx->nested.apic_access_page);
			
 
				 		vmx->nested.apic_access_page = NULL;
			
@@ -7450,7 +7513,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 
				 		vmx->nested.pi_desc = NULL;
			
 
				 	}
			
 
				 
			
 
				-	nested_free_all_saved_vmcss(vmx);
			
 
				+	free_loaded_vmcs(&vmx->nested.vmcs02);
			
 
				 }
			
 
				 
			
 
				 /* Emulate the VMXOFF instruction */
			
@@ -7493,8 +7556,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 
				 			vmptr + offsetof(struct vmcs12, launch_state),
			
 
				 			&zero, sizeof(zero));
			
 
				 
			
 
				-	nested_free_vmcs02(vmx, vmptr);
			
 
				-
			
 
				 	nested_vmx_succeed(vcpu);
			
 
				 	return kvm_skip_emulated_instruction(vcpu);
			
 
				 }
			
@@ -8406,10 +8467,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 
				 
			
 
				 	/*
			
 
				 	 * The host physical addresses of some pages of guest memory
			
 
				-	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
			
 
				-	 * may write to these pages via their host physical address while
			
 
				-	 * L2 is running, bypassing any address-translation-based dirty
			
 
				-	 * tracking (e.g. EPT write protection).
			
 
				+	 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
			
 
				+	 * Page). The CPU may write to these pages via their host
			
 
				+	 * physical address while L2 is running, bypassing any
			
 
				+	 * address-translation-based dirty tracking (e.g. EPT write
			
 
				+	 * protection).
			
 
				 	 *
			
 
				 	 * Mark them dirty on every exit from L2 to prevent them from
			
 
				 	 * getting out of sync with dirty tracking.
			
@@ -8943,7 +9005,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 
				 	}
			
 
				 	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
			
 
				 
			
 
				-	vmx_set_msr_bitmap(vcpu);
			
 
				+	vmx_update_msr_bitmap(vcpu);
			
 
				 }
			
 
				 
			
 
				 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
			
@@ -9373,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
				 
			
 
				 	vmx_arm_hv_timer(vcpu);
			
 
				 
			
 
				+	/*
			
 
				+	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
			
 
				+	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
			
 
				+	 * is no need to worry about the conditional branch over the wrmsr
			
 
				+	 * being speculatively taken.
			
 
				+	 */
			
 
				+	if (vmx->spec_ctrl)
			
 
				+		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
			
 
				+
			
 
				 	vmx->__launched = vmx->loaded_vmcs->launched;
			
 
				 	asm(
			
 
				 		/* Store host registers */
			
@@ -9491,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
				 #endif
			
 
				 	      );
			
 
				 
			
 
				+	/*
			
 
				+	 * We do not use IBRS in the kernel. If this vCPU has used the
			
 
				+	 * SPEC_CTRL MSR it may have left it on; save the value and
			
 
				+	 * turn it off. This is much more efficient than blindly adding
			
 
				+	 * it to the atomic save/restore list. Especially as the former
			
 
				+	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
			
 
				+	 *
			
 
				+	 * For non-nested case:
			
 
				+	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
			
 
				+	 * save it.
			
 
				+	 *
			
 
				+	 * For nested case:
			
 
				+	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
			
 
				+	 * save it.
			
 
				+	 */
			
 
				+	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
			
 
				+		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
			
 
				+
			
 
				+	if (vmx->spec_ctrl)
			
 
				+		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
			
 
				+
			
 
				 	/* Eliminate branch target predictions from guest mode */
			
 
				 	vmexit_fill_RSB();
			
 
				 
			
@@ -9604,6 +9696,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
				 {
			
 
				 	int err;
			
 
				 	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
			
 
				+	unsigned long *msr_bitmap;
			
 
				 	int cpu;
			
 
				 
			
 
				 	if (!vmx)
			
@@ -9636,13 +9729,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
				 	if (!vmx->guest_msrs)
			
 
				 		goto free_pml;
			
 
				 
			
 
				-	vmx->loaded_vmcs = &vmx->vmcs01;
			
 
				-	vmx->loaded_vmcs->vmcs = alloc_vmcs();
			
 
				-	vmx->loaded_vmcs->shadow_vmcs = NULL;
			
 
				-	if (!vmx->loaded_vmcs->vmcs)
			
 
				+	err = alloc_loaded_vmcs(&vmx->vmcs01);
			
 
				+	if (err < 0)
			
 
				 		goto free_msrs;
			
 
				-	loaded_vmcs_init(vmx->loaded_vmcs);
			
 
				 
			
 
				+	msr_bitmap = vmx->vmcs01.msr_bitmap;
			
 
				+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
			
 
				+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
			
 
				+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
			
 
				+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
			
 
				+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
			
 
				+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
			
 
				+	vmx->msr_bitmap_mode = 0;
			
 
				+
			
 
				+	vmx->loaded_vmcs = &vmx->vmcs01;
			
 
				 	cpu = get_cpu();
			
 
				 	vmx_vcpu_load(&vmx->vcpu, cpu);
			
 
				 	vmx->vcpu.cpu = cpu;
			
@@ -10105,10 +10205,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 
				 	int msr;
			
 
				 	struct page *page;
			
 
				 	unsigned long *msr_bitmap_l1;
			
 
				-	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
			
 
				+	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
			
 
				+	/*
			
 
				+	 * pred_cmd & spec_ctrl are trying to verify two things:
			
 
				+	 *
			
 
				+	 * 1. L0 gave permission to L1 to actually pass through the MSR. This
			
 
				+	 *    ensures that we do not accidentally generate an L02 MSR bitmap
			
 
				+	 *    from the L12 MSR bitmap that is too permissive.
			
 
				+	 * 2. That L1 or L2s have actually used the MSR. This avoids
			
 
				+	 *    unnecessary merging of the bitmap if the MSR is unused. This
			
 
				+	 *    works properly because we only update the L01 MSR bitmap lazily.
			
 
				+	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
			
 
				+	 *    updated to reflect this when L1 (or its L2s) actually write to
			
 
				+	 *    the MSR.
			
 
				+	 */
			
 
				+	bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
			
 
				+	bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
			
 
				 
			
 
				-	/* This shortcut is ok because we support only x2APIC MSRs so far. */
			
 
				-	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
			
 
				+	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
			
 
				+	    !pred_cmd && !spec_ctrl)
			
 
				 		return false;
			
 
				 
			
 
				 	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
			
@@ -10141,6 +10256,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 
				 				MSR_TYPE_W);
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				+	if (spec_ctrl)
			
 
				+		nested_vmx_disable_intercept_for_msr(
			
 
				+					msr_bitmap_l1, msr_bitmap_l0,
			
 
				+					MSR_IA32_SPEC_CTRL,
			
 
				+					MSR_TYPE_R | MSR_TYPE_W);
			
 
				+
			
 
				+	if (pred_cmd)
			
 
				+		nested_vmx_disable_intercept_for_msr(
			
 
				+					msr_bitmap_l1, msr_bitmap_l0,
			
 
				+					MSR_IA32_PRED_CMD,
			
 
				+					MSR_TYPE_W);
			
 
				+
			
 
				 	kunmap(page);
			
 
				 	kvm_release_page_clean(page);
			
 
				 
			
@@ -10682,6 +10810,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
				 	if (kvm_has_tsc_control)
			
 
				 		decache_tsc_multiplier(vmx);
			
 
				 
			
 
				+	if (cpu_has_vmx_msr_bitmap())
			
 
				+		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
			
 
				+
			
 
				 	if (enable_vpid) {
			
 
				 		/*
			
 
				 		 * There is no direct mapping between vpid02 and vpid12, the
			
@@ -10903,20 +11034,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 
				 {
			
 
				 	struct vcpu_vmx *vmx = to_vmx(vcpu);
			
 
				 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
			
 
				-	struct loaded_vmcs *vmcs02;
			
 
				 	u32 msr_entry_idx;
			
 
				 	u32 exit_qual;
			
 
				 
			
 
				-	vmcs02 = nested_get_current_vmcs02(vmx);
			
 
				-	if (!vmcs02)
			
 
				-		return -ENOMEM;
			
 
				-
			
 
				 	enter_guest_mode(vcpu);
			
 
				 
			
 
				 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
			
 
				 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
			
 
				 
			
 
				-	vmx_switch_vmcs(vcpu, vmcs02);
			
 
				+	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
			
 
				 	vmx_segment_cache_clear(vmx);
			
 
				 
			
 
				 	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
			
@@ -11485,7 +11611,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 
				 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
			
 
				 
			
 
				 	if (cpu_has_vmx_msr_bitmap())
			
 
				-		vmx_set_msr_bitmap(vcpu);
			
 
				+		vmx_update_msr_bitmap(vcpu);
			
 
				 
			
 
				 	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
			
 
				 				vmcs12->vm_exit_msr_load_count))
			
@@ -11534,10 +11660,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
				 	vm_exit_controls_reset_shadow(vmx);
			
 
				 	vmx_segment_cache_clear(vmx);
			
 
				 
			
 
				-	/* if no vmcs02 cache requested, remove the one we used */
			
 
				-	if (VMCS02_POOL_SIZE == 0)
			
 
				-		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
			
 
				-
			
 
				 	/* Update any VMCS fields that might have changed while L2 ran */
			
 
				 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
			
 
				 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
			
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1009,6 +1009,7 @@ static u32 msrs_to_save[] = {
 
				 #endif
			
 
				 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
			
 
				 	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
			
 
				+	MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
			
 
				 };
			
 
				 
			
 
				 static unsigned num_msrs_to_save;
			
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -40,6 +40,8 @@ ENTRY(__get_user_1)
 
				 	mov PER_CPU_VAR(current_task), %_ASM_DX
			
 
				 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
			
 
				 	jae bad_get_user
			
 
				+	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
			
 
				+	and %_ASM_DX, %_ASM_AX
			
 
				 	ASM_STAC
			
 
				 1:	movzbl (%_ASM_AX),%edx
			
 
				 	xor %eax,%eax
			
@@ -54,6 +56,8 @@ ENTRY(__get_user_2)
 
				 	mov PER_CPU_VAR(current_task), %_ASM_DX
			
 
				 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
			
 
				 	jae bad_get_user
			
 
				+	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
			
 
				+	and %_ASM_DX, %_ASM_AX
			
 
				 	ASM_STAC
			
 
				 2:	movzwl -1(%_ASM_AX),%edx
			
 
				 	xor %eax,%eax
			
@@ -68,6 +72,8 @@ ENTRY(__get_user_4)
 
				 	mov PER_CPU_VAR(current_task), %_ASM_DX
			
 
				 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
			
 
				 	jae bad_get_user
			
 
				+	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
			
 
				+	and %_ASM_DX, %_ASM_AX
			
 
				 	ASM_STAC
			
 
				 3:	movl -3(%_ASM_AX),%edx
			
 
				 	xor %eax,%eax
			
@@ -83,6 +89,8 @@ ENTRY(__get_user_8)
 
				 	mov PER_CPU_VAR(current_task), %_ASM_DX
			
 
				 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
			
 
				 	jae bad_get_user
			
 
				+	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
			
 
				+	and %_ASM_DX, %_ASM_AX
			
 
				 	ASM_STAC
			
 
				 4:	movq -7(%_ASM_AX),%rdx
			
 
				 	xor %eax,%eax
			
@@ -94,6 +102,8 @@ ENTRY(__get_user_8)
 
				 	mov PER_CPU_VAR(current_task), %_ASM_DX
			
 
				 	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
			
 
				 	jae bad_get_user_8
			
 
				+	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
			
 
				+	and %_ASM_DX, %_ASM_AX
			
 
				 	ASM_STAC
			
 
				 4:	movl -7(%_ASM_AX),%edx
			
 
				 5:	movl -3(%_ASM_AX),%ecx
			
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -331,12 +331,12 @@ do {									\
 
				 
			
 
				 unsigned long __copy_user_ll(void *to, const void *from, unsigned long n)
			
 
				 {
			
 
				-	stac();
			
 
				+	__uaccess_begin_nospec();
			
 
				 	if (movsl_is_ok(to, from, n))
			
 
				 		__copy_user(to, from, n);
			
 
				 	else
			
 
				 		n = __copy_user_intel(to, from, n);
			
 
				-	clac();
			
 
				+	__uaccess_end();
			
 
				 	return n;
			
 
				 }
			
 
				 EXPORT_SYMBOL(__copy_user_ll);
			
@@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll);
 
				 unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
			
 
				 					unsigned long n)
			
 
				 {
			
 
				-	stac();
			
 
				+	__uaccess_begin_nospec();
			
 
				 #ifdef CONFIG_X86_INTEL_USERCOPY
			
 
				 	if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
			
 
				 		n = __copy_user_intel_nocache(to, from, n);
			
@@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
 
				 #else
			
 
				 	__copy_user(to, from, n);
			
 
				 #endif
			
 
				-	clac();
			
 
				+	__uaccess_end();
			
 
				 	return n;
			
 
				 }
			
 
				 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
			
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,13 +6,14 @@
 
				 #include <linux/interrupt.h>
			
 
				 #include <linux/export.h>
			
 
				 #include <linux/cpu.h>
			
 
				+#include <linux/debugfs.h>
			
 
				 
			
 
				 #include <asm/tlbflush.h>
			
 
				 #include <asm/mmu_context.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 #include <asm/cache.h>
			
 
				 #include <asm/apic.h>
			
 
				 #include <asm/uv/uv.h>
			
 
				-#include <linux/debugfs.h>
			
 
				 
			
 
				 /*
			
 
				  *	TLB flushing, formerly SMP-only
			
@@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
				 	} else {
			
 
				 		u16 new_asid;
			
 
				 		bool need_flush;
			
 
				+		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
			
 
				+
			
 
				+		/*
			
 
				+		 * Avoid user/user BTB poisoning by flushing the branch
			
 
				+		 * predictor when switching between processes. This stops
			
 
				+		 * one process from doing Spectre-v2 attacks on another.
			
 
				+		 *
			
 
				+		 * As an optimization, flush indirect branches only when
			
 
				+		 * switching into processes that disable dumping. This
			
 
				+		 * protects high value processes like gpg, without having
			
 
				+		 * too high performance overhead. IBPB is *expensive*!
			
 
				+		 *
			
 
				+		 * This will not flush branches when switching into kernel
			
 
				+		 * threads. It will also not flush if we switch to idle
			
 
				+		 * thread and back to the same process. It will flush if we
			
 
				+		 * switch to a different non-dumpable process.
			
 
				+		 */
			
 
				+		if (tsk && tsk->mm &&
			
 
				+		    tsk->mm->context.ctx_id != last_ctx_id &&
			
 
				+		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
			
 
				+			indirect_branch_prediction_barrier();
			
 
				 
			
 
				 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			
 
				 			/*
			
@@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
				 			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
			
 
				 		}
			
 
				 
			
 
				+		/*
			
 
				+		 * Record last user mm's context id, so we can avoid
			
 
				+		 * flushing branch buffer with IBPB if we switch back
			
 
				+		 * to the same user.
			
 
				+		 */
			
 
				+		if (next != &init_mm)
			
 
				+			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
			
 
				+
			
 
				 		this_cpu_write(cpu_tlbstate.loaded_mm, next);
			
 
				 		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
			
 
				 	}
			
@@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void)
 
				 	write_cr3(build_cr3(mm->pgd, 0));
			
 
				 
			
 
				 	/* Reinitialize tlbstate. */
			
 
				+	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
			
 
				 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
			
 
				 	this_cpu_write(cpu_tlbstate.next_asid, 1);
			
 
				 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
			
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -10,6 +10,7 @@
 
				 #include <linux/compiler.h>
			
 
				 #include <linux/spinlock.h>
			
 
				 #include <linux/rcupdate.h>
			
 
				+#include <linux/nospec.h>
			
 
				 #include <linux/types.h>
			
 
				 #include <linux/init.h>
			
 
				 #include <linux/fs.h>
			
@@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i
 
				 {
			
 
				 	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
			
 
				 
			
 
				-	if (fd < fdt->max_fds)
			
 
				+	if (fd < fdt->max_fds) {
			
 
				+		fd = array_index_nospec(fd, fdt->max_fds);
			
 
				 		return rcu_dereference_raw(fdt->fd[fd]);
			
 
				+	}
			
 
				 	return NULL;
			
 
				 }
			
 
				 
			
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -5,6 +5,13 @@
 
				 #include <linux/compiler.h>
			
 
				 #include <linux/types.h>
			
 
				 
			
 
				+/* Built-in __init functions needn't be compiled with retpoline */
			
 
				+#if defined(RETPOLINE) && !defined(MODULE)
			
 
				+#define __noretpoline __attribute__((indirect_branch("keep")))
			
 
				+#else
			
 
				+#define __noretpoline
			
 
				+#endif
			
 
				+
			
 
				 /* These macros are used to mark some functions or 
			
 
				  * initialized data (doesn't apply to uninitialized data)
			
 
				  * as `initialization' functions. The kernel can take this
			
@@ -40,7 +47,7 @@
 
				 
			
 
				 /* These are for everybody (although not all archs will actually
			
 
				    discard it in modules) */
			
 
				-#define __init		__section(.init.text) __cold  __latent_entropy
			
 
				+#define __init		__section(.init.text) __cold  __latent_entropy __noretpoline
			
 
				 #define __initdata	__section(.init.data)
			
 
				 #define __initconst	__section(.init.rodata)
			
 
				 #define __exitdata	__section(.exit.data)
			
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -0,0 +1,72 @@
 
				+// SPDX-License-Identifier: GPL-2.0
			
 
				+// Copyright(c) 2018 Linus Torvalds. All rights reserved.
			
 
				+// Copyright(c) 2018 Alexei Starovoitov. All rights reserved.
			
 
				+// Copyright(c) 2018 Intel Corporation. All rights reserved.
			
 
				+
			
 
				+#ifndef _LINUX_NOSPEC_H
			
 
				+#define _LINUX_NOSPEC_H
			
 
				+
			
 
				+/**
			
 
				+ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
			
 
				+ * @index: array element index
			
 
				+ * @size: number of elements in array
			
 
				+ *
			
 
				+ * When @index is out of bounds (@index >= @size), the sign bit will be
			
 
				+ * set.  Extend the sign bit to all bits and invert, giving a result of
			
 
				+ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
			
 
				+ */
			
 
				+#ifndef array_index_mask_nospec
			
 
				+static inline unsigned long array_index_mask_nospec(unsigned long index,
			
 
				+						    unsigned long size)
			
 
				+{
			
 
				+	/*
			
 
				+	 * Warn developers about inappropriate array_index_nospec() usage.
			
 
				+	 *
			
 
				+	 * Even if the CPU speculates past the WARN_ONCE branch, the
			
 
				+	 * sign bit of @index is taken into account when generating the
			
 
				+	 * mask.
			
 
				+	 *
			
 
				+	 * This warning is compiled out when the compiler can infer that
			
 
				+	 * @index and @size are less than LONG_MAX.
			
 
				+	 */
			
 
				+	if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
			
 
				+			"array_index_nospec() limited to range of [0, LONG_MAX]\n"))
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Always calculate and emit the mask even if the compiler
			
 
				+	 * thinks the mask is not needed. The compiler does not take
			
 
				+	 * into account the value of @index under speculation.
			
 
				+	 */
			
 
				+	OPTIMIZER_HIDE_VAR(index);
			
 
				+	return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * array_index_nospec - sanitize an array index after a bounds check
			
 
				+ *
			
 
				+ * For a code sequence like:
			
 
				+ *
			
 
				+ *     if (index < size) {
			
 
				+ *         index = array_index_nospec(index, size);
			
 
				+ *         val = array[index];
			
 
				+ *     }
			
 
				+ *
			
 
				+ * ...if the CPU speculates past the bounds check then
			
 
				+ * array_index_nospec() will clamp the index within the range of [0,
			
 
				+ * size).
			
 
				+ */
			
 
				+#define array_index_nospec(index, size)					\
			
 
				+({									\
			
 
				+	typeof(index) _i = (index);					\
			
 
				+	typeof(size) _s = (size);					\
			
 
				+	unsigned long _mask = array_index_mask_nospec(_i, _s);		\
			
 
				+									\
			
 
				+	BUILD_BUG_ON(sizeof(_i) > sizeof(long));			\
			
 
				+	BUILD_BUG_ON(sizeof(_s) > sizeof(long));			\
			
 
				+									\
			
 
				+	_i &= _mask;							\
			
 
				+	_i;								\
			
 
				+})
			
 
				+#endif /* _LINUX_NOSPEC_H */
			
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -16,6 +16,7 @@
 
				 #include <linux/nl80211.h>
			
 
				 #include <linux/rtnetlink.h>
			
 
				 #include <linux/netlink.h>
			
 
				+#include <linux/nospec.h>
			
 
				 #include <linux/etherdevice.h>
			
 
				 #include <net/net_namespace.h>
			
 
				 #include <net/genetlink.h>
			
@@ -2080,20 +2081,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
 
				 static int parse_txq_params(struct nlattr *tb[],
			
 
				 			    struct ieee80211_txq_params *txq_params)
			
 
				 {
			
 
				+	u8 ac;
			
 
				+
			
 
				 	if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
			
 
				 	    !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
			
 
				 	    !tb[NL80211_TXQ_ATTR_AIFS])
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
			
 
				+	ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
			
 
				 	txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
			
 
				 	txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
			
 
				 	txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
			
 
				 	txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
			
 
				 
			
 
				-	if (txq_params->ac >= NL80211_NUM_ACS)
			
 
				+	if (ac >= NL80211_NUM_ACS)
			
 
				 		return -EINVAL;
			
 
				-
			
 
				+	txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -544,18 +544,14 @@ static int add_call_destinations(struct objtool_file *file)
 
				 			dest_off = insn->offset + insn->len + insn->immediate;
			
 
				 			insn->call_dest = find_symbol_by_offset(insn->sec,
			
 
				 								dest_off);
			
 
				-			/*
			
 
				-			 * FIXME: Thanks to retpolines, it's now considered
			
 
				-			 * normal for a function to call within itself.  So
			
 
				-			 * disable this warning for now.
			
 
				-			 */
			
 
				-#if 0
			
 
				-			if (!insn->call_dest) {
			
 
				-				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
			
 
				-					  insn->sec, insn->offset, dest_off);
			
 
				+
			
 
				+			if (!insn->call_dest && !insn->ignore) {
			
 
				+				WARN_FUNC("unsupported intra-function call",
			
 
				+					  insn->sec, insn->offset);
			
 
				+				WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
			
 
				 				return -1;
			
 
				 			}
			
 
				-#endif
			
 
				+
			
 
				 		} else if (rela->sym->type == STT_SECTION) {
			
 
				 			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
			
 
				 								rela->addend+4);
			
@@ -599,7 +595,7 @@ static int handle_group_alt(struct objtool_file *file,
 
				 			    struct instruction *orig_insn,
			
 
				 			    struct instruction **new_insn)
			
 
				 {
			
 
				-	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
			
 
				+	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
			
 
				 	unsigned long dest_off;
			
 
				 
			
 
				 	last_orig_insn = NULL;
			
@@ -615,28 +611,30 @@ static int handle_group_alt(struct objtool_file *file,
 
				 		last_orig_insn = insn;
			
 
				 	}
			
 
				 
			
 
				-	if (!next_insn_same_sec(file, last_orig_insn)) {
			
 
				-		WARN("%s: don't know how to handle alternatives at end of section",
			
 
				-		     special_alt->orig_sec->name);
			
 
				-		return -1;
			
 
				-	}
			
 
				-
			
 
				-	fake_jump = malloc(sizeof(*fake_jump));
			
 
				-	if (!fake_jump) {
			
 
				-		WARN("malloc failed");
			
 
				-		return -1;
			
 
				+	if (next_insn_same_sec(file, last_orig_insn)) {
			
 
				+		fake_jump = malloc(sizeof(*fake_jump));
			
 
				+		if (!fake_jump) {
			
 
				+			WARN("malloc failed");
			
 
				+			return -1;
			
 
				+		}
			
 
				+		memset(fake_jump, 0, sizeof(*fake_jump));
			
 
				+		INIT_LIST_HEAD(&fake_jump->alts);
			
 
				+		clear_insn_state(&fake_jump->state);
			
 
				+
			
 
				+		fake_jump->sec = special_alt->new_sec;
			
 
				+		fake_jump->offset = -1;
			
 
				+		fake_jump->type = INSN_JUMP_UNCONDITIONAL;
			
 
				+		fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
			
 
				+		fake_jump->ignore = true;
			
 
				 	}
			
 
				-	memset(fake_jump, 0, sizeof(*fake_jump));
			
 
				-	INIT_LIST_HEAD(&fake_jump->alts);
			
 
				-	clear_insn_state(&fake_jump->state);
			
 
				-
			
 
				-	fake_jump->sec = special_alt->new_sec;
			
 
				-	fake_jump->offset = -1;
			
 
				-	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
			
 
				-	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
			
 
				-	fake_jump->ignore = true;
			
 
				 
			
 
				 	if (!special_alt->new_len) {
			
 
				+		if (!fake_jump) {
			
 
				+			WARN("%s: empty alternative at end of section",
			
 
				+			     special_alt->orig_sec->name);
			
 
				+			return -1;
			
 
				+		}
			
 
				+
			
 
				 		*new_insn = fake_jump;
			
 
				 		return 0;
			
 
				 	}
			
@@ -649,6 +647,8 @@ static int handle_group_alt(struct objtool_file *file,
 
				 
			
 
				 		last_new_insn = insn;
			
 
				 
			
 
				+		insn->ignore = orig_insn->ignore_alts;
			
 
				+
			
 
				 		if (insn->type != INSN_JUMP_CONDITIONAL &&
			
 
				 		    insn->type != INSN_JUMP_UNCONDITIONAL)
			
 
				 			continue;
			
@@ -657,8 +657,14 @@ static int handle_group_alt(struct objtool_file *file,
 
				 			continue;
			
 
				 
			
 
				 		dest_off = insn->offset + insn->len + insn->immediate;
			
 
				-		if (dest_off == special_alt->new_off + special_alt->new_len)
			
 
				+		if (dest_off == special_alt->new_off + special_alt->new_len) {
			
 
				+			if (!fake_jump) {
			
 
				+				WARN("%s: alternative jump to end of section",
			
 
				+				     special_alt->orig_sec->name);
			
 
				+				return -1;
			
 
				+			}
			
 
				 			insn->jump_dest = fake_jump;
			
 
				+		}
			
 
				 
			
 
				 		if (!insn->jump_dest) {
			
 
				 			WARN_FUNC("can't find alternative jump destination",
			
@@ -673,7 +679,8 @@ static int handle_group_alt(struct objtool_file *file,
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				-	list_add(&fake_jump->list, &last_new_insn->list);
			
 
				+	if (fake_jump)
			
 
				+		list_add(&fake_jump->list, &last_new_insn->list);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -730,10 +737,6 @@ static int add_special_section_alts(struct objtool_file *file)
 
				 			goto out;
			
 
				 		}
			
 
				 
			
 
				-		/* Ignore retpoline alternatives. */
			
 
				-		if (orig_insn->ignore_alts)
			
 
				-			continue;
			
 
				-
			
 
				 		new_insn = NULL;
			
 
				 		if (!special_alt->group || special_alt->new_len) {
			
 
				 			new_insn = find_insn(file, special_alt->new_sec,
			
@@ -1090,11 +1093,11 @@ static int decode_sections(struct objtool_file *file)
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	ret = add_call_destinations(file);
			
 
				+	ret = add_special_section_alts(file);
			
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	ret = add_special_section_alts(file);
			
 
				+	ret = add_call_destinations(file);
			
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
@@ -1721,10 +1724,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 
				 
			
 
				 		insn->visited = true;
			
 
				 
			
 
				-		list_for_each_entry(alt, &insn->alts, list) {
			
 
				-			ret = validate_branch(file, alt->insn, state);
			
 
				-			if (ret)
			
 
				-				return 1;
			
 
				+		if (!insn->ignore_alts) {
			
 
				+			list_for_each_entry(alt, &insn->alts, list) {
			
 
				+				ret = validate_branch(file, alt->insn, state);
			
 
				+				if (ret)
			
 
				+					return 1;
			
 
				+			}
			
 
				 		}
			
 
				 
			
 
				 		switch (insn->type) {
			
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
 
				 	struct orc_entry *orc;
			
 
				 	struct rela *rela;
			
 
				 
			
 
				+	if (!insn_sec->sym) {
			
 
				+		WARN("missing symbol for section %s", insn_sec->name);
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				 	/* populate ORC data */
			
 
				 	orc = (struct orc_entry *)u_sec->data->d_buf + idx;
			
 
				 	memcpy(orc, o, sizeof(*orc));