7 years ago · fba961ab29
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -13,7 +13,6 @@ Required properties:
 
				                  at25df321a
			
 
				                  at25df641
			
 
				                  at26df081a
			
 
				-                 en25s64
			
 
				                  mr25h128
			
 
				                  mr25h256
			
 
				                  mr25h10
			
@@ -33,7 +32,6 @@ Required properties:
 
				                  s25fl008k
			
 
				                  s25fl064k
			
 
				                  sst25vf040b
			
 
				-                 sst25wf040b
			
 
				                  m25p40
			
 
				                  m25p80
			
 
				                  m25p16
			
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -12,24 +12,30 @@ Required properties:
 
				   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
			
 
				 - reg : Offset and length of the register set for the device
			
 
				 - interrupts : Should contain CSPI/eCSPI interrupt
			
 
				-- cs-gpios : Specifies the gpio pins to be used for chipselects.
			
 
				 - clocks : Clock specifiers for both ipg and per clocks.
			
 
				 - clock-names : Clock names should include both "ipg" and "per"
			
 
				 See the clock consumer binding,
			
 
				 	Documentation/devicetree/bindings/clock/clock-bindings.txt
			
 
				-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
			
 
				-		Documentation/devicetree/bindings/dma/dma.txt
			
 
				-- dma-names: DMA request names should include "tx" and "rx" if present.
			
 
				 
			
 
				-Obsolete properties:
			
 
				-- fsl,spi-num-chipselects : Contains the number of the chipselect
			
 
				+Recommended properties:
			
 
				+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
			
 
				+select lines can be used, they appear to always generate a pulse between each
			
 
				+word of a transfer.  Most use cases will require GPIO based chip selects to
			
 
				+generate a valid transaction.
			
 
				 
			
 
				 Optional properties:
			
 
				+- num-cs :  Number of total chip selects, see spi-bus.txt.
			
 
				+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
			
 
				+Documentation/devicetree/bindings/dma/dma.txt.
			
 
				+- dma-names: DMA request names, if present, should include "tx" and "rx".
			
 
				 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
			
 
				 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
			
 
				 the SPI_READY mode-flag needs to be set too.
			
 
				 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
			
 
				 
			
 
				+Obsolete properties:
			
 
				+- fsl,spi-num-chipselects : Contains the number of the chipselect
			
 
				+
			
 
				 Example:
			
 
				 
			
 
				 ecspi@70010000 {
			
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 
				 VERSION = 4
			
 
				 PATCHLEVEL = 15
			
 
				 SUBLEVEL = 0
			
 
				-EXTRAVERSION = -rc3
			
 
				+EXTRAVERSION = -rc4
			
 
				 NAME = Fearless Coyote
			
 
				 
			
 
				 # *DOCUMENTATION*
			
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
 
				 		.pushsection .text.fixup,"ax"
			
 
				 		.align	4
			
 
				 9001:		mov	r4, #-EFAULT
			
 
				+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
			
 
				+		ldr	r5, [sp, #9*4]		@ *err_ptr
			
 
				+#else
			
 
				 		ldr	r5, [sp, #8*4]		@ *err_ptr
			
 
				+#endif
			
 
				 		str	r4, [r5]
			
 
				 		ldmia	sp, {r1, r2}		@ retrieve dst, len
			
 
				 		add	r2, r2, r1
			
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 
				 {
			
 
				 	u64 reg;
			
 
				 
			
 
				+	/* Clear pmscr in case of early return */
			
 
				+	*pmscr_el1 = 0;
			
 
				+
			
 
				 	/* SPE present on this CPU? */
			
 
				 	if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
			
 
				 						  ID_AA64DFR0_PMSVER_SHIFT))
			
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
 
				 	while ((nuline = strchr(s, '\n')) != NULL) {
			
 
				 		if (nuline != s)
			
 
				 			pdc_iodc_print(s, nuline - s);
			
 
				-			pdc_iodc_print("\r\n", 2);
			
 
				-			s = nuline + 1;
			
 
				+		pdc_iodc_print("\r\n", 2);
			
 
				+		s = nuline + 1;
			
 
				 	}
			
 
				 	if (*s != '\0')
			
 
				 		pdc_iodc_print(s, strlen(s));
			
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
 
				 
			
 
				 /* thread information allocation */
			
 
				 
			
 
				+#ifdef CONFIG_IRQSTACKS
			
 
				+#define THREAD_SIZE_ORDER	2 /* PA-RISC requires at least 16k stack */
			
 
				+#else
			
 
				 #define THREAD_SIZE_ORDER	3 /* PA-RISC requires at least 32k stack */
			
 
				+#endif
			
 
				+
			
 
				 /* Be sure to hunt all references to this down when you change the size of
			
 
				  * the kernel stack */
			
 
				 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
			
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
 
				 	STREG   %r19,PT_SR7(%r16)
			
 
				 
			
 
				 intr_return:
			
 
				-	/* NOTE: Need to enable interrupts incase we schedule. */
			
 
				-	ssm     PSW_SM_I, %r0
			
 
				-
			
 
				 	/* check for reschedule */
			
 
				 	mfctl   %cr30,%r1
			
 
				 	LDREG   TI_FLAGS(%r1),%r19	/* sched.h: TIF_NEED_RESCHED */
			
@@ -907,6 +904,11 @@ intr_check_sig:
 
				 	LDREG	PT_IASQ1(%r16), %r20
			
 
				 	cmpib,COND(=),n 0,%r20,intr_restore /* backward */
			
 
				 
			
 
				+	/* NOTE: We need to enable interrupts if we have to deliver
			
 
				+	 * signals. We used to do this earlier but it caused kernel
			
 
				+	 * stack overflows. */
			
 
				+	ssm     PSW_SM_I, %r0
			
 
				+
			
 
				 	copy	%r0, %r25			/* long in_syscall = 0 */
			
 
				 #ifdef CONFIG_64BIT
			
 
				 	ldo	-16(%r30),%r29			/* Reference param save area */
			
@@ -958,6 +960,10 @@ intr_do_resched:
 
				 	cmpib,COND(=)	0, %r20, intr_do_preempt
			
 
				 	nop
			
 
				 
			
 
				+	/* NOTE: We need to enable interrupts if we schedule.  We used
			
 
				+	 * to do this earlier but it caused kernel stack overflows. */
			
 
				+	ssm     PSW_SM_I, %r0
			
 
				+
			
 
				 #ifdef CONFIG_64BIT
			
 
				 	ldo	-16(%r30),%r29		/* Reference param save area */
			
 
				 #endif
			
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
 
				 
			
 
				 
			
 
				 	__INITRODATA
			
 
				+	.align 4
			
 
				 	.export os_hpmc_size
			
 
				 os_hpmc_size:
			
 
				 	.word .os_hpmc_end-.os_hpmc
			
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
 
				 #include <linux/slab.h>
			
 
				 #include <linux/kallsyms.h>
			
 
				 #include <linux/sort.h>
			
 
				-#include <linux/sched.h>
			
 
				 
			
 
				 #include <linux/uaccess.h>
			
 
				 #include <asm/assembly.h>
			
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
 
				 #include <linux/preempt.h>
			
 
				 #include <linux/init.h>
			
 
				 
			
 
				-#include <asm/processor.h>
			
 
				 #include <asm/delay.h>
			
 
				-
			
 
				 #include <asm/special_insns.h>    /* for mfctl() */
			
 
				 #include <asm/processor.h> /* for boot_cpu_data */
			
 
				 
			
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
 
				 			func = (u8 *) __bpf_call_base + imm;
			
 
				 
			
 
				 			/* Save skb pointer if we need to re-cache skb data */
			
 
				-			if (bpf_helper_changes_pkt_data(func))
			
 
				+			if ((ctx->seen & SEEN_SKB) &&
			
 
				+			    bpf_helper_changes_pkt_data(func))
			
 
				 				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
			
 
				 
			
 
				 			bpf_jit_emit_func_call(image, ctx, (u64)func);
			
@@ -772,7 +773,8 @@ emit_clear:
 
				 			PPC_MR(b2p[BPF_REG_0], 3);
			
 
				 
			
 
				 			/* refresh skb cache */
			
 
				-			if (bpf_helper_changes_pkt_data(func)) {
			
 
				+			if ((ctx->seen & SEEN_SKB) &&
			
 
				+			    bpf_helper_changes_pkt_data(func)) {
			
 
				 				/* reload skb pointer to r3 */
			
 
				 				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
			
 
				 				bpf_jit_emit_skb_loads(image, ctx);
			
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
 
				 #define SEEN_LITERAL	8	/* code uses literals */
			
 
				 #define SEEN_FUNC	16	/* calls C functions */
			
 
				 #define SEEN_TAIL_CALL	32	/* code uses tail calls */
			
 
				-#define SEEN_SKB_CHANGE	64	/* code changes skb data */
			
 
				-#define SEEN_REG_AX	128	/* code uses constant blinding */
			
 
				+#define SEEN_REG_AX	64	/* code uses constant blinding */
			
 
				 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
			
 
				 
			
 
				 /*
			
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 
				 			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
			
 
				 				      REG_15, 152);
			
 
				 	}
			
 
				-	if (jit->seen & SEEN_SKB)
			
 
				+	if (jit->seen & SEEN_SKB) {
			
 
				 		emit_load_skb_data_hlen(jit);
			
 
				-	if (jit->seen & SEEN_SKB_CHANGE)
			
 
				 		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
			
 
				 		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
			
 
				 			      STK_OFF_SKBP);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 
				 		EMIT2(0x0d00, REG_14, REG_W1);
			
 
				 		/* lgr %b0,%r2: load return value into %b0 */
			
 
				 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
			
 
				-		if (bpf_helper_changes_pkt_data((void *)func)) {
			
 
				-			jit->seen |= SEEN_SKB_CHANGE;
			
 
				+		if ((jit->seen & SEEN_SKB) &&
			
 
				+		    bpf_helper_changes_pkt_data((void *)func)) {
			
 
				 			/* lg %b1,ST_OFF_SKBP(%r15) */
			
 
				 			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
			
 
				 				      REG_15, STK_OFF_SKBP);
			
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
 
				 	if (!printk_ratelimit())
			
 
				 		return;
			
 
				 
			
 
				-	printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
			
 
				+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
			
 
				 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			
 
				 	       tsk->comm, task_pid_nr(tsk), address,
			
 
				 	       (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
			
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
 
				 	if (!printk_ratelimit())
			
 
				 		return;
			
 
				 
			
 
				-	printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
			
 
				+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
			
 
				 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			
 
				 	       tsk->comm, task_pid_nr(tsk), address,
			
 
				 	       (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
			
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 
				 		u8 *func = ((u8 *)__bpf_call_base) + imm;
			
 
				 
			
 
				 		ctx->saw_call = true;
			
 
				+		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
			
 
				+			emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
			
 
				 
			
 
				 		emit_call((u32 *)func, ctx);
			
 
				 		emit_nop(ctx);
			
 
				 
			
 
				 		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
			
 
				 
			
 
				-		if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
			
 
				-			load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
			
 
				+		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
			
 
				+			load_skb_regs(ctx, L7);
			
 
				 		break;
			
 
				 	}
			
 
				 
			
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
 
				 	if (!printk_ratelimit())
			
 
				 		return;
			
 
				 
			
 
				-	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
			
 
				+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
			
 
				 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			
 
				 		tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
			
 
				 		(void *)UPT_IP(regs), (void *)UPT_SP(regs),
			
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -941,7 +941,8 @@ ENTRY(debug)
 
				 	movl	%esp, %eax			# pt_regs pointer
			
 
				 
			
 
				 	/* Are we currently on the SYSENTER stack? */
			
 
				-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
			
 
				+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
			
 
				+	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
			
 
				 	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
			
 
				 	cmpl	$SIZEOF_SYSENTER_stack, %ecx
			
 
				 	jb	.Ldebug_from_sysenter_stack
			
@@ -984,7 +985,8 @@ ENTRY(nmi)
 
				 	movl	%esp, %eax			# pt_regs pointer
			
 
				 
			
 
				 	/* Are we currently on the SYSENTER stack? */
			
 
				-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
			
 
				+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
			
 
				+	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
			
 
				 	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
			
 
				 	cmpl	$SIZEOF_SYSENTER_stack, %ecx
			
 
				 	jb	.Lnmi_from_sysenter_stack
			
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
 
				  * with them due to bugs in both AMD and Intel CPUs.
			
 
				  */
			
 
				 
			
 
				+	.pushsection .entry_trampoline, "ax"
			
 
				+
			
 
				+/*
			
 
				+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
			
 
				+ * that the assembler and linker have the wrong idea as to where this code
			
 
				+ * lives (and, in fact, it's mapped more than once, so it's not even at a
			
 
				+ * fixed address).  So we can't reference any symbols outside the entry
			
 
				+ * trampoline and expect it to work.
			
 
				+ *
			
 
				+ * Instead, we carefully abuse %rip-relative addressing.
			
 
				+ * _entry_trampoline(%rip) refers to the start of the (remapped) entry
			
 
				+ * trampoline.  We can thus find cpu_entry_area with this macro:
			
 
				+ */
			
 
				+
			
 
				+#define CPU_ENTRY_AREA \
			
 
				+	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
			
 
				+
			
 
				+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
			
 
				+#define RSP_SCRATCH	CPU_ENTRY_AREA_SYSENTER_stack + \
			
 
				+			SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
			
 
				+
			
 
				+ENTRY(entry_SYSCALL_64_trampoline)
			
 
				+	UNWIND_HINT_EMPTY
			
 
				+	swapgs
			
 
				+
			
 
				+	/* Stash the user RSP. */
			
 
				+	movq	%rsp, RSP_SCRATCH
			
 
				+
			
 
				+	/* Load the top of the task stack into RSP */
			
 
				+	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
			
 
				+
			
 
				+	/* Start building the simulated IRET frame. */
			
 
				+	pushq	$__USER_DS			/* pt_regs->ss */
			
 
				+	pushq	RSP_SCRATCH			/* pt_regs->sp */
			
 
				+	pushq	%r11				/* pt_regs->flags */
			
 
				+	pushq	$__USER_CS			/* pt_regs->cs */
			
 
				+	pushq	%rcx				/* pt_regs->ip */
			
 
				+
			
 
				+	/*
			
 
				+	 * x86 lacks a near absolute jump, and we can't jump to the real
			
 
				+	 * entry text with a relative jump.  We could push the target
			
 
				+	 * address and then use retq, but this destroys the pipeline on
			
 
				+	 * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
			
 
				+	 * spill RDI and restore it in a second-stage trampoline.
			
 
				+	 */
			
 
				+	pushq	%rdi
			
 
				+	movq	$entry_SYSCALL_64_stage2, %rdi
			
 
				+	jmp	*%rdi
			
 
				+END(entry_SYSCALL_64_trampoline)
			
 
				+
			
 
				+	.popsection
			
 
				+
			
 
				+ENTRY(entry_SYSCALL_64_stage2)
			
 
				+	UNWIND_HINT_EMPTY
			
 
				+	popq	%rdi
			
 
				+	jmp	entry_SYSCALL_64_after_hwframe
			
 
				+END(entry_SYSCALL_64_stage2)
			
 
				+
			
 
				 ENTRY(entry_SYSCALL_64)
			
 
				 	UNWIND_HINT_EMPTY
			
 
				 	/*
			
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
 
				 	popq	%rsi	/* skip rcx */
			
 
				 	popq	%rdx
			
 
				 	popq	%rsi
			
 
				+
			
 
				+	/*
			
 
				+	 * Now all regs are restored except RSP and RDI.
			
 
				+	 * Save old stack pointer and switch to trampoline stack.
			
 
				+	 */
			
 
				+	movq	%rsp, %rdi
			
 
				+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
			
 
				+
			
 
				+	pushq	RSP-RDI(%rdi)	/* RSP */
			
 
				+	pushq	(%rdi)		/* RDI */
			
 
				+
			
 
				+	/*
			
 
				+	 * We are on the trampoline stack.  All regs except RDI are live.
			
 
				+	 * We can do future final exit work right here.
			
 
				+	 */
			
 
				+
			
 
				 	popq	%rdi
			
 
				-	movq	RSP-ORIG_RAX(%rsp), %rsp
			
 
				+	popq	%rsp
			
 
				 	USERGS_SYSRET64
			
 
				 END(entry_SYSCALL_64)
			
 
				 
			
@@ -466,12 +540,13 @@ END(irq_entries_start)
 
				 
			
 
				 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
			
 
				 #ifdef CONFIG_DEBUG_ENTRY
			
 
				-	pushfq
			
 
				-	testl $X86_EFLAGS_IF, (%rsp)
			
 
				+	pushq %rax
			
 
				+	SAVE_FLAGS(CLBR_RAX)
			
 
				+	testl $X86_EFLAGS_IF, %eax
			
 
				 	jz .Lokay_\@
			
 
				 	ud2
			
 
				 .Lokay_\@:
			
 
				-	addq $8, %rsp
			
 
				+	popq %rax
			
 
				 #endif
			
 
				 .endm
			
 
				 
			
@@ -563,6 +638,13 @@ END(irq_entries_start)
 
				 /* 0(%rsp): ~(interrupt number) */
			
 
				 	.macro interrupt func
			
 
				 	cld
			
 
				+
			
 
				+	testb	$3, CS-ORIG_RAX(%rsp)
			
 
				+	jz	1f
			
 
				+	SWAPGS
			
 
				+	call	switch_to_thread_stack
			
 
				+1:
			
 
				+
			
 
				 	ALLOC_PT_GPREGS_ON_STACK
			
 
				 	SAVE_C_REGS
			
 
				 	SAVE_EXTRA_REGS
			
@@ -572,12 +654,8 @@ END(irq_entries_start)
 
				 	jz	1f
			
 
				 
			
 
				 	/*
			
 
				-	 * IRQ from user mode.  Switch to kernel gsbase and inform context
			
 
				-	 * tracking that we're in kernel mode.
			
 
				-	 */
			
 
				-	SWAPGS
			
 
				-
			
 
				-	/*
			
 
				+	 * IRQ from user mode.
			
 
				+	 *
			
 
				 	 * We need to tell lockdep that IRQs are off.  We can't do this until
			
 
				 	 * we fix gsbase, and we should do it before enter_from_user_mode
			
 
				 	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
			
@@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 
				 	ud2
			
 
				 1:
			
 
				 #endif
			
 
				-	SWAPGS
			
 
				 	POP_EXTRA_REGS
			
 
				-	POP_C_REGS
			
 
				-	addq	$8, %rsp	/* skip regs->orig_ax */
			
 
				+	popq	%r11
			
 
				+	popq	%r10
			
 
				+	popq	%r9
			
 
				+	popq	%r8
			
 
				+	popq	%rax
			
 
				+	popq	%rcx
			
 
				+	popq	%rdx
			
 
				+	popq	%rsi
			
 
				+
			
 
				+	/*
			
 
				+	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
			
 
				+	 * Save old stack pointer and switch to trampoline stack.
			
 
				+	 */
			
 
				+	movq	%rsp, %rdi
			
 
				+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
			
 
				+
			
 
				+	/* Copy the IRET frame to the trampoline stack. */
			
 
				+	pushq	6*8(%rdi)	/* SS */
			
 
				+	pushq	5*8(%rdi)	/* RSP */
			
 
				+	pushq	4*8(%rdi)	/* EFLAGS */
			
 
				+	pushq	3*8(%rdi)	/* CS */
			
 
				+	pushq	2*8(%rdi)	/* RIP */
			
 
				+
			
 
				+	/* Push user RDI on the trampoline stack. */
			
 
				+	pushq	(%rdi)
			
 
				+
			
 
				+	/*
			
 
				+	 * We are on the trampoline stack.  All regs except RDI are live.
			
 
				+	 * We can do future final exit work right here.
			
 
				+	 */
			
 
				+
			
 
				+	/* Restore RDI. */
			
 
				+	popq	%rdi
			
 
				+	SWAPGS
			
 
				 	INTERRUPT_RETURN
			
 
				 
			
 
				 
			
@@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 
				 /*
			
 
				  * Exception entry points.
			
 
				  */
			
 
				-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
			
 
				+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
			
 
				+
			
 
				+/*
			
 
				+ * Switch to the thread stack.  This is called with the IRET frame and
			
 
				+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
			
 
				+ * space has not been allocated for them.)
			
 
				+ */
			
 
				+ENTRY(switch_to_thread_stack)
			
 
				+	UNWIND_HINT_FUNC
			
 
				+
			
 
				+	pushq	%rdi
			
 
				+	movq	%rsp, %rdi
			
 
				+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
			
 
				+	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
			
 
				+
			
 
				+	pushq	7*8(%rdi)		/* regs->ss */
			
 
				+	pushq	6*8(%rdi)		/* regs->rsp */
			
 
				+	pushq	5*8(%rdi)		/* regs->eflags */
			
 
				+	pushq	4*8(%rdi)		/* regs->cs */
			
 
				+	pushq	3*8(%rdi)		/* regs->ip */
			
 
				+	pushq	2*8(%rdi)		/* regs->orig_ax */
			
 
				+	pushq	8(%rdi)			/* return address */
			
 
				+	UNWIND_HINT_FUNC
			
 
				+
			
 
				+	movq	(%rdi), %rdi
			
 
				+	ret
			
 
				+END(switch_to_thread_stack)
			
 
				 
			
 
				 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
			
 
				 ENTRY(\sym)
			
@@ -848,11 +983,12 @@ ENTRY(\sym)
 
				 
			
 
				 	ALLOC_PT_GPREGS_ON_STACK
			
 
				 
			
 
				-	.if \paranoid
			
 
				-	.if \paranoid == 1
			
 
				+	.if \paranoid < 2
			
 
				 	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
			
 
				-	jnz	1f
			
 
				+	jnz	.Lfrom_usermode_switch_stack_\@
			
 
				 	.endif
			
 
				+
			
 
				+	.if \paranoid
			
 
				 	call	paranoid_entry
			
 
				 	.else
			
 
				 	call	error_entry
			
@@ -894,20 +1030,15 @@ ENTRY(\sym)
 
				 	jmp	error_exit
			
 
				 	.endif
			
 
				 
			
 
				-	.if \paranoid == 1
			
 
				+	.if \paranoid < 2
			
 
				 	/*
			
 
				-	 * Paranoid entry from userspace.  Switch stacks and treat it
			
 
				+	 * Entry from userspace.  Switch stacks and treat it
			
 
				 	 * as a normal entry.  This means that paranoid handlers
			
 
				 	 * run in real process context if user_mode(regs).
			
 
				 	 */
			
 
				-1:
			
 
				+.Lfrom_usermode_switch_stack_\@:
			
 
				 	call	error_entry
			
 
				 
			
 
				-
			
 
				-	movq	%rsp, %rdi			/* pt_regs pointer */
			
 
				-	call	sync_regs
			
 
				-	movq	%rax, %rsp			/* switch stack */
			
 
				-
			
 
				 	movq	%rsp, %rdi			/* pt_regs pointer */
			
 
				 
			
 
				 	.if \has_error_code
			
@@ -1170,6 +1301,14 @@ ENTRY(error_entry)
 
				 	SWAPGS
			
 
				 
			
 
				 .Lerror_entry_from_usermode_after_swapgs:
			
 
				+	/* Put us onto the real thread stack. */
			
 
				+	popq	%r12				/* save return addr in %r12 */
			
 
				+	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
			
 
				+	call	sync_regs
			
 
				+	movq	%rax, %rsp			/* switch stack */
			
 
				+	ENCODE_FRAME_POINTER
			
 
				+	pushq	%r12
			
 
				+
			
 
				 	/*
			
 
				 	 * We need to tell lockdep that IRQs are off.  We can't do this until
			
 
				 	 * we fix gsbase, and we should do it before enter_from_user_mode
			
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,7 @@
 
				  */
			
 
				 ENTRY(entry_SYSENTER_compat)
			
 
				 	/* Interrupts are off on entry. */
			
 
				-	SWAPGS_UNSAFE_STACK
			
 
				+	SWAPGS
			
 
				 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
			
 
				 
			
 
				 	/*
			
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
 
				 	 */
			
 
				 	movl	%eax, %eax
			
 
				 
			
 
				-	/* Construct struct pt_regs on stack (iret frame is already on stack) */
			
 
				 	pushq	%rax			/* pt_regs->orig_ax */
			
 
				+
			
 
				+	/* switch to thread stack expects orig_ax to be pushed */
			
 
				+	call	switch_to_thread_stack
			
 
				+
			
 
				 	pushq	%rdi			/* pt_regs->di */
			
 
				 	pushq	%rsi			/* pt_regs->si */
			
 
				 	pushq	%rdx			/* pt_regs->dx */
			
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 
				 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
			
 
				 } while (0)
			
 
				 
			
 
				+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
			
 
				+
			
 
				 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
			
 
				 /*
			
 
				  * Static testing of CPU features.  Used the same as boot_cpu_has().
			
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
 
				 	return this_cpu_ptr(&gdt_page)->gdt;
			
 
				 }
			
 
				 
			
 
				-/* Get the fixmap index for a specific processor */
			
 
				-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
			
 
				-{
			
 
				-	return FIX_GDT_REMAP_BEGIN + cpu;
			
 
				-}
			
 
				-
			
 
				 /* Provide the fixmap address of the remapped GDT */
			
 
				 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
			
 
				 {
			
 
				-	unsigned int idx = get_cpu_gdt_ro_index(cpu);
			
 
				-	return (struct desc_struct *)__fix_to_virt(idx);
			
 
				+	return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
			
 
				 }
			
 
				 
			
 
				 /* Provide the current read-only GDT */
			
@@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
			
 
				+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
			
 
				 {
			
 
				 	struct desc_struct *d = get_cpu_gdt_rw(cpu);
			
 
				 	tss_desc tss;
			
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP;
 
				 			 PAGE_SIZE)
			
 
				 #endif
			
 
				 
			
 
				+/*
			
 
				+ * cpu_entry_area is a percpu region in the fixmap that contains things
			
 
				+ * needed by the CPU and early entry/exit code.  Real types aren't used
			
 
				+ * for all fields here to avoid circular header dependencies.
			
 
				+ *
			
 
				+ * Every field is a virtual alias of some other allocated backing store.
			
 
				+ * There is no direct allocation of a struct cpu_entry_area.
			
 
				+ */
			
 
				+struct cpu_entry_area {
			
 
				+	char gdt[PAGE_SIZE];
			
 
				+
			
 
				+	/*
			
 
				+	 * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
			
 
				+	 * a read-only guard page.
			
 
				+	 */
			
 
				+	struct SYSENTER_stack_page SYSENTER_stack_page;
			
 
				+
			
 
				+	/*
			
 
				+	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
			
 
				+	 * we need task switches to work, and task switches write to the TSS.
			
 
				+	 */
			
 
				+	struct tss_struct tss;
			
 
				+
			
 
				+	char entry_trampoline[PAGE_SIZE];
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+	/*
			
 
				+	 * Exception stacks used for IST entries.
			
 
				+	 *
			
 
				+	 * In the future, this should have a separate slot for each stack
			
 
				+	 * with guard pages between them.
			
 
				+	 */
			
 
				+	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
			
 
				+
			
 
				+extern void setup_cpu_entry_areas(void);
			
 
				 
			
 
				 /*
			
 
				  * Here we define all the compile-time 'special' virtual
			
@@ -101,8 +140,8 @@ enum fixed_addresses {
 
				 	FIX_LNW_VRTC,
			
 
				 #endif
			
 
				 	/* Fixmap entries to remap the GDTs, one per processor. */
			
 
				-	FIX_GDT_REMAP_BEGIN,
			
 
				-	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
			
 
				+	FIX_CPU_ENTRY_AREA_TOP,
			
 
				+	FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
			
 
				 
			
 
				 #ifdef CONFIG_ACPI_APEI_GHES
			
 
				 	/* Used for GHES mapping from assorted contexts */
			
@@ -191,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
 
				 void __early_set_fixmap(enum fixed_addresses idx,
			
 
				 			phys_addr_t phys, pgprot_t flags);
			
 
				 
			
 
				+static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
			
 
				+{
			
 
				+	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
			
 
				+
			
 
				+	return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
			
 
				+}
			
 
				+
			
 
				+#define __get_cpu_entry_area_offset_index(cpu, offset) ({		\
			
 
				+	BUILD_BUG_ON(offset % PAGE_SIZE != 0);				\
			
 
				+	__get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);	\
			
 
				+	})
			
 
				+
			
 
				+#define get_cpu_entry_area_index(cpu, field)				\
			
 
				+	__get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
			
 
				+
			
 
				+static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
			
 
				+{
			
 
				+	return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
			
 
				+}
			
 
				+
			
 
				+static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
			
 
				+{
			
 
				+	return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
			
 
				+}
			
 
				+
			
 
				 #endif /* !__ASSEMBLY__ */
			
 
				 #endif /* _ASM_X86_FIXMAP_H */
			
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
 
				 #ifndef _ASM_X86_HYPERVISOR_H
			
 
				 #define _ASM_X86_HYPERVISOR_H
			
 
				 
			
 
				-#ifdef CONFIG_HYPERVISOR_GUEST
			
 
				-
			
 
				-#include <asm/kvm_para.h>
			
 
				-#include <asm/x86_init.h>
			
 
				-#include <asm/xen/hypervisor.h>
			
 
				-
			
 
				-/*
			
 
				- * x86 hypervisor information
			
 
				- */
			
 
				-
			
 
				+/* x86 hypervisor types  */
			
 
				 enum x86_hypervisor_type {
			
 
				 	X86_HYPER_NATIVE = 0,
			
 
				 	X86_HYPER_VMWARE,
			
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
 
				 	X86_HYPER_KVM,
			
 
				 };
			
 
				 
			
 
				+#ifdef CONFIG_HYPERVISOR_GUEST
			
 
				+
			
 
				+#include <asm/kvm_para.h>
			
 
				+#include <asm/x86_init.h>
			
 
				+#include <asm/xen/hypervisor.h>
			
 
				+
			
 
				 struct hypervisor_x86 {
			
 
				 	/* Hypervisor name */
			
 
				 	const char	*name;
			
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
				 
			
 
				 extern enum x86_hypervisor_type x86_hyper_type;
			
 
				 extern void init_hypervisor_platform(void);
			
 
				+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
			
 
				+{
			
 
				+	return x86_hyper_type == type;
			
 
				+}
			
 
				 #else
			
 
				 static inline void init_hypervisor_platform(void) { }
			
 
				+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
			
 
				+{
			
 
				+	return type == X86_HYPER_NATIVE;
			
 
				+}
			
 
				 #endif /* CONFIG_HYPERVISOR_GUEST */
			
 
				 #endif /* _ASM_X86_HYPERVISOR_H */
			
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
 
				 	swapgs;					\
			
 
				 	sysretl
			
 
				 
			
 
				+#ifdef CONFIG_DEBUG_ENTRY
			
 
				+#define SAVE_FLAGS(x)		pushfq; popq %rax
			
 
				+#endif
			
 
				 #else
			
 
				 #define INTERRUPT_RETURN		iret
			
 
				 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
			
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 
				 extern int __must_check __die(const char *, struct pt_regs *, long);
			
 
				 extern void show_stack_regs(struct pt_regs *regs);
			
 
				 extern void __show_regs(struct pt_regs *regs, int all);
			
 
				+extern void show_iret_regs(struct pt_regs *regs);
			
 
				 extern unsigned long oops_begin(void);
			
 
				 extern void oops_end(unsigned long, struct pt_regs *, int signr);
			
 
				 
			
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
 
				 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
			
 
				 		  CLBR_NONE,						\
			
 
				 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
			
 
				+
			
 
				+#ifdef CONFIG_DEBUG_ENTRY
			
 
				+#define SAVE_FLAGS(clobbers)                                        \
			
 
				+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
			
 
				+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
			
 
				+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
			
 
				+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
			
 
				+#endif
			
 
				+
			
 
				 #endif	/* CONFIG_X86_32 */
			
 
				 
			
 
				 #endif /* __ASSEMBLY__ */
			
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 
				 extern struct cpuinfo_x86	boot_cpu_data;
			
 
				 extern struct cpuinfo_x86	new_cpu_data;
			
 
				 
			
 
				-extern struct tss_struct	doublefault_tss;
			
 
				-extern __u32			cpu_caps_cleared[NCAPINTS];
			
 
				-extern __u32			cpu_caps_set[NCAPINTS];
			
 
				+extern struct x86_hw_tss	doublefault_tss;
			
 
				+extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
			
 
				+extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
			
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
 
				 	write_cr3(__sme_pa(pgdir));
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
			
 
				+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
			
 
				+ * unrelated to the task-switch mechanism:
			
 
				+ */
			
 
				 #ifdef CONFIG_X86_32
			
 
				 /* This is the TSS defined by the hardware. */
			
 
				 struct x86_hw_tss {
			
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 
				 struct x86_hw_tss {
			
 
				 	u32			reserved1;
			
 
				 	u64			sp0;
			
 
				+
			
 
				+	/*
			
 
				+	 * We store cpu_current_top_of_stack in sp1 so it's always accessible.
			
 
				+	 * Linux does not use ring 1, so sp1 is not otherwise needed.
			
 
				+	 */
			
 
				 	u64			sp1;
			
 
				+
			
 
				 	u64			sp2;
			
 
				 	u64			reserved2;
			
 
				 	u64			ist[7];
			
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 
				 #define IO_BITMAP_BITS			65536
			
 
				 #define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8)
			
 
				 #define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long))
			
 
				-#define IO_BITMAP_OFFSET		offsetof(struct tss_struct, io_bitmap)
			
 
				+#define IO_BITMAP_OFFSET		(offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
			
 
				 #define INVALID_IO_BITMAP_OFFSET	0x8000
			
 
				 
			
 
				+struct SYSENTER_stack {
			
 
				+	unsigned long		words[64];
			
 
				+};
			
 
				+
			
 
				+struct SYSENTER_stack_page {
			
 
				+	struct SYSENTER_stack stack;
			
 
				+} __aligned(PAGE_SIZE);
			
 
				+
			
 
				 struct tss_struct {
			
 
				 	/*
			
 
				-	 * The hardware state:
			
 
				+	 * The fixed hardware portion.  This must not cross a page boundary
			
 
				+	 * at risk of violating the SDM's advice and potentially triggering
			
 
				+	 * errata.
			
 
				 	 */
			
 
				 	struct x86_hw_tss	x86_tss;
			
 
				 
			
@@ -339,18 +360,9 @@ struct tss_struct {
 
				 	 * be within the limit.
			
 
				 	 */
			
 
				 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
			
 
				+} __aligned(PAGE_SIZE);
			
 
				 
			
 
				-#ifdef CONFIG_X86_32
			
 
				-	/*
			
 
				-	 * Space for the temporary SYSENTER stack.
			
 
				-	 */
			
 
				-	unsigned long		SYSENTER_stack_canary;
			
 
				-	unsigned long		SYSENTER_stack[64];
			
 
				-#endif
			
 
				-
			
 
				-} ____cacheline_aligned;
			
 
				-
			
 
				-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
			
 
				+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
			
 
				 
			
 
				 /*
			
 
				  * sizeof(unsigned long) coming from an extra "long" at the end
			
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
				 
			
 
				 #ifdef CONFIG_X86_32
			
 
				 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
			
 
				+#else
			
 
				+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
			
 
				+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
			
 
				 #endif
			
 
				 
			
 
				 /*
			
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 
				 static inline void
			
 
				 native_load_sp0(unsigned long sp0)
			
 
				 {
			
 
				-	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
			
 
				+	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
			
 
				 }
			
 
				 
			
 
				 static inline void native_swapgs(void)
			
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
				 
			
 
				 static inline unsigned long current_top_of_stack(void)
			
 
				 {
			
 
				-#ifdef CONFIG_X86_64
			
 
				-	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
			
 
				-#else
			
 
				-	/* sp0 on x86_32 is special in and around vm86 mode. */
			
 
				+	/*
			
 
				+	 *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
			
 
				+	 *  and around vm86 mode and sp0 on x86_64 is special because of the
			
 
				+	 *  entry trampoline.
			
 
				+	 */
			
 
				 	return this_cpu_read_stable(cpu_current_top_of_stack);
			
 
				-#endif
			
 
				 }
			
 
				 
			
 
				 static inline bool on_thread_stack(void)
			
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
 
				 	STACK_TYPE_TASK,
			
 
				 	STACK_TYPE_IRQ,
			
 
				 	STACK_TYPE_SOFTIRQ,
			
 
				+	STACK_TYPE_SYSENTER,
			
 
				 	STACK_TYPE_EXCEPTION,
			
 
				 	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
			
 
				 };
			
@@ -28,6 +29,8 @@ struct stack_info {
 
				 bool in_task_stack(unsigned long *stack, struct task_struct *task,
			
 
				 		   struct stack_info *info);
			
 
				 
			
 
				+bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
			
 
				+
			
 
				 int get_stack_info(unsigned long *stack, struct task_struct *task,
			
 
				 		   struct stack_info *info, unsigned long *visit_mask);
			
 
				 
			
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,10 +79,10 @@ do {									\
 
				 static inline void refresh_sysenter_cs(struct thread_struct *thread)
			
 
				 {
			
 
				 	/* Only happens when SEP is enabled, no need to test "SEP"arately: */
			
 
				-	if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
			
 
				+	if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
			
 
				 		return;
			
 
				 
			
 
				-	this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
			
 
				+	this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
			
 
				 	wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
			
 
				 }
			
 
				 #endif
			
@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 
				 /* This is used when switching tasks or entering/exiting vm86 mode. */
			
 
				 static inline void update_sp0(struct task_struct *task)
			
 
				 {
			
 
				+	/* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
			
 
				 #ifdef CONFIG_X86_32
			
 
				 	load_sp0(task->thread.sp0);
			
 
				 #else
			
 
				-	load_sp0(task_top_of_stack(task));
			
 
				+	if (static_cpu_has(X86_FEATURE_XENPV))
			
 
				+		load_sp0(task_top_of_stack(task));
			
 
				 #endif
			
 
				 }
			
 
				 
			
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 
				 #else /* !__ASSEMBLY__ */
			
 
				 
			
 
				 #ifdef CONFIG_X86_64
			
 
				-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
			
 
				+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
			
 
				 #endif
			
 
				 
			
 
				 #endif
			
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 
				 dotraplinkage void do_stack_segment(struct pt_regs *, long);
			
 
				 #ifdef CONFIG_X86_64
			
 
				 dotraplinkage void do_double_fault(struct pt_regs *, long);
			
 
				-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
			
 
				 #endif
			
 
				 dotraplinkage void do_general_protection(struct pt_regs *, long);
			
 
				 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
			
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
 
				 #include <asm/ptrace.h>
			
 
				 #include <asm/stacktrace.h>
			
 
				 
			
 
				+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
			
 
				+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
			
 
				+
			
 
				 struct unwind_state {
			
 
				 	struct stack_info stack_info;
			
 
				 	unsigned long stack_mask;
			
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 
				 }
			
 
				 
			
 
				 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
			
 
				+/*
			
 
				+ * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
			
 
				+ * only the iret frame registers are accessible.  Use with caution!
			
 
				+ */
			
 
				 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
			
 
				 {
			
 
				 	if (unwind_done(state))
			
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -93,4 +93,10 @@ void common(void) {
 
				 
			
 
				 	BLANK();
			
 
				 	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
			
 
				+
			
 
				+	/* Layout info for cpu_entry_area */
			
 
				+	OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
			
 
				+	OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
			
 
				+	OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
			
 
				+	DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
			
 
				 }
			
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
 
				 	BLANK();
			
 
				 
			
 
				 	/* Offset from the sysenter stack to tss.sp0 */
			
 
				-	DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
			
 
				-	       offsetofend(struct tss_struct, SYSENTER_stack));
			
 
				-
			
 
				-	/* Offset from cpu_tss to SYSENTER_stack */
			
 
				-	OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
			
 
				-	/* Size of SYSENTER_stack */
			
 
				-	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
			
 
				+	DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
			
 
				+	       offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
			
 
				 
			
 
				 #ifdef CONFIG_CC_STACKPROTECTOR
			
 
				 	BLANK();
			
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
 
				 #ifdef CONFIG_PARAVIRT
			
 
				 	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
			
 
				 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
			
 
				+#ifdef CONFIG_DEBUG_ENTRY
			
 
				+	OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
			
 
				+#endif
			
 
				 	BLANK();
			
 
				 #endif
			
 
				 
			
@@ -63,6 +66,7 @@ int main(void)
 
				 
			
 
				 	OFFSET(TSS_ist, tss_struct, x86_tss.ist);
			
 
				 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
			
 
				+	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
			
 
				 	BLANK();
			
 
				 
			
 
				 #ifdef CONFIG_CC_STACKPROTECTOR
			
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
 
				 	return NULL;		/* Not found */
			
 
				 }
			
 
				 
			
 
				-__u32 cpu_caps_cleared[NCAPINTS];
			
 
				-__u32 cpu_caps_set[NCAPINTS];
			
 
				+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
			
 
				+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
			
 
				 
			
 
				 void load_percpu_segment(int cpu)
			
 
				 {
			
@@ -490,27 +490,116 @@ void load_percpu_segment(int cpu)
 
				 	load_stack_canary_segment();
			
 
				 }
			
 
				 
			
 
				-/* Setup the fixmap mapping only once per-processor */
			
 
				-static inline void setup_fixmap_gdt(int cpu)
			
 
				+#ifdef CONFIG_X86_32
			
 
				+/* The 32-bit entry code needs to find cpu_entry_area. */
			
 
				+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
			
 
				+#endif
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+/*
			
 
				+ * Special IST stacks which the CPU switches to when it calls
			
 
				+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
			
 
				+ * limit), all of them are 4K, except the debug stack which
			
 
				+ * is 8K.
			
 
				+ */
			
 
				+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
			
 
				+	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
			
 
				+	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
			
 
				+};
			
 
				+
			
 
				+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
			
 
				+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
			
 
				+#endif
			
 
				+
			
 
				+static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
			
 
				+				   SYSENTER_stack_storage);
			
 
				+
			
 
				+static void __init
			
 
				+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
			
 
				+{
			
 
				+	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
			
 
				+		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
			
 
				+}
			
 
				+
			
 
				+/* Setup the fixmap mappings only once per-processor */
			
 
				+static void __init setup_cpu_entry_area(int cpu)
			
 
				 {
			
 
				 #ifdef CONFIG_X86_64
			
 
				-	/* On 64-bit systems, we use a read-only fixmap GDT. */
			
 
				-	pgprot_t prot = PAGE_KERNEL_RO;
			
 
				+	extern char _entry_trampoline[];
			
 
				+
			
 
				+	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
			
 
				+	pgprot_t gdt_prot = PAGE_KERNEL_RO;
			
 
				+	pgprot_t tss_prot = PAGE_KERNEL_RO;
			
 
				 #else
			
 
				 	/*
			
 
				 	 * On native 32-bit systems, the GDT cannot be read-only because
			
 
				 	 * our double fault handler uses a task gate, and entering through
			
 
				-	 * a task gate needs to change an available TSS to busy.  If the GDT
			
 
				-	 * is read-only, that will triple fault.
			
 
				+	 * a task gate needs to change an available TSS to busy.  If the
			
 
				+	 * GDT is read-only, that will triple fault.  The TSS cannot be
			
 
				+	 * read-only because the CPU writes to it on task switches.
			
 
				 	 *
			
 
				-	 * On Xen PV, the GDT must be read-only because the hypervisor requires
			
 
				-	 * it.
			
 
				+	 * On Xen PV, the GDT must be read-only because the hypervisor
			
 
				+	 * requires it.
			
 
				 	 */
			
 
				-	pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
			
 
				+	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
			
 
				 		PAGE_KERNEL_RO : PAGE_KERNEL;
			
 
				+	pgprot_t tss_prot = PAGE_KERNEL;
			
 
				 #endif
			
 
				 
			
 
				-	__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
			
 
				+	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
			
 
				+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
			
 
				+				per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
			
 
				+				PAGE_KERNEL);
			
 
				+
			
 
				+	/*
			
 
				+	 * The Intel SDM says (Volume 3, 7.2.1):
			
 
				+	 *
			
 
				+	 *  Avoid placing a page boundary in the part of the TSS that the
			
 
				+	 *  processor reads during a task switch (the first 104 bytes). The
			
 
				+	 *  processor may not correctly perform address translations if a
			
 
				+	 *  boundary occurs in this area. During a task switch, the processor
			
 
				+	 *  reads and writes into the first 104 bytes of each TSS (using
			
 
				+	 *  contiguous physical addresses beginning with the physical address
			
 
				+	 *  of the first byte of the TSS). So, after TSS access begins, if
			
 
				+	 *  part of the 104 bytes is not physically contiguous, the processor
			
 
				+	 *  will access incorrect information without generating a page-fault
			
 
				+	 *  exception.
			
 
				+	 *
			
 
				+	 * There are also a lot of errata involving the TSS spanning a page
			
 
				+	 * boundary.  Assert that we're not doing that.
			
 
				+	 */
			
 
				+	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
			
 
				+		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
			
 
				+	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
			
 
				+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
			
 
				+				&per_cpu(cpu_tss_rw, cpu),
			
 
				+				sizeof(struct tss_struct) / PAGE_SIZE,
			
 
				+				tss_prot);
			
 
				+
			
 
				+#ifdef CONFIG_X86_32
			
 
				+	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
			
 
				+#endif
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
			
 
				+	BUILD_BUG_ON(sizeof(exception_stacks) !=
			
 
				+		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
			
 
				+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
			
 
				+				&per_cpu(exception_stacks, cpu),
			
 
				+				sizeof(exception_stacks) / PAGE_SIZE,
			
 
				+				PAGE_KERNEL);
			
 
				+
			
 
				+	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
			
 
				+		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+void __init setup_cpu_entry_areas(void)
			
 
				+{
			
 
				+	unsigned int cpu;
			
 
				+
			
 
				+	for_each_possible_cpu(cpu)
			
 
				+		setup_cpu_entry_area(cpu);
			
 
				 }
			
 
				 
			
 
				 /* Load the original GDT from the per-cpu structure */
			
@@ -747,7 +836,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 
				 {
			
 
				 	int i;
			
 
				 
			
 
				-	for (i = 0; i < NCAPINTS; i++) {
			
 
				+	for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
			
 
				 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
			
 
				 		c->x86_capability[i] |= cpu_caps_set[i];
			
 
				 	}
			
@@ -1250,7 +1339,7 @@ void enable_sep_cpu(void)
 
				 		return;
			
 
				 
			
 
				 	cpu = get_cpu();
			
 
				-	tss = &per_cpu(cpu_tss, cpu);
			
 
				+	tss = &per_cpu(cpu_tss_rw, cpu);
			
 
				 
			
 
				 	/*
			
 
				 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
			
@@ -1259,11 +1348,7 @@ void enable_sep_cpu(void)
 
				 
			
 
				 	tss->x86_tss.ss1 = __KERNEL_CS;
			
 
				 	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
			
 
				-
			
 
				-	wrmsr(MSR_IA32_SYSENTER_ESP,
			
 
				-	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
			
 
				-	      0);
			
 
				-
			
 
				+	wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
			
 
				 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
			
 
				 
			
 
				 	put_cpu();
			
@@ -1357,25 +1442,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
				 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
			
 
				 EXPORT_PER_CPU_SYMBOL(__preempt_count);
			
 
				 
			
 
				-/*
			
 
				- * Special IST stacks which the CPU switches to when it calls
			
 
				- * an IST-marked descriptor entry. Up to 7 stacks (hardware
			
 
				- * limit), all of them are 4K, except the debug stack which
			
 
				- * is 8K.
			
 
				- */
			
 
				-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
			
 
				-	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
			
 
				-	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
			
 
				-};
			
 
				-
			
 
				-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
			
 
				-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
			
 
				-
			
 
				 /* May not be marked __init: used by software suspend */
			
 
				 void syscall_init(void)
			
 
				 {
			
 
				+	extern char _entry_trampoline[];
			
 
				+	extern char entry_SYSCALL_64_trampoline[];
			
 
				+
			
 
				+	int cpu = smp_processor_id();
			
 
				+	unsigned long SYSCALL64_entry_trampoline =
			
 
				+		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
			
 
				+		(entry_SYSCALL_64_trampoline - _entry_trampoline);
			
 
				+
			
 
				 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
			
 
				-	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
			
 
				+	wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
			
 
				 
			
 
				 #ifdef CONFIG_IA32_EMULATION
			
 
				 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
			
@@ -1386,7 +1465,7 @@ void syscall_init(void)
 
				 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
			
 
				 	 */
			
 
				 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
			
 
				-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
			
 
				+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
			
 
				 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
			
 
				 #else
			
 
				 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
			
@@ -1530,7 +1609,7 @@ void cpu_init(void)
 
				 	if (cpu)
			
 
				 		load_ucode_ap();
			
 
				 
			
 
				-	t = &per_cpu(cpu_tss, cpu);
			
 
				+	t = &per_cpu(cpu_tss_rw, cpu);
			
 
				 	oist = &per_cpu(orig_ist, cpu);
			
 
				 
			
 
				 #ifdef CONFIG_NUMA
			
@@ -1569,7 +1648,7 @@ void cpu_init(void)
 
				 	 * set up and load the per-CPU TSS
			
 
				 	 */
			
 
				 	if (!oist->ist[0]) {
			
 
				-		char *estacks = per_cpu(exception_stacks, cpu);
			
 
				+		char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
			
 
				 
			
 
				 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
			
 
				 			estacks += exception_stack_sizes[v];
			
@@ -1580,7 +1659,7 @@ void cpu_init(void)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
			
 
				+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
			
 
				 
			
 
				 	/*
			
 
				 	 * <= is required because the CPU will access up to
			
@@ -1596,11 +1675,12 @@ void cpu_init(void)
 
				 	enter_lazy_tlb(&init_mm, me);
			
 
				 
			
 
				 	/*
			
 
				-	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
			
 
				-	 * task never enters user mode.
			
 
				+	 * Initialize the TSS.  sp0 points to the entry trampoline stack
			
 
				+	 * regardless of what task is running.
			
 
				 	 */
			
 
				-	set_tss_desc(cpu, t);
			
 
				+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
			
 
				 	load_TR_desc();
			
 
				+	load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
			
 
				 
			
 
				 	load_mm_ldt(&init_mm);
			
 
				 
			
@@ -1612,7 +1692,6 @@ void cpu_init(void)
 
				 	if (is_uv_system())
			
 
				 		uv_cpu_init();
			
 
				 
			
 
				-	setup_fixmap_gdt(cpu);
			
 
				 	load_fixmap_gdt(cpu);
			
 
				 }
			
 
				 
			
@@ -1622,7 +1701,7 @@ void cpu_init(void)
 
				 {
			
 
				 	int cpu = smp_processor_id();
			
 
				 	struct task_struct *curr = current;
			
 
				-	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
			
 
				+	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
			
 
				 
			
 
				 	wait_for_master_cpu(cpu);
			
 
				 
			
@@ -1657,12 +1736,12 @@ void cpu_init(void)
 
				 	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
			
 
				 	 * task never enters user mode.
			
 
				 	 */
			
 
				-	set_tss_desc(cpu, t);
			
 
				+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
			
 
				 	load_TR_desc();
			
 
				 
			
 
				 	load_mm_ldt(&init_mm);
			
 
				 
			
 
				-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
			
 
				+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
			
 
				 
			
 
				 #ifdef CONFIG_DOUBLEFAULT
			
 
				 	/* Set up doublefault TSS pointer in the GDT */
			
@@ -1674,7 +1753,6 @@ void cpu_init(void)
 
				 
			
 
				 	fpu__init_cpu();
			
 
				 
			
 
				-	setup_fixmap_gdt(cpu);
			
 
				 	load_fixmap_gdt(cpu);
			
 
				 }
			
 
				 #endif
			
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
 
				 		cpu_relax();
			
 
				 }
			
 
				 
			
 
				-struct tss_struct doublefault_tss __cacheline_aligned = {
			
 
				-	.x86_tss = {
			
 
				-		.sp0		= STACK_START,
			
 
				-		.ss0		= __KERNEL_DS,
			
 
				-		.ldt		= 0,
			
 
				-		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
			
 
				-
			
 
				-		.ip		= (unsigned long) doublefault_fn,
			
 
				-		/* 0x2 bit is always set */
			
 
				-		.flags		= X86_EFLAGS_SF | 0x2,
			
 
				-		.sp		= STACK_START,
			
 
				-		.es		= __USER_DS,
			
 
				-		.cs		= __KERNEL_CS,
			
 
				-		.ss		= __KERNEL_DS,
			
 
				-		.ds		= __USER_DS,
			
 
				-		.fs		= __KERNEL_PERCPU,
			
 
				-
			
 
				-		.__cr3		= __pa_nodebug(swapper_pg_dir),
			
 
				-	}
			
 
				+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
			
 
				+	.sp0		= STACK_START,
			
 
				+	.ss0		= __KERNEL_DS,
			
 
				+	.ldt		= 0,
			
 
				+	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
			
 
				+
			
 
				+	.ip		= (unsigned long) doublefault_fn,
			
 
				+	/* 0x2 bit is always set */
			
 
				+	.flags		= X86_EFLAGS_SF | 0x2,
			
 
				+	.sp		= STACK_START,
			
 
				+	.es		= __USER_DS,
			
 
				+	.cs		= __KERNEL_CS,
			
 
				+	.ss		= __KERNEL_DS,
			
 
				+	.ds		= __USER_DS,
			
 
				+	.fs		= __KERNEL_PERCPU,
			
 
				+
			
 
				+	.__cr3		= __pa_nodebug(swapper_pg_dir),
			
 
				 };
			
 
				 
			
 
				 /* dummy for do_double_fault() call */
			
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				+bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
			
 
				+{
			
 
				+	struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
			
 
				+
			
 
				+	void *begin = ss;
			
 
				+	void *end = ss + 1;
			
 
				+
			
 
				+	if ((void *)stack < begin || (void *)stack >= end)
			
 
				+		return false;
			
 
				+
			
 
				+	info->type	= STACK_TYPE_SYSENTER;
			
 
				+	info->begin	= begin;
			
 
				+	info->end	= end;
			
 
				+	info->next_sp	= NULL;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				 static void printk_stack_address(unsigned long address, int reliable,
			
 
				 				 char *log_lvl)
			
 
				 {
			
@@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable,
 
				 	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
			
 
				 }
			
 
				 
			
 
				+void show_iret_regs(struct pt_regs *regs)
			
 
				+{
			
 
				+	printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
			
 
				+	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
			
 
				+		regs->sp, regs->flags);
			
 
				+}
			
 
				+
			
 
				+static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
			
 
				+{
			
 
				+	if (on_stack(info, regs, sizeof(*regs)))
			
 
				+		__show_regs(regs, 0);
			
 
				+	else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
			
 
				+			  IRET_FRAME_SIZE)) {
			
 
				+		/*
			
 
				+		 * When an interrupt or exception occurs in entry code, the
			
 
				+		 * full pt_regs might not have been saved yet.  In that case
			
 
				+		 * just print the iret frame.
			
 
				+		 */
			
 
				+		show_iret_regs(regs);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
			
 
				 			unsigned long *stack, char *log_lvl)
			
 
				 {
			
@@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
				 	 * - task stack
			
 
				 	 * - interrupt stack
			
 
				 	 * - HW exception stacks (double fault, nmi, debug, mce)
			
 
				+	 * - SYSENTER stack
			
 
				 	 *
			
 
				-	 * x86-32 can have up to three stacks:
			
 
				+	 * x86-32 can have up to four stacks:
			
 
				 	 * - task stack
			
 
				 	 * - softirq stack
			
 
				 	 * - hardirq stack
			
 
				+	 * - SYSENTER stack
			
 
				 	 */
			
 
				 	for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
			
 
				 		const char *stack_name;
			
 
				 
			
 
				-		/*
			
 
				-		 * If we overflowed the task stack into a guard page, jump back
			
 
				-		 * to the bottom of the usable stack.
			
 
				-		 */
			
 
				-		if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
			
 
				-			stack = task_stack_page(task);
			
 
				-
			
 
				-		if (get_stack_info(stack, task, &stack_info, &visit_mask))
			
 
				-			break;
			
 
				+		if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
			
 
				+			/*
			
 
				+			 * We weren't on a valid stack.  It's possible that
			
 
				+			 * we overflowed a valid stack into a guard page.
			
 
				+			 * See if the next page up is valid so that we can
			
 
				+			 * generate some kind of backtrace if this happens.
			
 
				+			 */
			
 
				+			stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
			
 
				+			if (get_stack_info(stack, task, &stack_info, &visit_mask))
			
 
				+				break;
			
 
				+		}
			
 
				 
			
 
				 		stack_name = stack_type_name(stack_info.type);
			
 
				 		if (stack_name)
			
 
				 			printk("%s <%s>\n", log_lvl, stack_name);
			
 
				 
			
 
				-		if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
			
 
				-			__show_regs(regs, 0);
			
 
				+		if (regs)
			
 
				+			show_regs_safe(&stack_info, regs);
			
 
				 
			
 
				 		/*
			
 
				 		 * Scan the stack, printing any text addresses we find.  At the
			
@@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
				 
			
 
				 			/*
			
 
				 			 * Don't print regs->ip again if it was already printed
			
 
				-			 * by __show_regs() below.
			
 
				+			 * by show_regs_safe() below.
			
 
				 			 */
			
 
				 			if (regs && stack == &regs->ip)
			
 
				 				goto next;
			
@@ -155,8 +199,8 @@ next:
 
				 
			
 
				 			/* if the frame has entry regs, print them */
			
 
				 			regs = unwind_get_entry_regs(&state);
			
 
				-			if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
			
 
				-				__show_regs(regs, 0);
			
 
				+			if (regs)
			
 
				+				show_regs_safe(&stack_info, regs);
			
 
				 		}
			
 
				 
			
 
				 		if (stack_name)
			
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
 
				 	if (type == STACK_TYPE_SOFTIRQ)
			
 
				 		return "SOFTIRQ";
			
 
				 
			
 
				+	if (type == STACK_TYPE_SYSENTER)
			
 
				+		return "SYSENTER";
			
 
				+
			
 
				 	return NULL;
			
 
				 }
			
 
				 
			
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 
				 	if (task != current)
			
 
				 		goto unknown;
			
 
				 
			
 
				+	if (in_sysenter_stack(stack, info))
			
 
				+		goto recursion_check;
			
 
				+
			
 
				 	if (in_hardirq_stack(stack, info))
			
 
				 		goto recursion_check;
			
 
				 
			
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type)
 
				 	if (type == STACK_TYPE_IRQ)
			
 
				 		return "IRQ";
			
 
				 
			
 
				+	if (type == STACK_TYPE_SYSENTER)
			
 
				+		return "SYSENTER";
			
 
				+
			
 
				 	if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
			
 
				 		return exception_stack_names[type - STACK_TYPE_EXCEPTION];
			
 
				 
			
@@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 
				 	if (in_irq_stack(stack, info))
			
 
				 		goto recursion_check;
			
 
				 
			
 
				+	if (in_sysenter_stack(stack, info))
			
 
				+		goto recursion_check;
			
 
				+
			
 
				 	goto unknown;
			
 
				 
			
 
				 recursion_check:
			
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
				 	 * because the ->io_bitmap_max value must match the bitmap
			
 
				 	 * contents:
			
 
				 	 */
			
 
				-	tss = &per_cpu(cpu_tss, get_cpu());
			
 
				+	tss = &per_cpu(cpu_tss_rw, get_cpu());
			
 
				 
			
 
				 	if (turn_on)
			
 
				 		bitmap_clear(t->io_bitmap_ptr, from, num);
			
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 
				 	/* high bit used in ret_from_ code  */
			
 
				 	unsigned vector = ~regs->orig_ax;
			
 
				 
			
 
				-	/*
			
 
				-	 * NB: Unlike exception entries, IRQ entries do not reliably
			
 
				-	 * handle context tracking in the low-level entry code.  This is
			
 
				-	 * because syscall entries execute briefly with IRQs on before
			
 
				-	 * updating context tracking state, so we can take an IRQ from
			
 
				-	 * kernel mode with CONTEXT_USER.  The low-level entry code only
			
 
				-	 * updates the context if we came from user mode, so we won't
			
 
				-	 * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
			
 
				-	 * code is cleaned up enough that we can cleanly defer enabling
			
 
				-	 * IRQs.
			
 
				-	 */
			
 
				-
			
 
				 	entering_irq();
			
 
				 
			
 
				 	/* entering_irq() tells RCU that we're not quiescent.  Check it. */
			
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 
				 	if (regs->sp >= estack_top && regs->sp <= estack_bottom)
			
 
				 		return;
			
 
				 
			
 
				-	WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
			
 
				+	WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
			
 
				 		current->comm, curbase, regs->sp,
			
 
				 		irq_stack_top, irq_stack_bottom,
			
 
				-		estack_top, estack_bottom);
			
 
				+		estack_top, estack_bottom, (void *)regs->ip);
			
 
				 
			
 
				 	if (sysctl_panic_on_stackoverflow)
			
 
				 		panic("low stack detected by irq handler - check messages\n");
			
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
 
				 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
			
 
				 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
			
 
				 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
			
 
				-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
			
 
				 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
			
 
				 
			
 
				 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
			
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 
				 		PATCH_SITE(pv_mmu_ops, read_cr2);
			
 
				 		PATCH_SITE(pv_mmu_ops, read_cr3);
			
 
				 		PATCH_SITE(pv_mmu_ops, write_cr3);
			
 
				-		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
			
 
				 		PATCH_SITE(pv_cpu_ops, wbinvd);
			
 
				 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
			
 
				 		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
			
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
 
				  * section. Since TSS's are completely CPU-local, we want them
			
 
				  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
			
 
				  */
			
 
				-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
			
 
				+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
			
 
				 	.x86_tss = {
			
 
				 		/*
			
 
				 		 * .sp0 is only used when entering ring 0 from a lower
			
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 
				 		 * Poison it.
			
 
				 		 */
			
 
				 		.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+		/*
			
 
				+		 * .sp1 is cpu_current_top_of_stack.  The init task never
			
 
				+		 * runs user code, but cpu_current_top_of_stack should still
			
 
				+		 * be well defined before the first context switch.
			
 
				+		 */
			
 
				+		.sp1 = TOP_OF_INIT_STACK,
			
 
				+#endif
			
 
				+
			
 
				 #ifdef CONFIG_X86_32
			
 
				 		.ss0 = __KERNEL_DS,
			
 
				 		.ss1 = __KERNEL_CS,
			
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 
				 	  */
			
 
				 	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
			
 
				 #endif
			
 
				-#ifdef CONFIG_X86_32
			
 
				-	.SYSENTER_stack_canary	= STACK_END_MAGIC,
			
 
				-#endif
			
 
				 };
			
 
				-EXPORT_PER_CPU_SYMBOL(cpu_tss);
			
 
				+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
			
 
				 
			
 
				 DEFINE_PER_CPU(bool, __tss_limit_invalid);
			
 
				 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
			
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
 
				 	struct fpu *fpu = &t->fpu;
			
 
				 
			
 
				 	if (bp) {
			
 
				-		struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
			
 
				+		struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
			
 
				 
			
 
				 		t->io_bitmap_ptr = NULL;
			
 
				 		clear_thread_flag(TIF_IO_BITMAP);
			
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
				 	struct fpu *prev_fpu = &prev->fpu;
			
 
				 	struct fpu *next_fpu = &next->fpu;
			
 
				 	int cpu = smp_processor_id();
			
 
				-	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
			
 
				+	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
			
 
				 
			
 
				 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
			
 
				 
			
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
 
				 	unsigned int fsindex, gsindex;
			
 
				 	unsigned int ds, cs, es;
			
 
				 
			
 
				-	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
			
 
				-	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
			
 
				-		regs->sp, regs->flags);
			
 
				+	show_iret_regs(regs);
			
 
				+
			
 
				 	if (regs->orig_ax != -1)
			
 
				 		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
			
 
				 	else
			
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
 
				 	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
			
 
				 	       regs->r13, regs->r14, regs->r15);
			
 
				 
			
 
				+	if (!all)
			
 
				+		return;
			
 
				+
			
 
				 	asm("movl %%ds,%0" : "=r" (ds));
			
 
				 	asm("movl %%cs,%0" : "=r" (cs));
			
 
				 	asm("movl %%es,%0" : "=r" (es));
			
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
 
				 	rdmsrl(MSR_GS_BASE, gs);
			
 
				 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
			
 
				 
			
 
				-	if (!all)
			
 
				-		return;
			
 
				-
			
 
				 	cr0 = read_cr0();
			
 
				 	cr2 = read_cr2();
			
 
				 	cr3 = __read_cr3();
			
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
				 	struct fpu *prev_fpu = &prev->fpu;
			
 
				 	struct fpu *next_fpu = &next->fpu;
			
 
				 	int cpu = smp_processor_id();
			
 
				-	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
			
 
				+	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
			
 
				 
			
 
				 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
			
 
				 		     this_cpu_read(irq_count) != -1);
			
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
				 	 * Switch the PDA and FPU contexts.
			
 
				 	 */
			
 
				 	this_cpu_write(current_task, next_p);
			
 
				+	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
			
 
				 
			
 
				 	/* Reload sp0. */
			
 
				 	update_sp0(next_p);
			
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -348,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
				 
			
 
				 	/*
			
 
				 	 * If IRET takes a non-IST fault on the espfix64 stack, then we
			
 
				-	 * end up promoting it to a doublefault.  In that case, modify
			
 
				-	 * the stack to make it look like we just entered the #GP
			
 
				-	 * handler from user space, similar to bad_iret.
			
 
				+	 * end up promoting it to a doublefault.  In that case, take
			
 
				+	 * advantage of the fact that we're not using the normal (TSS.sp0)
			
 
				+	 * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
			
 
				+	 * and then modify our own IRET frame so that, when we return,
			
 
				+	 * we land directly at the #GP(0) vector with the stack already
			
 
				+	 * set up according to its expectations.
			
 
				+	 *
			
 
				+	 * The net result is that our #GP handler will think that we
			
 
				+	 * entered from usermode with the bad user context.
			
 
				 	 *
			
 
				 	 * No need for ist_enter here because we don't use RCU.
			
 
				 	 */
			
@@ -358,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
				 		regs->cs == __KERNEL_CS &&
			
 
				 		regs->ip == (unsigned long)native_irq_return_iret)
			
 
				 	{
			
 
				-		struct pt_regs *normal_regs = task_pt_regs(current);
			
 
				+		struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
			
 
				 
			
 
				-		/* Fake a #GP(0) from userspace. */
			
 
				-		memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
			
 
				-		normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
			
 
				+		/*
			
 
				+		 * regs->sp points to the failing IRET frame on the
			
 
				+		 * ESPFIX64 stack.  Copy it to the entry stack.  This fills
			
 
				+		 * in gpregs->ss through gpregs->ip.
			
 
				+		 *
			
 
				+		 */
			
 
				+		memmove(&gpregs->ip, (void *)regs->sp, 5*8);
			
 
				+		gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
			
 
				+
			
 
				+		/*
			
 
				+		 * Adjust our frame so that we return straight to the #GP
			
 
				+		 * vector with the expected RSP value.  This is safe because
			
 
				+		 * we won't enable interrupts or schedule before we invoke
			
 
				+		 * general_protection, so nothing will clobber the stack
			
 
				+		 * frame we just set up.
			
 
				+		 */
			
 
				 		regs->ip = (unsigned long)general_protection;
			
 
				-		regs->sp = (unsigned long)&normal_regs->orig_ax;
			
 
				+		regs->sp = (unsigned long)&gpregs->orig_ax;
			
 
				 
			
 
				 		return;
			
 
				 	}
			
@@ -389,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
				 	 *
			
 
				 	 *   Processors update CR2 whenever a page fault is detected. If a
			
 
				 	 *   second page fault occurs while an earlier page fault is being
			
 
				-	 *   deliv- ered, the faulting linear address of the second fault will
			
 
				+	 *   delivered, the faulting linear address of the second fault will
			
 
				 	 *   overwrite the contents of CR2 (replacing the previous
			
 
				 	 *   address). These updates to CR2 occur even if the page fault
			
 
				 	 *   results in a double fault or occurs during the delivery of a
			
@@ -605,14 +624,15 @@ NOKPROBE_SYMBOL(do_int3);
 
				 
			
 
				 #ifdef CONFIG_X86_64
			
 
				 /*
			
 
				- * Help handler running on IST stack to switch off the IST stack if the
			
 
				- * interrupted code was in user mode. The actual stack switch is done in
			
 
				- * entry_64.S
			
 
				+ * Help handler running on a per-cpu (IST or entry trampoline) stack
			
 
				+ * to switch to the normal thread stack if the interrupted code was in
			
 
				+ * user mode. The actual stack switch is done in entry_64.S
			
 
				  */
			
 
				 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
			
 
				 {
			
 
				-	struct pt_regs *regs = task_pt_regs(current);
			
 
				-	*regs = *eregs;
			
 
				+	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
			
 
				+	if (regs != eregs)
			
 
				+		*regs = *eregs;
			
 
				 	return regs;
			
 
				 }
			
 
				 NOKPROBE_SYMBOL(sync_regs);
			
@@ -628,13 +648,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
 
				 	/*
			
 
				 	 * This is called from entry_64.S early in handling a fault
			
 
				 	 * caused by a bad iret to user mode.  To handle the fault
			
 
				-	 * correctly, we want move our stack frame to task_pt_regs
			
 
				-	 * and we want to pretend that the exception came from the
			
 
				-	 * iret target.
			
 
				+	 * correctly, we want to move our stack frame to where it would
			
 
				+	 * be had we entered directly on the entry stack (rather than
			
 
				+	 * just below the IRET frame) and we want to pretend that the
			
 
				+	 * exception came from the IRET target.
			
 
				 	 */
			
 
				 	struct bad_iret_stack *new_stack =
			
 
				-		container_of(task_pt_regs(current),
			
 
				-			     struct bad_iret_stack, regs);
			
 
				+		(struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
			
 
				 
			
 
				 	/* Copy the IRET target to the new stack. */
			
 
				 	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
			
@@ -795,14 +815,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 
				 	debug_stack_usage_dec();
			
 
				 
			
 
				 exit:
			
 
				-#if defined(CONFIG_X86_32)
			
 
				-	/*
			
 
				-	 * This is the most likely code path that involves non-trivial use
			
 
				-	 * of the SYSENTER stack.  Check that we haven't overrun it.
			
 
				-	 */
			
 
				-	WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
			
 
				-	     "Overran or corrupted SYSENTER stack\n");
			
 
				-#endif
			
 
				 	ist_exit(regs);
			
 
				 }
			
 
				 NOKPROBE_SYMBOL(do_debug);
			
@@ -929,6 +941,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
				 
			
 
				 void __init trap_init(void)
			
 
				 {
			
 
				+	/* Init cpu_entry_area before IST entries are set up */
			
 
				+	setup_cpu_entry_areas();
			
 
				+
			
 
				 	idt_setup_traps();
			
 
				 
			
 
				 	/*
			
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
			
 
				+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
			
 
				 			    size_t len)
			
 
				 {
			
 
				 	struct stack_info *info = &state->stack_info;
			
 
				+	void *addr = (void *)_addr;
			
 
				 
			
 
				-	/*
			
 
				-	 * If the address isn't on the current stack, switch to the next one.
			
 
				-	 *
			
 
				-	 * We may have to traverse multiple stacks to deal with the possibility
			
 
				-	 * that info->next_sp could point to an empty stack and the address
			
 
				-	 * could be on a subsequent stack.
			
 
				-	 */
			
 
				-	while (!on_stack(info, (void *)addr, len))
			
 
				-		if (get_stack_info(info->next_sp, state->task, info,
			
 
				-				   &state->stack_mask))
			
 
				-			return false;
			
 
				+	if (!on_stack(info, addr, len) &&
			
 
				+	    (get_stack_info(addr, state->task, info, &state->stack_mask)))
			
 
				+		return false;
			
 
				 
			
 
				 	return true;
			
 
				 }
			
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-#define REGS_SIZE (sizeof(struct pt_regs))
			
 
				-#define SP_OFFSET (offsetof(struct pt_regs, sp))
			
 
				-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
			
 
				-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
			
 
				-
			
 
				 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
			
 
				-			     unsigned long *ip, unsigned long *sp, bool full)
			
 
				+			     unsigned long *ip, unsigned long *sp)
			
 
				 {
			
 
				-	size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
			
 
				-	size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
			
 
				-	struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
			
 
				-
			
 
				-	if (IS_ENABLED(CONFIG_X86_64)) {
			
 
				-		if (!stack_access_ok(state, addr, regs_size))
			
 
				-			return false;
			
 
				+	struct pt_regs *regs = (struct pt_regs *)addr;
			
 
				 
			
 
				-		*ip = regs->ip;
			
 
				-		*sp = regs->sp;
			
 
				+	/* x86-32 support will be more complicated due to the &regs->sp hack */
			
 
				+	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
			
 
				 
			
 
				-		return true;
			
 
				-	}
			
 
				-
			
 
				-	if (!stack_access_ok(state, addr, sp_offset))
			
 
				+	if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
			
 
				 		return false;
			
 
				 
			
 
				 	*ip = regs->ip;
			
 
				+	*sp = regs->sp;
			
 
				+	return true;
			
 
				+}
			
 
				 
			
 
				-	if (user_mode(regs)) {
			
 
				-		if (!stack_access_ok(state, addr + sp_offset,
			
 
				-				     REGS_SIZE - SP_OFFSET))
			
 
				-			return false;
			
 
				+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
			
 
				+				  unsigned long *ip, unsigned long *sp)
			
 
				+{
			
 
				+	struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
			
 
				 
			
 
				-		*sp = regs->sp;
			
 
				-	} else
			
 
				-		*sp = (unsigned long)&regs->sp;
			
 
				+	if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
			
 
				+		return false;
			
 
				 
			
 
				+	*ip = regs->ip;
			
 
				+	*sp = regs->sp;
			
 
				 	return true;
			
 
				 }
			
 
				 
			
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
 
				 	unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
			
 
				 	enum stack_type prev_type = state->stack_info.type;
			
 
				 	struct orc_entry *orc;
			
 
				-	struct pt_regs *ptregs;
			
 
				 	bool indirect = false;
			
 
				 
			
 
				 	if (unwind_done(state))
			
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
				 		break;
			
 
				 
			
 
				 	case ORC_TYPE_REGS:
			
 
				-		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
			
 
				+		if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
			
 
				 			orc_warn("can't dereference registers at %p for ip %pB\n",
			
 
				 				 (void *)sp, (void *)orig_ip);
			
 
				 			goto done;
			
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
 
				 		break;
			
 
				 
			
 
				 	case ORC_TYPE_REGS_IRET:
			
 
				-		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
			
 
				+		if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
			
 
				 			orc_warn("can't dereference iret registers at %p for ip %pB\n",
			
 
				 				 (void *)sp, (void *)orig_ip);
			
 
				 			goto done;
			
 
				 		}
			
 
				 
			
 
				-		ptregs = container_of((void *)sp, struct pt_regs, ip);
			
 
				-		if ((unsigned long)ptregs >= prev_sp &&
			
 
				-		    on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
			
 
				-			state->regs = ptregs;
			
 
				-			state->full_regs = false;
			
 
				-		} else
			
 
				-			state->regs = NULL;
			
 
				-
			
 
				+		state->regs = (void *)sp - IRET_FRAME_OFFSET;
			
 
				+		state->full_regs = false;
			
 
				 		state->signal = true;
			
 
				 		break;
			
 
				 
			
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 
				 	}
			
 
				 
			
 
				 	if (get_stack_info((unsigned long *)state->sp, state->task,
			
 
				-			   &state->stack_info, &state->stack_mask))
			
 
				-		return;
			
 
				+			   &state->stack_info, &state->stack_mask)) {
			
 
				+		/*
			
 
				+		 * We weren't on a valid stack.  It's possible that
			
 
				+		 * we overflowed a valid stack into a guard page.
			
 
				+		 * See if the next page up is valid so that we can
			
 
				+		 * generate some kind of backtrace if this happens.
			
 
				+		 */
			
 
				+		void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
			
 
				+		if (get_stack_info(next_page, state->task, &state->stack_info,
			
 
				+				   &state->stack_mask))
			
 
				+			return;
			
 
				+	}
			
 
				 
			
 
				 	/*
			
 
				 	 * The caller can provide the address of the first frame directly
			
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -107,6 +107,15 @@ SECTIONS
 
				 		SOFTIRQENTRY_TEXT
			
 
				 		*(.fixup)
			
 
				 		*(.gnu.warning)
			
 
				+
			
 
				+#ifdef CONFIG_X86_64
			
 
				+		. = ALIGN(PAGE_SIZE);
			
 
				+		_entry_trampoline = .;
			
 
				+		*(.entry_trampoline)
			
 
				+		. = ALIGN(PAGE_SIZE);
			
 
				+		ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
			
 
				+#endif
			
 
				+
			
 
				 		/* End of text section */
			
 
				 		_etext = .;
			
 
				 	} :text = 0x9090
			
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
 
				 }
			
 
				 
			
 
				 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
			
 
				-				     u64 cr0, u64 cr4)
			
 
				+				    u64 cr0, u64 cr3, u64 cr4)
			
 
				 {
			
 
				 	int bad;
			
 
				+	u64 pcid;
			
 
				+
			
 
				+	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
			
 
				+	pcid = 0;
			
 
				+	if (cr4 & X86_CR4_PCIDE) {
			
 
				+		pcid = cr3 & 0xfff;
			
 
				+		cr3 &= ~0xfff;
			
 
				+	}
			
 
				+
			
 
				+	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
			
 
				+	if (bad)
			
 
				+		return X86EMUL_UNHANDLEABLE;
			
 
				 
			
 
				 	/*
			
 
				 	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
			
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
 
				 		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
			
 
				 		if (bad)
			
 
				 			return X86EMUL_UNHANDLEABLE;
			
 
				+		if (pcid) {
			
 
				+			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
			
 
				+			if (bad)
			
 
				+				return X86EMUL_UNHANDLEABLE;
			
 
				+		}
			
 
				+
			
 
				 	}
			
 
				 
			
 
				 	return X86EMUL_CONTINUE;
			
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
				 	struct desc_struct desc;
			
 
				 	struct desc_ptr dt;
			
 
				 	u16 selector;
			
 
				-	u32 val, cr0, cr4;
			
 
				+	u32 val, cr0, cr3, cr4;
			
 
				 	int i;
			
 
				 
			
 
				 	cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
			
 
				-	ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
			
 
				+	cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
			
 
				 	ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
			
 
				 	ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
			
 
				 
			
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
				 
			
 
				 	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
			
 
				 
			
 
				-	return rsm_enter_protected_mode(ctxt, cr0, cr4);
			
 
				+	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
			
 
				 }
			
 
				 
			
 
				 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
			
 
				 {
			
 
				 	struct desc_struct desc;
			
 
				 	struct desc_ptr dt;
			
 
				-	u64 val, cr0, cr4;
			
 
				+	u64 val, cr0, cr3, cr4;
			
 
				 	u32 base3;
			
 
				 	u16 selector;
			
 
				 	int i, r;
			
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
				 	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
			
 
				 
			
 
				 	cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
			
 
				-	ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
			
 
				+	cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
			
 
				 	cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
			
 
				 	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
			
 
				 	val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
			
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
				 	dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
			
 
				 	ctxt->ops->set_gdt(ctxt, &dt);
			
 
				 
			
 
				-	r = rsm_enter_protected_mode(ctxt, cr0, cr4);
			
 
				+	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
			
 
				 	if (r != X86EMUL_CONTINUE)
			
 
				 		return r;
			
 
				 
			
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
				 		spin_lock(&vcpu->kvm->mmu_lock);
			
 
				 		if(make_mmu_pages_available(vcpu) < 0) {
			
 
				 			spin_unlock(&vcpu->kvm->mmu_lock);
			
 
				-			return 1;
			
 
				+			return -ENOSPC;
			
 
				 		}
			
 
				 		sp = kvm_mmu_get_page(vcpu, 0, 0,
			
 
				 				vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
			
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
				 			spin_lock(&vcpu->kvm->mmu_lock);
			
 
				 			if (make_mmu_pages_available(vcpu) < 0) {
			
 
				 				spin_unlock(&vcpu->kvm->mmu_lock);
			
 
				-				return 1;
			
 
				+				return -ENOSPC;
			
 
				 			}
			
 
				 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
			
 
				 					i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
			
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
				 		spin_lock(&vcpu->kvm->mmu_lock);
			
 
				 		if (make_mmu_pages_available(vcpu) < 0) {
			
 
				 			spin_unlock(&vcpu->kvm->mmu_lock);
			
 
				-			return 1;
			
 
				+			return -ENOSPC;
			
 
				 		}
			
 
				 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
			
 
				 				vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
			
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
				 		spin_lock(&vcpu->kvm->mmu_lock);
			
 
				 		if (make_mmu_pages_available(vcpu) < 0) {
			
 
				 			spin_unlock(&vcpu->kvm->mmu_lock);
			
 
				-			return 1;
			
 
				+			return -ENOSPC;
			
 
				 		}
			
 
				 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
			
 
				 				      0, ACC_ALL);
			
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
				 		 * processors.  See 22.2.4.
			
 
				 		 */
			
 
				 		vmcs_writel(HOST_TR_BASE,
			
 
				-			    (unsigned long)this_cpu_ptr(&cpu_tss));
			
 
				+			    (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
			
 
				 		vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
			
 
				 
			
 
				 		/*
			
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 
				 					 addr, n, v))
			
 
				 		    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
			
 
				 			break;
			
 
				-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
			
 
				+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
			
 
				 		handled += n;
			
 
				 		addr += n;
			
 
				 		len -= n;
			
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 
				 {
			
 
				 	if (vcpu->mmio_read_completed) {
			
 
				 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
			
 
				-			       vcpu->mmio_fragments[0].gpa, *(u64 *)val);
			
 
				+			       vcpu->mmio_fragments[0].gpa, val);
			
 
				 		vcpu->mmio_read_completed = 0;
			
 
				 		return 1;
			
 
				 	}
			
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 
				 
			
 
				 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
			
 
				 {
			
 
				-	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
			
 
				+	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
			
 
				 	return vcpu_mmio_write(vcpu, gpa, bytes, val);
			
 
				 }
			
 
				 
			
 
				 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
			
 
				 			  void *val, int bytes)
			
 
				 {
			
 
				-	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
			
 
				+	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
			
 
				 	return X86EMUL_IO_NEEDED;
			
 
				 }
			
 
				 
			
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
				 
			
 
				 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
			
 
				 {
			
 
				-	struct fpu *fpu = &current->thread.fpu;
			
 
				 	int r;
			
 
				 
			
 
				-	fpu__initialize(fpu);
			
 
				-
			
 
				 	kvm_sigset_activate(vcpu);
			
 
				 
			
 
				+	kvm_load_guest_fpu(vcpu);
			
 
				+
			
 
				 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
			
 
				 		if (kvm_run->immediate_exit) {
			
 
				 			r = -EINTR;
			
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	kvm_load_guest_fpu(vcpu);
			
 
				-
			
 
				 	if (unlikely(vcpu->arch.complete_userspace_io)) {
			
 
				 		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
			
 
				 		vcpu->arch.complete_userspace_io = NULL;
			
 
				 		r = cui(vcpu);
			
 
				 		if (r <= 0)
			
 
				-			goto out_fpu;
			
 
				+			goto out;
			
 
				 	} else
			
 
				 		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
			
 
				 
			
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
				 	else
			
 
				 		r = vcpu_run(vcpu);
			
 
				 
			
 
				-out_fpu:
			
 
				-	kvm_put_guest_fpu(vcpu);
			
 
				 out:
			
 
				+	kvm_put_guest_fpu(vcpu);
			
 
				 	post_kvm_run_save(vcpu);
			
 
				 	kvm_sigset_deactivate(vcpu);
			
 
				 
			
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
				 #endif
			
 
				 
			
 
				 	kvm_rip_write(vcpu, regs->rip);
			
 
				-	kvm_set_rflags(vcpu, regs->rflags);
			
 
				+	kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
			
 
				 
			
 
				 	vcpu->arch.exception.pending = false;
			
 
				 
			
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(kvm_task_switch);
			
 
				 
			
 
				+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
			
 
				+{
			
 
				+	if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
			
 
				+		/*
			
 
				+		 * When EFER.LME and CR0.PG are set, the processor is in
			
 
				+		 * 64-bit mode (though maybe in a 32-bit code segment).
			
 
				+		 * CR4.PAE and EFER.LMA must be set.
			
 
				+		 */
			
 
				+		if (!(sregs->cr4 & X86_CR4_PAE_BIT)
			
 
				+		    || !(sregs->efer & EFER_LMA))
			
 
				+			return -EINVAL;
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * Not in 64-bit mode: EFER.LMA is clear and the code
			
 
				+		 * segment cannot be 64-bit.
			
 
				+		 */
			
 
				+		if (sregs->efer & EFER_LMA || sregs->cs.l)
			
 
				+			return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
			
 
				 				  struct kvm_sregs *sregs)
			
 
				 {
			
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
				 			(sregs->cr4 & X86_CR4_OSXSAVE))
			
 
				 		return -EINVAL;
			
 
				 
			
 
				+	if (kvm_valid_sregs(vcpu, sregs))
			
 
				+		return -EINVAL;
			
 
				+
			
 
				 	apic_base_msr.data = sregs->apic_base;
			
 
				 	apic_base_msr.host_initiated = true;
			
 
				 	if (kvm_set_apic_base(vcpu, &apic_base_msr))
			
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
 
				 		delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
			
 
				 
			
 
				 		/*
			
 
				-		 * Use cpu_tss as a cacheline-aligned, seldomly
			
 
				+		 * Use cpu_tss_rw as a cacheline-aligned, seldomly
			
 
				 		 * accessed per-cpu variable as the monitor target.
			
 
				 		 */
			
 
				-		__monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
			
 
				+		__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
			
 
				 
			
 
				 		/*
			
 
				 		 * AMD, like Intel, supports the EAX hint and EAX=0xf
			
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
				 	if (!printk_ratelimit())
			
 
				 		return;
			
 
				 
			
 
				-	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
			
 
				+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
			
 
				 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			
 
				 		tsk->comm, task_pid_nr(tsk), address,
			
 
				 		(void *)regs->ip, (void *)regs->sp, error_code);
			
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -277,6 +277,7 @@ void __init kasan_early_init(void)
 
				 void __init kasan_init(void)
			
 
				 {
			
 
				 	int i;
			
 
				+	void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
			
 
				 
			
 
				 #ifdef CONFIG_KASAN_INLINE
			
 
				 	register_die_notifier(&kasan_die_notifier);
			
@@ -329,8 +330,23 @@ void __init kasan_init(void)
 
				 			      (unsigned long)kasan_mem_to_shadow(_end),
			
 
				 			      early_pfn_to_nid(__pa(_stext)));
			
 
				 
			
 
				+	shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
			
 
				+	shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
			
 
				+	shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
			
 
				+						PAGE_SIZE);
			
 
				+
			
 
				+	shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
			
 
				+	shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
			
 
				+	shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
			
 
				+					PAGE_SIZE);
			
 
				+
			
 
				 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
			
 
				-			(void *)KASAN_SHADOW_END);
			
 
				+				   shadow_cpu_entry_begin);
			
 
				+
			
 
				+	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
			
 
				+			      (unsigned long)shadow_cpu_entry_end, 0);
			
 
				+
			
 
				+	kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
			
 
				 
			
 
				 	load_cr3(init_top_pgt);
			
 
				 	__flush_tlb_all();
			
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -152,17 +152,19 @@ static void do_fpu_end(void)
 
				 static void fix_processor_context(void)
			
 
				 {
			
 
				 	int cpu = smp_processor_id();
			
 
				-	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
			
 
				 #ifdef CONFIG_X86_64
			
 
				 	struct desc_struct *desc = get_cpu_gdt_rw(cpu);
			
 
				 	tss_desc tss;
			
 
				 #endif
			
 
				-	set_tss_desc(cpu, t);	/*
			
 
				-				 * This just modifies memory; should not be
			
 
				-				 * necessary. But... This is necessary, because
			
 
				-				 * 386 hardware has concept of busy TSS or some
			
 
				-				 * similar stupidity.
			
 
				-				 */
			
 
				+
			
 
				+	/*
			
 
				+	 * We need to reload TR, which requires that we change the
			
 
				+	 * GDT entry to indicate "available" first.
			
 
				+	 *
			
 
				+	 * XXX: This could probably all be replaced by a call to
			
 
				+	 * force_reload_TR().
			
 
				+	 */
			
 
				+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
			
 
				 
			
 
				 #ifdef CONFIG_X86_64
			
 
				 	memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
			
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -826,7 +826,7 @@ static void xen_load_sp0(unsigned long sp0)
 
				 	mcs = xen_mc_entry(0);
			
 
				 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
			
 
				 	xen_mc_issue(PARAVIRT_LAZY_CPU);
			
 
				-	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
			
 
				+	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
			
 
				 }
			
 
				 
			
 
				 void xen_set_iopl_mask(unsigned mask)
			
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2272,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
				 #endif
			
 
				 	case FIX_TEXT_POKE0:
			
 
				 	case FIX_TEXT_POKE1:
			
 
				-	case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
			
 
				+	case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
			
 
				 		/* All local page mappings */
			
 
				 		pte = pfn_pte(phys, prot);
			
 
				 		break;
			
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 
				 	bio->bi_disk = bio_src->bi_disk;
			
 
				 	bio->bi_partno = bio_src->bi_partno;
			
 
				 	bio_set_flag(bio, BIO_CLONED);
			
 
				+	if (bio_flagged(bio_src, BIO_THROTTLED))
			
 
				+		bio_set_flag(bio, BIO_THROTTLED);
			
 
				 	bio->bi_opf = bio_src->bi_opf;
			
 
				 	bio->bi_write_hint = bio_src->bi_write_hint;
			
 
				 	bio->bi_iter = bio_src->bi_iter;
			
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -12,22 +12,29 @@
 
				 #include "blk.h"
			
 
				 
			
 
				 /*
			
 
				- * Append a bio to a passthrough request.  Only works can be merged into
			
 
				- * the request based on the driver constraints.
			
 
				+ * Append a bio to a passthrough request.  Only works if the bio can be merged
			
 
				+ * into the request based on the driver constraints.
			
 
				  */
			
 
				-int blk_rq_append_bio(struct request *rq, struct bio *bio)
			
 
				+int blk_rq_append_bio(struct request *rq, struct bio **bio)
			
 
				 {
			
 
				-	blk_queue_bounce(rq->q, &bio);
			
 
				+	struct bio *orig_bio = *bio;
			
 
				+
			
 
				+	blk_queue_bounce(rq->q, bio);
			
 
				 
			
 
				 	if (!rq->bio) {
			
 
				-		blk_rq_bio_prep(rq->q, rq, bio);
			
 
				+		blk_rq_bio_prep(rq->q, rq, *bio);
			
 
				 	} else {
			
 
				-		if (!ll_back_merge_fn(rq->q, rq, bio))
			
 
				+		if (!ll_back_merge_fn(rq->q, rq, *bio)) {
			
 
				+			if (orig_bio != *bio) {
			
 
				+				bio_put(*bio);
			
 
				+				*bio = orig_bio;
			
 
				+			}
			
 
				 			return -EINVAL;
			
 
				+		}
			
 
				 
			
 
				-		rq->biotail->bi_next = bio;
			
 
				-		rq->biotail = bio;
			
 
				-		rq->__data_len += bio->bi_iter.bi_size;
			
 
				+		rq->biotail->bi_next = *bio;
			
 
				+		rq->biotail = *bio;
			
 
				+		rq->__data_len += (*bio)->bi_iter.bi_size;
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
 
				 	 * We link the bounce buffer in and could have to traverse it
			
 
				 	 * later so we have to get a ref to prevent it from being freed
			
 
				 	 */
			
 
				-	ret = blk_rq_append_bio(rq, bio);
			
 
				-	bio_get(bio);
			
 
				+	ret = blk_rq_append_bio(rq, &bio);
			
 
				 	if (ret) {
			
 
				-		bio_endio(bio);
			
 
				 		__blk_rq_unmap_user(orig_bio);
			
 
				-		bio_put(bio);
			
 
				 		return ret;
			
 
				 	}
			
 
				+	bio_get(bio);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 
				 	int reading = rq_data_dir(rq) == READ;
			
 
				 	unsigned long addr = (unsigned long) kbuf;
			
 
				 	int do_copy = 0;
			
 
				-	struct bio *bio;
			
 
				+	struct bio *bio, *orig_bio;
			
 
				 	int ret;
			
 
				 
			
 
				 	if (len > (queue_max_hw_sectors(q) << 9))
			
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 
				 	if (do_copy)
			
 
				 		rq->rq_flags |= RQF_COPY_USER;
			
 
				 
			
 
				-	ret = blk_rq_append_bio(rq, bio);
			
 
				+	orig_bio = bio;
			
 
				+	ret = blk_rq_append_bio(rq, &bio);
			
 
				 	if (unlikely(ret)) {
			
 
				 		/* request is too big */
			
 
				-		bio_put(bio);
			
 
				+		bio_put(orig_bio);
			
 
				 		return ret;
			
 
				 	}
			
 
				 
			
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2226,13 +2226,7 @@ again:
 
				 out_unlock:
			
 
				 	spin_unlock_irq(q->queue_lock);
			
 
				 out:
			
 
				-	/*
			
 
				-	 * As multiple blk-throtls may stack in the same issue path, we
			
 
				-	 * don't want bios to leave with the flag set.  Clear the flag if
			
 
				-	 * being issued.
			
 
				-	 */
			
 
				-	if (!throttled)
			
 
				-		bio_clear_flag(bio, BIO_THROTTLED);
			
 
				+	bio_set_flag(bio, BIO_THROTTLED);
			
 
				 
			
 
				 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
			
 
				 	if (throttled || !td->track_bio_latency)
			
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 
				 	unsigned i = 0;
			
 
				 	bool bounce = false;
			
 
				 	int sectors = 0;
			
 
				+	bool passthrough = bio_is_passthrough(*bio_orig);
			
 
				 
			
 
				 	bio_for_each_segment(from, *bio_orig, iter) {
			
 
				 		if (i++ < BIO_MAX_PAGES)
			
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 
				 	if (!bounce)
			
 
				 		return;
			
 
				 
			
 
				-	if (sectors < bio_sectors(*bio_orig)) {
			
 
				+	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
			
 
				 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
			
 
				 		bio_chain(bio, *bio_orig);
			
 
				 		generic_make_request(*bio_orig);
			
 
				 		*bio_orig = bio;
			
 
				 	}
			
 
				-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
			
 
				+	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
			
 
				+			bounce_bio_set);
			
 
				 
			
 
				 	bio_for_each_segment_all(to, bio, i) {
			
 
				 		struct page *page = to->bv_page;
			
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
 
				 	unsigned int cur_domain;
			
 
				 	unsigned int batching;
			
 
				 	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
			
 
				+	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
			
 
				 	atomic_t wait_index[KYBER_NUM_DOMAINS];
			
 
				 };
			
 
				 
			
 
				+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
			
 
				+			     void *key);
			
 
				+
			
 
				 static int rq_sched_domain(const struct request *rq)
			
 
				 {
			
 
				 	unsigned int op = rq->cmd_flags;
			
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
				 
			
 
				 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
			
 
				 		INIT_LIST_HEAD(&khd->rqs[i]);
			
 
				+		init_waitqueue_func_entry(&khd->domain_wait[i],
			
 
				+					  kyber_domain_wake);
			
 
				+		khd->domain_wait[i].private = hctx;
			
 
				 		INIT_LIST_HEAD(&khd->domain_wait[i].entry);
			
 
				 		atomic_set(&khd->wait_index[i], 0);
			
 
				 	}
			
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 
				 	int nr;
			
 
				 
			
 
				 	nr = __sbitmap_queue_get(domain_tokens);
			
 
				-	if (nr >= 0)
			
 
				-		return nr;
			
 
				 
			
 
				 	/*
			
 
				 	 * If we failed to get a domain token, make sure the hardware queue is
			
 
				 	 * run when one becomes available. Note that this is serialized on
			
 
				 	 * khd->lock, but we still need to be careful about the waker.
			
 
				 	 */
			
 
				-	if (list_empty_careful(&wait->entry)) {
			
 
				-		init_waitqueue_func_entry(wait, kyber_domain_wake);
			
 
				-		wait->private = hctx;
			
 
				+	if (nr < 0 && list_empty_careful(&wait->entry)) {
			
 
				 		ws = sbq_wait_ptr(domain_tokens,
			
 
				 				  &khd->wait_index[sched_domain]);
			
 
				+		khd->domain_ws[sched_domain] = ws;
			
 
				 		add_wait_queue(&ws->wait, wait);
			
 
				 
			
 
				 		/*
			
 
				 		 * Try again in case a token was freed before we got on the wait
			
 
				-		 * queue. The waker may have already removed the entry from the
			
 
				-		 * wait queue, but list_del_init() is okay with that.
			
 
				+		 * queue.
			
 
				 		 */
			
 
				 		nr = __sbitmap_queue_get(domain_tokens);
			
 
				-		if (nr >= 0) {
			
 
				-			unsigned long flags;
			
 
				+	}
			
 
				 
			
 
				-			spin_lock_irqsave(&ws->wait.lock, flags);
			
 
				-			list_del_init(&wait->entry);
			
 
				-			spin_unlock_irqrestore(&ws->wait.lock, flags);
			
 
				-		}
			
 
				+	/*
			
 
				+	 * If we got a token while we were on the wait queue, remove ourselves
			
 
				+	 * from the wait queue to ensure that all wake ups make forward
			
 
				+	 * progress. It's possible that the waker already deleted the entry
			
 
				+	 * between the !list_empty_careful() check and us grabbing the lock, but
			
 
				+	 * list_del_init() is okay with that.
			
 
				+	 */
			
 
				+	if (nr >= 0 && !list_empty_careful(&wait->entry)) {
			
 
				+		ws = khd->domain_ws[sched_domain];
			
 
				+		spin_lock_irq(&ws->wait.lock);
			
 
				+		list_del_init(&wait->entry);
			
 
				+		spin_unlock_irq(&ws->wait.lock);
			
 
				 	}
			
 
				+
			
 
				 	return nr;
			
 
				 }
			
 
				 
			
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1007,7 +1007,7 @@ skip:
 
				 	/* The record may be cleared by others, try read next record */
			
 
				 	if (len == -ENOENT)
			
 
				 		goto skip;
			
 
				-	else if (len < sizeof(*rcd)) {
			
 
				+	else if (len < 0 || len < sizeof(*rcd)) {
			
 
				 		rc = -EIO;
			
 
				 		goto out;
			
 
				 	}
			
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
 
				 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
			
 
				 	struct cpc_register_resource *desired_reg;
			
 
				 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
			
 
				-	struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
			
 
				+	struct cppc_pcc_data *pcc_ss_data;
			
 
				 	int ret = 0;
			
 
				 
			
 
				 	if (!cpc_desc || pcc_ss_id < 0) {
			
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
 
				 struct nullb_cmd {
			
 
				 	struct list_head list;
			
 
				 	struct llist_node ll_list;
			
 
				-	call_single_data_t csd;
			
 
				+	struct __call_single_data csd;
			
 
				 	struct request *rq;
			
 
				 	struct bio *bio;
			
 
				 	unsigned int tag;
			
 
				+	blk_status_t error;
			
 
				 	struct nullb_queue *nq;
			
 
				 	struct hrtimer timer;
			
 
				-	blk_status_t error;
			
 
				 };
			
 
				 
			
 
				 struct nullb_queue {
			
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -22,6 +22,8 @@
 
				 
			
 
				 #include "cpufreq_governor.h"
			
 
				 
			
 
				+#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL	(2 * TICK_NSEC / NSEC_PER_USEC)
			
 
				+
			
 
				 static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
			
 
				 
			
 
				 static DEFINE_MUTEX(gov_dbs_data_mutex);
			
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 
				 {
			
 
				 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
			
 
				 	struct policy_dbs_info *policy_dbs;
			
 
				+	unsigned int sampling_interval;
			
 
				 	int ret;
			
 
				-	ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
			
 
				-	if (ret != 1)
			
 
				+
			
 
				+	ret = sscanf(buf, "%u", &sampling_interval);
			
 
				+	if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				+	dbs_data->sampling_rate = sampling_interval;
			
 
				+
			
 
				 	/*
			
 
				 	 * We are operating under dbs_data->mutex and so the list and its
			
 
				 	 * entries can't be freed concurrently.
			
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
 
				 	if (ret)
			
 
				 		goto free_policy_dbs_info;
			
 
				 
			
 
				-	dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
			
 
				+	/*
			
 
				+	 * The sampling interval should not be less than the transition latency
			
 
				+	 * of the CPU and it also cannot be too small for dbs_update() to work
			
 
				+	 * correctly.
			
 
				+	 */
			
 
				+	dbs_data->sampling_rate = max_t(unsigned int,
			
 
				+					CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
			
 
				+					cpufreq_policy_transition_delay_us(policy));
			
 
				 
			
 
				 	if (!have_governor_per_policy())
			
 
				 		gov->gdbs_data = dbs_data;
			
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
 
				 	val >>= OCOTP_CFG3_SPEED_SHIFT;
			
 
				 	val &= 0x3;
			
 
				 
			
 
				-	if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
			
 
				-	     of_machine_is_compatible("fsl,imx6q"))
			
 
				-		if (dev_pm_opp_disable(dev, 1200000000))
			
 
				-			dev_warn(dev, "failed to disable 1.2GHz OPP\n");
			
 
				 	if (val < OCOTP_CFG3_SPEED_996MHZ)
			
 
				 		if (dev_pm_opp_disable(dev, 996000000))
			
 
				 			dev_warn(dev, "failed to disable 996MHz OPP\n");
			
 
				-	if (of_machine_is_compatible("fsl,imx6q")) {
			
 
				+
			
 
				+	if (of_machine_is_compatible("fsl,imx6q") ||
			
 
				+	    of_machine_is_compatible("fsl,imx6qp")) {
			
 
				 		if (val != OCOTP_CFG3_SPEED_852MHZ)
			
 
				 			if (dev_pm_opp_disable(dev, 852000000))
			
 
				 				dev_warn(dev, "failed to disable 852MHz OPP\n");
			
 
				+		if (val != OCOTP_CFG3_SPEED_1P2GHZ)
			
 
				+			if (dev_pm_opp_disable(dev, 1200000000))
			
 
				+				dev_warn(dev, "failed to disable 1.2GHz OPP\n");
			
 
				 	}
			
 
				 	iounmap(base);
			
 
				 put_node:
			
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 
				 			 unsigned long flags)
			
 
				 {
			
 
				 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
			
 
				-	struct data_chunk	*first = xt->sgl;
			
 
				+	struct data_chunk	*first;
			
 
				 	struct at_desc		*desc = NULL;
			
 
				 	size_t			xfer_count;
			
 
				 	unsigned int		dwidth;
			
@@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 
				 	if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
			
 
				 		return NULL;
			
 
				 
			
 
				+	first = xt->sgl;
			
 
				+
			
 
				 	dev_info(chan2dev(chan),
			
 
				 		 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
			
 
				 		__func__, &xt->src_start, &xt->dst_start, xt->numf,
			
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 
				 
			
 
				 	ret = dma_async_device_register(dd);
			
 
				 	if (ret)
			
 
				-		return ret;
			
 
				+		goto err_clk;
			
 
				 
			
 
				 	irq = platform_get_irq(pdev, 0);
			
 
				 	ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
			
@@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 
				 
			
 
				 err_unregister:
			
 
				 	dma_async_device_unregister(dd);
			
 
				+err_clk:
			
 
				+	clk_disable_unprepare(dmadev->clk);
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)");
 
				 #define PATTERN_COUNT_MASK	0x1f
			
 
				 #define PATTERN_MEMSET_IDX	0x01
			
 
				 
			
 
				+/* poor man's completion - we want to use wait_event_freezable() on it */
			
 
				+struct dmatest_done {
			
 
				+	bool			done;
			
 
				+	wait_queue_head_t	*wait;
			
 
				+};
			
 
				+
			
 
				 struct dmatest_thread {
			
 
				 	struct list_head	node;
			
 
				 	struct dmatest_info	*info;
			
@@ -165,6 +171,8 @@ struct dmatest_thread {
 
				 	u8			**dsts;
			
 
				 	u8			**udsts;
			
 
				 	enum dma_transaction_type type;
			
 
				+	wait_queue_head_t done_wait;
			
 
				+	struct dmatest_done test_done;
			
 
				 	bool			done;
			
 
				 };
			
 
				 
			
@@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
 
				 	return error_count;
			
 
				 }
			
 
				 
			
 
				-/* poor man's completion - we want to use wait_event_freezable() on it */
			
 
				-struct dmatest_done {
			
 
				-	bool			done;
			
 
				-	wait_queue_head_t	*wait;
			
 
				-};
			
 
				 
			
 
				 static void dmatest_callback(void *arg)
			
 
				 {
			
 
				 	struct dmatest_done *done = arg;
			
 
				-
			
 
				-	done->done = true;
			
 
				-	wake_up_all(done->wait);
			
 
				+	struct dmatest_thread *thread =
			
 
				+		container_of(arg, struct dmatest_thread, done_wait);
			
 
				+	if (!thread->done) {
			
 
				+		done->done = true;
			
 
				+		wake_up_all(done->wait);
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * If thread->done, it means that this callback occurred
			
 
				+		 * after the parent thread has cleaned up. This can
			
 
				+		 * happen in the case that driver doesn't implement
			
 
				+		 * the terminate_all() functionality and a dma operation
			
 
				+		 * did not occur within the timeout period
			
 
				+		 */
			
 
				+		WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static unsigned int min_odd(unsigned int x, unsigned int y)
			
@@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
 
				  */
			
 
				 static int dmatest_func(void *data)
			
 
				 {
			
 
				-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
			
 
				 	struct dmatest_thread	*thread = data;
			
 
				-	struct dmatest_done	done = { .wait = &done_wait };
			
 
				+	struct dmatest_done	*done = &thread->test_done;
			
 
				 	struct dmatest_info	*info;
			
 
				 	struct dmatest_params	*params;
			
 
				 	struct dma_chan		*chan;
			
@@ -673,9 +687,9 @@ static int dmatest_func(void *data)
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		done.done = false;
			
 
				+		done->done = false;
			
 
				 		tx->callback = dmatest_callback;
			
 
				-		tx->callback_param = &done;
			
 
				+		tx->callback_param = done;
			
 
				 		cookie = tx->tx_submit(tx);
			
 
				 
			
 
				 		if (dma_submit_error(cookie)) {
			
@@ -688,21 +702,12 @@ static int dmatest_func(void *data)
 
				 		}
			
 
				 		dma_async_issue_pending(chan);
			
 
				 
			
 
				-		wait_event_freezable_timeout(done_wait, done.done,
			
 
				+		wait_event_freezable_timeout(thread->done_wait, done->done,
			
 
				 					     msecs_to_jiffies(params->timeout));
			
 
				 
			
 
				 		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
			
 
				 
			
 
				-		if (!done.done) {
			
 
				-			/*
			
 
				-			 * We're leaving the timed out dma operation with
			
 
				-			 * dangling pointer to done_wait.  To make this
			
 
				-			 * correct, we'll need to allocate wait_done for
			
 
				-			 * each test iteration and perform "who's gonna
			
 
				-			 * free it this time?" dancing.  For now, just
			
 
				-			 * leave it dangling.
			
 
				-			 */
			
 
				-			WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
			
 
				+		if (!done->done) {
			
 
				 			dmaengine_unmap_put(um);
			
 
				 			result("test timed out", total_tests, src_off, dst_off,
			
 
				 			       len, 0);
			
@@ -789,7 +794,7 @@ err_thread_type:
 
				 		dmatest_KBs(runtime, total_len), ret);
			
 
				 
			
 
				 	/* terminate all transfers on specified channels */
			
 
				-	if (ret)
			
 
				+	if (ret || failed_tests)
			
 
				 		dmaengine_terminate_all(chan);
			
 
				 
			
 
				 	thread->done = true;
			
@@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info,
 
				 		thread->info = info;
			
 
				 		thread->chan = dtc->chan;
			
 
				 		thread->type = type;
			
 
				+		thread->test_done.wait = &thread->done_wait;
			
 
				+		init_waitqueue_head(&thread->done_wait);
			
 
				 		smp_wmb();
			
 
				 		thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
			
 
				 				dma_chan_name(chan), op, i);
			
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -863,11 +863,11 @@ static void fsl_edma_irq_exit(
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma)
			
 
				+static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
			
 
				 {
			
 
				 	int i;
			
 
				 
			
 
				-	for (i = 0; i < DMAMUX_NR; i++)
			
 
				+	for (i = 0; i < nr_clocks; i++)
			
 
				 		clk_disable_unprepare(fsl_edma->muxclk[i]);
			
 
				 }
			
 
				 
			
@@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev)
 
				 
			
 
				 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
			
 
				 		fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
			
 
				-		if (IS_ERR(fsl_edma->muxbase[i]))
			
 
				+		if (IS_ERR(fsl_edma->muxbase[i])) {
			
 
				+			/* on error: disable all previously enabled clks */
			
 
				+			fsl_disable_clocks(fsl_edma, i);
			
 
				 			return PTR_ERR(fsl_edma->muxbase[i]);
			
 
				+		}
			
 
				 
			
 
				 		sprintf(clkname, "dmamux%d", i);
			
 
				 		fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
			
 
				 		if (IS_ERR(fsl_edma->muxclk[i])) {
			
 
				 			dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
			
 
				+			/* on error: disable all previously enabled clks */
			
 
				+			fsl_disable_clocks(fsl_edma, i);
			
 
				 			return PTR_ERR(fsl_edma->muxclk[i]);
			
 
				 		}
			
 
				 
			
 
				 		ret = clk_prepare_enable(fsl_edma->muxclk[i]);
			
 
				-		if (ret) {
			
 
				-			/* disable only clks which were enabled on error */
			
 
				-			for (; i >= 0; i--)
			
 
				-				clk_disable_unprepare(fsl_edma->muxclk[i]);
			
 
				-
			
 
				-			dev_err(&pdev->dev, "DMAMUX clk block failed.\n");
			
 
				-			return ret;
			
 
				-		}
			
 
				+		if (ret)
			
 
				+			/* on error: disable all previously enabled clks */
			
 
				+			fsl_disable_clocks(fsl_edma, i);
			
 
				 
			
 
				 	}
			
 
				 
			
@@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
 
				 	if (ret) {
			
 
				 		dev_err(&pdev->dev,
			
 
				 			"Can't register Freescale eDMA engine. (%d)\n", ret);
			
 
				-		fsl_disable_clocks(fsl_edma);
			
 
				+		fsl_disable_clocks(fsl_edma, DMAMUX_NR);
			
 
				 		return ret;
			
 
				 	}
			
 
				 
			
@@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
 
				 		dev_err(&pdev->dev,
			
 
				 			"Can't register Freescale eDMA of_dma. (%d)\n", ret);
			
 
				 		dma_async_device_unregister(&fsl_edma->dma_dev);
			
 
				-		fsl_disable_clocks(fsl_edma);
			
 
				+		fsl_disable_clocks(fsl_edma, DMAMUX_NR);
			
 
				 		return ret;
			
 
				 	}
			
 
				 
			
@@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev)
 
				 	fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
			
 
				 	of_dma_controller_free(np);
			
 
				 	dma_async_device_unregister(&fsl_edma->dma_dev);
			
 
				-	fsl_disable_clocks(fsl_edma);
			
 
				+	fsl_disable_clocks(fsl_edma, DMAMUX_NR);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
 
				 	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
			
 
				 		dev_err(dev, "Self-test copy failed compare, disabling\n");
			
 
				 		err = -ENODEV;
			
 
				-		goto free_resources;
			
 
				+		goto unmap_dma;
			
 
				 	}
			
 
				 
			
 
				 unmap_dma:
			
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
 
				 	u8 *ptr;
			
 
				 	u8 *rx_buf;
			
 
				 	u8 sum;
			
 
				+	u8 rx_byte;
			
 
				 	int ret = 0, final_ret;
			
 
				 
			
 
				 	len = cros_ec_prepare_tx(ec_dev, ec_msg);
			
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
 
				 	if (!ret) {
			
 
				 		/* Verify that EC can process command */
			
 
				 		for (i = 0; i < len; i++) {
			
 
				-			switch (rx_buf[i]) {
			
 
				-			case EC_SPI_PAST_END:
			
 
				-			case EC_SPI_RX_BAD_DATA:
			
 
				-			case EC_SPI_NOT_READY:
			
 
				-				ret = -EAGAIN;
			
 
				-				ec_msg->result = EC_RES_IN_PROGRESS;
			
 
				-			default:
			
 
				+			rx_byte = rx_buf[i];
			
 
				+			if (rx_byte == EC_SPI_PAST_END  ||
			
 
				+			    rx_byte == EC_SPI_RX_BAD_DATA ||
			
 
				+			    rx_byte == EC_SPI_NOT_READY) {
			
 
				+				ret = -EREMOTEIO;
			
 
				 				break;
			
 
				 			}
			
 
				-			if (ret)
			
 
				-				break;
			
 
				 		}
			
 
				-		if (!ret)
			
 
				-			ret = cros_ec_spi_receive_packet(ec_dev,
			
 
				-					ec_msg->insize + sizeof(*response));
			
 
				-	} else {
			
 
				-		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
			
 
				 	}
			
 
				 
			
 
				+	if (!ret)
			
 
				+		ret = cros_ec_spi_receive_packet(ec_dev,
			
 
				+				ec_msg->insize + sizeof(*response));
			
 
				+	else
			
 
				+		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
			
 
				+
			
 
				 	final_ret = terminate_request(ec_dev);
			
 
				 
			
 
				 	spi_bus_unlock(ec_spi->spi->master);
			
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 
				 	int i, len;
			
 
				 	u8 *ptr;
			
 
				 	u8 *rx_buf;
			
 
				+	u8 rx_byte;
			
 
				 	int sum;
			
 
				 	int ret = 0, final_ret;
			
 
				 
			
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 
				 	if (!ret) {
			
 
				 		/* Verify that EC can process command */
			
 
				 		for (i = 0; i < len; i++) {
			
 
				-			switch (rx_buf[i]) {
			
 
				-			case EC_SPI_PAST_END:
			
 
				-			case EC_SPI_RX_BAD_DATA:
			
 
				-			case EC_SPI_NOT_READY:
			
 
				-				ret = -EAGAIN;
			
 
				-				ec_msg->result = EC_RES_IN_PROGRESS;
			
 
				-			default:
			
 
				+			rx_byte = rx_buf[i];
			
 
				+			if (rx_byte == EC_SPI_PAST_END  ||
			
 
				+			    rx_byte == EC_SPI_RX_BAD_DATA ||
			
 
				+			    rx_byte == EC_SPI_NOT_READY) {
			
 
				+				ret = -EREMOTEIO;
			
 
				 				break;
			
 
				 			}
			
 
				-			if (ret)
			
 
				-				break;
			
 
				 		}
			
 
				-		if (!ret)
			
 
				-			ret = cros_ec_spi_receive_response(ec_dev,
			
 
				-					ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
			
 
				-	} else {
			
 
				-		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
			
 
				 	}
			
 
				 
			
 
				+	if (!ret)
			
 
				+		ret = cros_ec_spi_receive_response(ec_dev,
			
 
				+				ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
			
 
				+	else
			
 
				+		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
			
 
				+
			
 
				 	final_ret = terminate_request(ec_dev);
			
 
				 
			
 
				 	spi_bus_unlock(ec_spi->spi->master);
			
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
 
				 			   sizeof(struct ec_response_get_protocol_info);
			
 
				 	ec_dev->dout_size = sizeof(struct ec_host_request);
			
 
				 
			
 
				+	ec_spi->last_transfer_ns = ktime_get_ns();
			
 
				 
			
 
				 	err = cros_ec_register(ec_dev);
			
 
				 	if (err) {
			
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
 
				 EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
			
 
				 
			
 
				 static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
			
 
				-			      struct device_node *node)
			
 
				+			      struct device_node *parent)
			
 
				 {
			
 
				+	struct device_node *node;
			
 
				+
			
 
				 	if (pdata && pdata->codec)
			
 
				 		return true;
			
 
				 
			
 
				-	if (of_find_node_by_name(node, "codec"))
			
 
				+	node = of_get_child_by_name(parent, "codec");
			
 
				+	if (node) {
			
 
				+		of_node_put(node);
			
 
				 		return true;
			
 
				+	}
			
 
				 
			
 
				 	return false;
			
 
				 }
			
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
 
				 };
			
 
				 
			
 
				 
			
 
				-static bool twl6040_has_vibra(struct device_node *node)
			
 
				+static bool twl6040_has_vibra(struct device_node *parent)
			
 
				 {
			
 
				-#ifdef CONFIG_OF
			
 
				-	if (of_find_node_by_name(node, "vibra"))
			
 
				+	struct device_node *node;
			
 
				+
			
 
				+	node = of_get_child_by_name(parent, "vibra");
			
 
				+	if (node) {
			
 
				+		of_node_put(node);
			
 
				 		return true;
			
 
				-#endif
			
 
				+	}
			
 
				+
			
 
				 	return false;
			
 
				 }
			
 
				 
			
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -32,7 +32,7 @@
 
				 #include <linux/pci.h>
			
 
				 #include <linux/mutex.h>
			
 
				 #include <linux/miscdevice.h>
			
 
				-#include <linux/pti.h>
			
 
				+#include <linux/intel-pti.h>
			
 
				 #include <linux/slab.h>
			
 
				 #include <linux/uaccess.h>
			
 
				 
			
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
 
				 	if (!ops->oobbuf)
			
 
				 		ops->ooblen = 0;
			
 
				 
			
 
				-	if (offs < 0 || offs + ops->len >= mtd->size)
			
 
				+	if (offs < 0 || offs + ops->len > mtd->size)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				 	if (ops->ooblen) {
			
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -1763,7 +1763,7 @@ try_dmaread:
 
				 			err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
			
 
				 							      addr);
			
 
				 			/* erased page bitflips corrected */
			
 
				-			if (err > 0)
			
 
				+			if (err >= 0)
			
 
				 				return err;
			
 
				 		}
			
 
				 
			
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
 
				 		goto out_ce;
			
 
				 	}
			
 
				 
			
 
				-	gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
			
 
				-	if (IS_ERR(gpiomtd->nwp)) {
			
 
				-		ret = PTR_ERR(gpiomtd->nwp);
			
 
				+	gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
			
 
				+	if (IS_ERR(gpiomtd->ale)) {
			
 
				+		ret = PTR_ERR(gpiomtd->ale);
			
 
				 		goto out_ce;
			
 
				 	}
			
 
				 
			
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
				 		return ret;
			
 
				 	}
			
 
				 
			
 
				-	/* handle the block mark swapping */
			
 
				-	block_mark_swapping(this, payload_virt, auxiliary_virt);
			
 
				-
			
 
				 	/* Loop over status bytes, accumulating ECC status. */
			
 
				 	status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
			
 
				 
			
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
				 		max_bitflips = max_t(unsigned int, max_bitflips, *status);
			
 
				 	}
			
 
				 
			
 
				+	/* handle the block mark swapping */
			
 
				+	block_mark_swapping(this, buf, auxiliary_virt);
			
 
				+
			
 
				 	if (oob_required) {
			
 
				 		/*
			
 
				 		 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
			
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -159,6 +159,8 @@ struct arc_emac_priv {
 
				 	unsigned int link;
			
 
				 	unsigned int duplex;
			
 
				 	unsigned int speed;
			
 
				+
			
 
				+	unsigned int rx_missed_errors;
			
 
				 };
			
 
				 
			
 
				 /**
			
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -26,6 +26,8 @@
 
				 
			
 
				 #include "emac.h"
			
 
				 
			
 
				+static void arc_emac_restart(struct net_device *ndev);
			
 
				+
			
 
				 /**
			
 
				  * arc_emac_tx_avail - Return the number of available slots in the tx ring.
			
 
				  * @priv: Pointer to ARC EMAC private data structure.
			
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		pktlen = info & LEN_MASK;
			
 
				-		stats->rx_packets++;
			
 
				-		stats->rx_bytes += pktlen;
			
 
				-		skb = rx_buff->skb;
			
 
				-		skb_put(skb, pktlen);
			
 
				-		skb->dev = ndev;
			
 
				-		skb->protocol = eth_type_trans(skb, ndev);
			
 
				-
			
 
				-		dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
			
 
				-				 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
			
 
				-
			
 
				-		/* Prepare the BD for next cycle */
			
 
				-		rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
			
 
				-							 EMAC_BUFFER_SIZE);
			
 
				-		if (unlikely(!rx_buff->skb)) {
			
 
				+		/* Prepare the BD for next cycle. netif_receive_skb()
			
 
				+		 * only if new skb was allocated and mapped to avoid holes
			
 
				+		 * in the RX fifo.
			
 
				+		 */
			
 
				+		skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
			
 
				+		if (unlikely(!skb)) {
			
 
				+			if (net_ratelimit())
			
 
				+				netdev_err(ndev, "cannot allocate skb\n");
			
 
				+			/* Return ownership to EMAC */
			
 
				+			rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
			
 
				 			stats->rx_errors++;
			
 
				-			/* Because receive_skb is below, increment rx_dropped */
			
 
				 			stats->rx_dropped++;
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		/* receive_skb only if new skb was allocated to avoid holes */
			
 
				-		netif_receive_skb(skb);
			
 
				-
			
 
				-		addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
			
 
				+		addr = dma_map_single(&ndev->dev, (void *)skb->data,
			
 
				 				      EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
			
 
				 		if (dma_mapping_error(&ndev->dev, addr)) {
			
 
				 			if (net_ratelimit())
			
 
				-				netdev_err(ndev, "cannot dma map\n");
			
 
				-			dev_kfree_skb(rx_buff->skb);
			
 
				+				netdev_err(ndev, "cannot map dma buffer\n");
			
 
				+			dev_kfree_skb(skb);
			
 
				+			/* Return ownership to EMAC */
			
 
				+			rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
			
 
				 			stats->rx_errors++;
			
 
				+			stats->rx_dropped++;
			
 
				 			continue;
			
 
				 		}
			
 
				+
			
 
				+		/* unmap previously mapped skb */
			
 
				+		dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
			
 
				+				 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
			
 
				+
			
 
				+		pktlen = info & LEN_MASK;
			
 
				+		stats->rx_packets++;
			
 
				+		stats->rx_bytes += pktlen;
			
 
				+		skb_put(rx_buff->skb, pktlen);
			
 
				+		rx_buff->skb->dev = ndev;
			
 
				+		rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
			
 
				+
			
 
				+		netif_receive_skb(rx_buff->skb);
			
 
				+
			
 
				+		rx_buff->skb = skb;
			
 
				 		dma_unmap_addr_set(rx_buff, addr, addr);
			
 
				 		dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
			
 
				 
			
@@ -258,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
 
				 	return work_done;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * arc_emac_rx_miss_handle - handle R_MISS register
			
 
				+ * @ndev:	Pointer to the net_device structure.
			
 
				+ */
			
 
				+static void arc_emac_rx_miss_handle(struct net_device *ndev)
			
 
				+{
			
 
				+	struct arc_emac_priv *priv = netdev_priv(ndev);
			
 
				+	struct net_device_stats *stats = &ndev->stats;
			
 
				+	unsigned int miss;
			
 
				+
			
 
				+	miss = arc_reg_get(priv, R_MISS);
			
 
				+	if (miss) {
			
 
				+		stats->rx_errors += miss;
			
 
				+		stats->rx_missed_errors += miss;
			
 
				+		priv->rx_missed_errors += miss;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * arc_emac_rx_stall_check - check RX stall
			
 
				+ * @ndev:	Pointer to the net_device structure.
			
 
				+ * @budget:	How many BDs requested to process on 1 call.
			
 
				+ * @work_done:	How many BDs processed
			
 
				+ *
			
 
				+ * Under certain conditions the EMAC stops receiving incoming packets and
			
 
				+ * continuously increments the R_MISS register instead of saving data into
			
 
				+ * the provided buffer. This function detects that condition and restarts
			
 
				+ * EMAC.
			
 
				+ */
			
 
				+static void arc_emac_rx_stall_check(struct net_device *ndev,
			
 
				+				    int budget, unsigned int work_done)
			
 
				+{
			
 
				+	struct arc_emac_priv *priv = netdev_priv(ndev);
			
 
				+	struct arc_emac_bd *rxbd;
			
 
				+
			
 
				+	if (work_done)
			
 
				+		priv->rx_missed_errors = 0;
			
 
				+
			
 
				+	if (priv->rx_missed_errors && budget) {
			
 
				+		rxbd = &priv->rxbd[priv->last_rx_bd];
			
 
				+		if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
			
 
				+			arc_emac_restart(ndev);
			
 
				+			priv->rx_missed_errors = 0;
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * arc_emac_poll - NAPI poll handler.
			
 
				  * @napi:	Pointer to napi_struct structure.
			
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
 
				 	unsigned int work_done;
			
 
				 
			
 
				 	arc_emac_tx_clean(ndev);
			
 
				+	arc_emac_rx_miss_handle(ndev);
			
 
				 
			
 
				 	work_done = arc_emac_rx(ndev, budget);
			
 
				 	if (work_done < budget) {
			
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
 
				 		arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
			
 
				 	}
			
 
				 
			
 
				+	arc_emac_rx_stall_check(ndev, budget, work_done);
			
 
				+
			
 
				 	return work_done;
			
 
				 }
			
 
				 
			
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
 
				 		if (status & MSER_MASK) {
			
 
				 			stats->rx_missed_errors += 0x100;
			
 
				 			stats->rx_errors += 0x100;
			
 
				+			priv->rx_missed_errors += 0x100;
			
 
				+			napi_schedule(&priv->napi);
			
 
				 		}
			
 
				 
			
 
				 		if (status & RXCR_MASK) {
			
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
				 }
			
 
				 
			
 
				 
			
 
				+/**
			
 
				+ * arc_emac_restart - Restart EMAC
			
 
				+ * @ndev:	Pointer to net_device structure.
			
 
				+ *
			
 
				+ * This function does a hardware reset of the EMAC in order to restore
			
 
				+ * network packets reception.
			
 
				+ */
			
 
				+static void arc_emac_restart(struct net_device *ndev)
			
 
				+{
			
 
				+	struct arc_emac_priv *priv = netdev_priv(ndev);
			
 
				+	struct net_device_stats *stats = &ndev->stats;
			
 
				+	int i;
			
 
				+
			
 
				+	if (net_ratelimit())
			
 
				+		netdev_warn(ndev, "restarting stalled EMAC\n");
			
 
				+
			
 
				+	netif_stop_queue(ndev);
			
 
				+
			
 
				+	/* Disable interrupts */
			
 
				+	arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
			
 
				+
			
 
				+	/* Disable EMAC */
			
 
				+	arc_reg_clr(priv, R_CTRL, EN_MASK);
			
 
				+
			
 
				+	/* Return the sk_buff to system */
			
 
				+	arc_free_tx_queue(ndev);
			
 
				+
			
 
				+	/* Clean Tx BD's */
			
 
				+	priv->txbd_curr = 0;
			
 
				+	priv->txbd_dirty = 0;
			
 
				+	memset(priv->txbd, 0, TX_RING_SZ);
			
 
				+
			
 
				+	for (i = 0; i < RX_BD_NUM; i++) {
			
 
				+		struct arc_emac_bd *rxbd = &priv->rxbd[i];
			
 
				+		unsigned int info = le32_to_cpu(rxbd->info);
			
 
				+
			
 
				+		if (!(info & FOR_EMAC)) {
			
 
				+			stats->rx_errors++;
			
 
				+			stats->rx_dropped++;
			
 
				+		}
			
 
				+		/* Return ownership to EMAC */
			
 
				+		rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
			
 
				+	}
			
 
				+	priv->last_rx_bd = 0;
			
 
				+
			
 
				+	/* Make sure info is visible to EMAC before enable */
			
 
				+	wmb();
			
 
				+
			
 
				+	/* Enable interrupts */
			
 
				+	arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
			
 
				+
			
 
				+	/* Enable EMAC */
			
 
				+	arc_reg_or(priv, R_CTRL, EN_MASK);
			
 
				+
			
 
				+	netif_start_queue(ndev);
			
 
				+}
			
 
				+
			
 
				 static const struct net_device_ops arc_emac_netdev_ops = {
			
 
				 	.ndo_open		= arc_emac_open,
			
 
				 	.ndo_stop		= arc_emac_stop,
			
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
 
				 	/* Reset PHY, otherwise the read DMA engine will be in a mode that
			
 
				 	 * breaks all requests to 256 bytes.
			
 
				 	 */
			
 
				-	if (tg3_asic_rev(tp) == ASIC_REV_57766)
			
 
				+	if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
			
 
				+	    tg3_asic_rev(tp) == ASIC_REV_5717 ||
			
 
				+	    tg3_asic_rev(tp) == ASIC_REV_5719)
			
 
				 		reset_phy = true;
			
 
				 
			
 
				 	err = tg3_restart_hw(tp, reset_phy);
			
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
 
				 	val &= ~MVNETA_GMAC0_PORT_ENABLE;
			
 
				 	mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
			
 
				 
			
 
				+	pp->link = 0;
			
 
				+	pp->duplex = -1;
			
 
				+	pp->speed = 0;
			
 
				+
			
 
				 	udelay(200);
			
 
				 }
			
 
				 
			
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 
				 
			
 
				 		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
			
 
				 		    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
			
 
				+			mvneta_rx_error(pp, rx_desc);
			
 
				 err_drop_frame:
			
 
				 			dev->stats.rx_errors++;
			
 
				-			mvneta_rx_error(pp, rx_desc);
			
 
				 			/* leave the descriptor untouched */
			
 
				 			continue;
			
 
				 		}
			
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 
				 {
			
 
				 	int queue;
			
 
				 
			
 
				-	for (queue = 0; queue < txq_number; queue++)
			
 
				+	for (queue = 0; queue < rxq_number; queue++)
			
 
				 		mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
			
 
				 }
			
 
				 
			
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
 
				 	/* set GE2 TUNE */
			
 
				 	regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
			
 
				 
			
 
				-	/* GE1, Force 1000M/FD, FC ON */
			
 
				-	mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));
			
 
				-
			
 
				-	/* GE2, Force 1000M/FD, FC ON */
			
 
				-	mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
			
 
				+	/* Set linkdown as the default for each GMAC. Its own MCR would be set
			
 
				+	 * up with the more appropriate value when mtk_phy_link_adjust call is
			
 
				+	 * being invoked.
			
 
				+	 */
			
 
				+	for (i = 0; i < MTK_MAC_COUNT; i++)
			
 
				+		mtk_w32(eth, 0, MTK_MAC_MCR(i));
			
 
				 
			
 
				 	/* Indicates CDM to parse the MTK special tag from CPU
			
 
				 	 * which also is working out for untag packets.
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 
				 	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
			
 
				 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
			
 
				 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
			
 
				-	case MLX5_CMD_OP_SET_RATE_LIMIT:
			
 
				+	case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
			
 
				 	case MLX5_CMD_OP_QUERY_RATE_LIMIT:
			
 
				 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
			
 
				 	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
			
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
 
				 	MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
			
 
				 	MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
			
 
				 	MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
			
 
				-	MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
			
 
				+	MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
			
 
				 	MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
			
 
				 	MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
			
 
				 	MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -82,6 +82,9 @@
 
				 	max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
			
 
				 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
			
 
				 #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
			
 
				+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
			
 
				+	(cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
			
 
				+	MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
			
 
				 
			
 
				 #define MLX5_MPWRQ_LOG_WQE_SZ			18
			
 
				 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
			
@@ -590,6 +593,7 @@ struct mlx5e_channel {
 
				 	struct mlx5_core_dev      *mdev;
			
 
				 	struct hwtstamp_config    *tstamp;
			
 
				 	int                        ix;
			
 
				+	int                        cpu;
			
 
				 };
			
 
				 
			
 
				 struct mlx5e_channels {
			
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
 
				 				 u8 cq_period_mode);
			
 
				 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
			
 
				 				 u8 cq_period_mode);
			
 
				-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
			
 
				-			      struct mlx5e_params *params, u8 rq_type);
			
 
				+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
			
 
				+			       struct mlx5e_params *params,
			
 
				+			       u8 rq_type);
			
 
				 
			
 
				 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
			
 
				 {
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 
				 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
			
 
				 				    struct ieee_ets *ets)
			
 
				 {
			
 
				+	bool have_ets_tc = false;
			
 
				 	int bw_sum = 0;
			
 
				 	int i;
			
 
				 
			
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
 
				 	}
			
 
				 
			
 
				 	/* Validate Bandwidth Sum */
			
 
				-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
			
 
				-		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
			
 
				+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
			
 
				+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
			
 
				+			have_ets_tc = true;
			
 
				 			bw_sum += ets->tc_tx_bw[i];
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				-	if (bw_sum != 0 && bw_sum != 100) {
			
 
				+	if (have_ets_tc && bw_sum != 100) {
			
 
				 		netdev_err(netdev,
			
 
				 			   "Failed to validate ETS: BW sum is illegal\n");
			
 
				 		return -EINVAL;
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 
				 	new_channels.params = priv->channels.params;
			
 
				 	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
			
 
				 
			
 
				-	mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
			
 
				-				 new_channels.params.rq_wq_type);
			
 
				+	new_channels.params.mpwqe_log_stride_sz =
			
 
				+		MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
			
 
				+	new_channels.params.mpwqe_log_num_strides =
			
 
				+		MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
			
 
				 
			
 
				 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
			
 
				 		priv->channels.params = new_channels.params;
			
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 
				 		return err;
			
 
				 
			
 
				 	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
			
 
				+	mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
			
 
				+		  MLX5E_GET_PFLAG(&priv->channels.params,
			
 
				+				  MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
 
				 	struct mlx5e_cq_param      icosq_cq;
			
 
				 };
			
 
				 
			
 
				-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
			
 
				-{
			
 
				-	return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
			
 
				-}
			
 
				-
			
 
				 static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
			
 
				 {
			
 
				 	return MLX5_CAP_GEN(mdev, striding_rq) &&
			
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 
				 		MLX5_CAP_ETH(mdev, reg_umr_sq);
			
 
				 }
			
 
				 
			
 
				-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
			
 
				-			      struct mlx5e_params *params, u8 rq_type)
			
 
				+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
			
 
				+			       struct mlx5e_params *params, u8 rq_type)
			
 
				 {
			
 
				 	params->rq_wq_type = rq_type;
			
 
				 	params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
			
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 
				 		params->log_rq_size = is_kdump_kernel() ?
			
 
				 			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
			
 
				 			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
			
 
				-		params->mpwqe_log_stride_sz =
			
 
				-			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
			
 
				-			MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
			
 
				-			MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
			
 
				+		params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
			
 
				+			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
			
 
				 		params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
			
 
				 			params->mpwqe_log_stride_sz;
			
 
				 		break;
			
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 
				 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
			
 
				 }
			
 
				 
			
 
				-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
			
 
				+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
			
 
				+				struct mlx5e_params *params)
			
 
				 {
			
 
				 	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
			
 
				 		    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
			
 
				 		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
			
 
				 		    MLX5_WQ_TYPE_LINKED_LIST;
			
 
				-	mlx5e_set_rq_type_params(mdev, params, rq_type);
			
 
				+	mlx5e_init_rq_type_params(mdev, params, rq_type);
			
 
				 }
			
 
				 
			
 
				 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
			
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 
				 	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
			
 
				 	int mtt_sz = mlx5e_get_wqe_mtt_sz();
			
 
				 	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
			
 
				-	int node = mlx5e_get_node(c->priv, c->ix);
			
 
				 	int i;
			
 
				 
			
 
				 	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
			
 
				-					GFP_KERNEL, node);
			
 
				+				      GFP_KERNEL, cpu_to_node(c->cpu));
			
 
				 	if (!rq->mpwqe.info)
			
 
				 		goto err_out;
			
 
				 
			
 
				 	/* We allocate more than mtt_sz as we will align the pointer */
			
 
				-	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
			
 
				-					GFP_KERNEL, node);
			
 
				+	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
			
 
				+					cpu_to_node(c->cpu));
			
 
				 	if (unlikely(!rq->mpwqe.mtt_no_align))
			
 
				 		goto err_free_wqe_info;
			
 
				 
			
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
				 	int err;
			
 
				 	int i;
			
 
				 
			
 
				-	rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
			
 
				+	rqp->wq.db_numa_node = cpu_to_node(c->cpu);
			
 
				 
			
 
				 	err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
			
 
				 				&rq->wq_ctrl);
			
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
				 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
			
 
				 		rq->wqe.frag_info =
			
 
				 			kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
			
 
				-				     GFP_KERNEL,
			
 
				-				     mlx5e_get_node(c->priv, c->ix));
			
 
				+				     GFP_KERNEL, cpu_to_node(c->cpu));
			
 
				 		if (!rq->wqe.frag_info) {
			
 
				 			err = -ENOMEM;
			
 
				 			goto err_rq_wq_destroy;
			
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 
				 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
			
 
				 	sq->min_inline_mode = params->tx_min_inline_mode;
			
 
				 
			
 
				-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
			
 
				+	param->wq.db_numa_node = cpu_to_node(c->cpu);
			
 
				 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
			
 
				 
			
 
				-	err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
			
 
				+	err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
			
 
				 	if (err)
			
 
				 		goto err_sq_wq_destroy;
			
 
				 
			
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 
				 	sq->channel   = c;
			
 
				 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
			
 
				 
			
 
				-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
			
 
				+	param->wq.db_numa_node = cpu_to_node(c->cpu);
			
 
				 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
			
 
				 
			
 
				-	err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
			
 
				+	err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
			
 
				 	if (err)
			
 
				 		goto err_sq_wq_destroy;
			
 
				 
			
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 
				 	if (MLX5_IPSEC_DEV(c->priv->mdev))
			
 
				 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
			
 
				 
			
 
				-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
			
 
				+	param->wq.db_numa_node = cpu_to_node(c->cpu);
			
 
				 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 	sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
			
 
				 
			
 
				-	err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
			
 
				+	err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
			
 
				 	if (err)
			
 
				 		goto err_sq_wq_destroy;
			
 
				 
			
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
 
				 	struct mlx5_core_dev *mdev = c->priv->mdev;
			
 
				 	int err;
			
 
				 
			
 
				-	param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
			
 
				-	param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
			
 
				+	param->wq.buf_numa_node = cpu_to_node(c->cpu);
			
 
				+	param->wq.db_numa_node  = cpu_to_node(c->cpu);
			
 
				 	param->eq_ix   = c->ix;
			
 
				 
			
 
				 	err = mlx5e_alloc_cq_common(mdev, param, cq);
			
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
 
				 	mlx5e_free_cq(cq);
			
 
				 }
			
 
				 
			
 
				+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
			
 
				+{
			
 
				+	return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
			
 
				+}
			
 
				+
			
 
				 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
			
 
				 			     struct mlx5e_params *params,
			
 
				 			     struct mlx5e_channel_param *cparam)
			
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 
				 {
			
 
				 	struct mlx5e_cq_moder icocq_moder = {0, 0};
			
 
				 	struct net_device *netdev = priv->netdev;
			
 
				+	int cpu = mlx5e_get_cpu(priv, ix);
			
 
				 	struct mlx5e_channel *c;
			
 
				 	unsigned int irq;
			
 
				 	int err;
			
 
				 	int eqn;
			
 
				 
			
 
				-	c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
			
 
				+	c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
			
 
				 	if (!c)
			
 
				 		return -ENOMEM;
			
 
				 
			
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 
				 	c->mdev     = priv->mdev;
			
 
				 	c->tstamp   = &priv->tstamp;
			
 
				 	c->ix       = ix;
			
 
				+	c->cpu      = cpu;
			
 
				 	c->pdev     = &priv->mdev->pdev->dev;
			
 
				 	c->netdev   = priv->netdev;
			
 
				 	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
			
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 
				 	for (tc = 0; tc < c->num_tc; tc++)
			
 
				 		mlx5e_activate_txqsq(&c->sq[tc]);
			
 
				 	mlx5e_activate_rq(&c->rq);
			
 
				-	netif_set_xps_queue(c->netdev,
			
 
				-		mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
			
 
				+	netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
			
 
				 }
			
 
				 
			
 
				 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
			
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 
				 						     struct sk_buff *skb,
			
 
				 						     netdev_features_t features)
			
 
				 {
			
 
				+	unsigned int offset = 0;
			
 
				 	struct udphdr *udph;
			
 
				 	u8 proto;
			
 
				 	u16 port;
			
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 
				 		proto = ip_hdr(skb)->protocol;
			
 
				 		break;
			
 
				 	case htons(ETH_P_IPV6):
			
 
				-		proto = ipv6_hdr(skb)->nexthdr;
			
 
				+		proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
			
 
				 		break;
			
 
				 	default:
			
 
				 		goto out;
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 
				 			break;
			
 
				 		case MLX5_EVENT_TYPE_CQ_ERROR:
			
 
				 			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
			
 
				-			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
			
 
				+			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
			
 
				 				       cqn, eqe->data.cq_err.syndrome);
			
 
				 			mlx5_cq_event(dev, cqn, eqe->type);
			
 
				 			break;
			
@@ -775,7 +775,7 @@ err1:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-int mlx5_stop_eqs(struct mlx5_core_dev *dev)
			
 
				+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
			
 
				 {
			
 
				 	struct mlx5_eq_table *table = &dev->priv.eq_table;
			
 
				 	int err;
			
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
 
				 	if (MLX5_CAP_GEN(dev, pg)) {
			
 
				 		err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
			
 
				 		if (err)
			
 
				-			return err;
			
 
				+			mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
			
 
				+				      err);
			
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				 	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
			
 
				 	if (err)
			
 
				-		return err;
			
 
				+		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
			
 
				+			      err);
			
 
				 
			
 
				-	mlx5_destroy_unmap_eq(dev, &table->async_eq);
			
 
				+	err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
			
 
				+	if (err)
			
 
				+		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
			
 
				+			      err);
			
 
				 	mlx5_cmd_use_polling(dev);
			
 
				 
			
 
				 	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
			
 
				 	if (err)
			
 
				-		mlx5_cmd_use_events(dev);
			
 
				-
			
 
				-	return err;
			
 
				+		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
			
 
				+			      err);
			
 
				 }
			
 
				 
			
 
				 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
 
				 	u8 actual_size;
			
 
				 	int err;
			
 
				 
			
 
				+	if (!size)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				 	if (!fdev->mdev)
			
 
				 		return -ENOTCONN;
			
 
				 
			
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
 
				 	u8 actual_size;
			
 
				 	int err;
			
 
				 
			
 
				+	if (!size)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				 	if (!fdev->mdev)
			
 
				 		return -ENOTCONN;
			
 
				 
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
 
				 static void del_sw_flow_table(struct fs_node *node);
			
 
				 static void del_sw_flow_group(struct fs_node *node);
			
 
				 static void del_sw_fte(struct fs_node *node);
			
 
				+static void del_sw_prio(struct fs_node *node);
			
 
				+static void del_sw_ns(struct fs_node *node);
			
 
				 /* Delete rule (destination) is special case that 
			
 
				  * requires to lock the FTE for all the deletion process.
			
 
				  */
			
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				+static void del_sw_ns(struct fs_node *node)
			
 
				+{
			
 
				+	kfree(node);
			
 
				+}
			
 
				+
			
 
				+static void del_sw_prio(struct fs_node *node)
			
 
				+{
			
 
				+	kfree(node);
			
 
				+}
			
 
				+
			
 
				 static void del_hw_flow_table(struct fs_node *node)
			
 
				 {
			
 
				 	struct mlx5_flow_table *ft;
			
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
 
				 		return ERR_PTR(-ENOMEM);
			
 
				 
			
 
				 	fs_prio->node.type = FS_TYPE_PRIO;
			
 
				-	tree_init_node(&fs_prio->node, NULL, NULL);
			
 
				+	tree_init_node(&fs_prio->node, NULL, del_sw_prio);
			
 
				 	tree_add_node(&fs_prio->node, &ns->node);
			
 
				 	fs_prio->num_levels = num_levels;
			
 
				 	fs_prio->prio = prio;
			
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
 
				 		return ERR_PTR(-ENOMEM);
			
 
				 
			
 
				 	fs_init_namespace(ns);
			
 
				-	tree_init_node(&ns->node, NULL, NULL);
			
 
				+	tree_init_node(&ns->node, NULL, del_sw_ns);
			
 
				 	tree_add_node(&ns->node, &prio->node);
			
 
				 	list_add_tail(&ns->node.list, &prio->node.children);
			
 
				 
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
 
				 	u32 fw;
			
 
				 	int i;
			
 
				 
			
 
				-	/* If the syndrom is 0, the device is OK and no need to print buffer */
			
 
				+	/* If the syndrome is 0, the device is OK and no need to print buffer */
			
 
				 	if (!ioread8(&h->synd))
			
 
				 		return;
			
 
				 
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 
				 				   struct mlx5e_params *params)
			
 
				 {
			
 
				 	/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
			
 
				-	mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
			
 
				+	mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
			
 
				 
			
 
				 	/* RQ size in ipoib by default is 512 */
			
 
				 	params->log_rq_size = is_kdump_kernel() ?
			
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 
				 {
			
 
				 	struct mlx5_priv *priv = &dev->priv;
			
 
				 	struct mlx5_eq_table *table = &priv->eq_table;
			
 
				-	struct irq_affinity irqdesc = {
			
 
				-		.pre_vectors = MLX5_EQ_VEC_COMP_BASE,
			
 
				-	};
			
 
				 	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
			
 
				 	int nvec;
			
 
				 
			
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 
				 	if (!priv->irq_info)
			
 
				 		goto err_free_msix;
			
 
				 
			
 
				-	nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
			
 
				+	nvec = pci_alloc_irq_vectors(dev->pdev,
			
 
				 			MLX5_EQ_VEC_COMP_BASE + 1, nvec,
			
 
				-			PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
			
 
				-			&irqdesc);
			
 
				+			PCI_IRQ_MSIX);
			
 
				 	if (nvec < 0)
			
 
				 		return nvec;
			
 
				 
			
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
 
				 	return (u64)timer_l | (u64)timer_h1 << 32;
			
 
				 }
			
 
				 
			
 
				+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
			
 
				+{
			
 
				+	struct mlx5_priv *priv  = &mdev->priv;
			
 
				+	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
			
 
				+
			
 
				+	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
			
 
				+		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
			
 
				+			priv->irq_info[i].mask);
			
 
				+
			
 
				+	if (IS_ENABLED(CONFIG_SMP) &&
			
 
				+	    irq_set_affinity_hint(irq, priv->irq_info[i].mask))
			
 
				+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
			
 
				+{
			
 
				+	struct mlx5_priv *priv  = &mdev->priv;
			
 
				+	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
			
 
				+
			
 
				+	irq_set_affinity_hint(irq, NULL);
			
 
				+	free_cpumask_var(priv->irq_info[i].mask);
			
 
				+}
			
 
				+
			
 
				+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
			
 
				+{
			
 
				+	int err;
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
			
 
				+		err = mlx5_irq_set_affinity_hint(mdev, i);
			
 
				+		if (err)
			
 
				+			goto err_out;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+err_out:
			
 
				+	for (i--; i >= 0; i--)
			
 
				+		mlx5_irq_clear_affinity_hint(mdev, i);
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
			
 
				+		mlx5_irq_clear_affinity_hint(mdev, i);
			
 
				+}
			
 
				+
			
 
				 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
			
 
				 		    unsigned int *irqn)
			
 
				 {
			
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
				 		goto err_stop_eqs;
			
 
				 	}
			
 
				 
			
 
				+	err = mlx5_irq_set_affinity_hints(dev);
			
 
				+	if (err) {
			
 
				+		dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
			
 
				+		goto err_affinity_hints;
			
 
				+	}
			
 
				+
			
 
				 	err = mlx5_init_fs(dev);
			
 
				 	if (err) {
			
 
				 		dev_err(&pdev->dev, "Failed to init flow steering\n");
			
@@ -1154,6 +1213,9 @@ err_sriov:
 
				 	mlx5_cleanup_fs(dev);
			
 
				 
			
 
				 err_fs:
			
 
				+	mlx5_irq_clear_affinity_hints(dev);
			
 
				+
			
 
				+err_affinity_hints:
			
 
				 	free_comp_eqs(dev);
			
 
				 
			
 
				 err_stop_eqs:
			
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
				 
			
 
				 	mlx5_sriov_detach(dev);
			
 
				 	mlx5_cleanup_fs(dev);
			
 
				+	mlx5_irq_clear_affinity_hints(dev);
			
 
				 	free_comp_eqs(dev);
			
 
				 	mlx5_stop_eqs(dev);
			
 
				 	mlx5_put_uars_page(dev, priv->uar);