
Merge drm-upstream/drm-next into drm-intel-next-queued

Needed for timer_setup() and drm_dev_{get,put}() conversions in i915.
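
For context, the two conversions look roughly as below; a minimal kernel-C sketch against the 4.14-era APIs, using a hypothetical struct foo driver (illustration only, not code from this merge):

	#include <linux/timer.h>
	#include <drm/drm_drv.h>

	struct foo {
		struct timer_list timer;
		struct drm_device *ddev;
	};

	/* New-style timer callback: takes the timer_list itself instead of an
	 * unsigned long cookie; from_timer() recovers the containing struct. */
	static void foo_timer_fn(struct timer_list *t)
	{
		struct foo *foo = from_timer(foo, t, timer);

		/* ... handle the timeout ... */
	}

	static void foo_init(struct foo *foo)
	{
		/* Replaces setup_timer(&foo->timer, fn, (unsigned long)foo) */
		timer_setup(&foo->timer, foo_timer_fn, 0);

		/* drm_dev_get()/drm_dev_put() replace drm_dev_ref()/drm_dev_unref() */
		drm_dev_get(foo->ddev);
	}

	static void foo_fini(struct foo *foo)
	{
		del_timer_sync(&foo->timer);
		drm_dev_put(foo->ddev);
	}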

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Jani Nikula, 7 years ago
commit 526b96c4f8
100 changed files with 1441 additions and 875 deletions
  Documentation/cpu-freq/index.txt | +0 -2
  Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt | +4 -0
  Documentation/devicetree/bindings/display/bridge/sii9234.txt | +49 -0
  Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt | +49 -0
  Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt | +3 -0
  Documentation/devicetree/bindings/leds/ams,as3645a.txt | +18 -10
  Documentation/gpu/todo.rst | +10 -7
  MAINTAINERS | +7 -0
  Makefile | +3 -3
  arch/arm/boot/dts/omap3-n950-n9.dtsi | +7 -3
  arch/arm64/include/asm/pgtable.h | +1 -1
  arch/arm64/kernel/head.S | +1 -0
  arch/arm64/mm/fault.c | +1 -1
  arch/microblaze/Kconfig | +1 -1
  arch/microblaze/include/uapi/asm/Kbuild | +1 -0
  arch/microblaze/kernel/dma.c | +1 -1
  arch/powerpc/kvm/book3s_hv_rmhandlers.S | +13 -1
  arch/um/kernel/time.c | +2 -2
  arch/x86/events/intel/cstate.c | +4 -0
  arch/x86/events/intel/rapl.c | +3 -0
  arch/x86/events/intel/uncore_snbep.c | +2 -2
  arch/x86/events/msr.c | +8 -0
  arch/x86/ia32/ia32_signal.c | +1 -1
  arch/x86/include/asm/asm.h | +5 -3
  arch/x86/include/asm/fpu/internal.h | +22 -68
  arch/x86/include/asm/fpu/types.h | +6 -26
  arch/x86/include/asm/fpu/xstate.h | +8 -4
  arch/x86/include/asm/thread_info.h | +0 -11
  arch/x86/include/asm/trace/fpu.h | +4 -7
  arch/x86/include/asm/uaccess.h | +1 -1
  arch/x86/include/asm/xen/hypercall.h | +2 -2
  arch/x86/kernel/fpu/core.c | +43 -112
  arch/x86/kernel/fpu/init.c | +1 -1
  arch/x86/kernel/fpu/regset.c | +26 -22
  arch/x86/kernel/fpu/signal.c | +21 -16
  arch/x86/kernel/fpu/xstate.c | +213 -51
  arch/x86/kernel/irq_32.c | +3 -3
  arch/x86/kernel/ksysfs.c | +1 -1
  arch/x86/kernel/kvm.c | +2 -1
  arch/x86/kernel/signal.c | +3 -3
  arch/x86/kernel/traps.c | +1 -1
  arch/x86/kvm/vmx.c | +101 -105
  arch/x86/kvm/x86.c | +1 -1
  arch/x86/math-emu/fpu_entry.c | +1 -1
  arch/x86/mm/extable.c | +24 -0
  arch/x86/mm/fault.c | +24 -23
  arch/x86/mm/mem_encrypt.c | +2 -0
  arch/x86/mm/pkeys.c | +1 -2
  arch/x86/mm/tlb.c | +1 -1
  arch/x86/xen/mmu_pv.c | +4 -9
  block/blk-core.c | +3 -0
  block/bsg-lib.c | +0 -1
  block/partition-generic.c | +1 -1
  drivers/acpi/apei/ghes.c | +9 -7
  drivers/base/power/opp/core.c | +7 -0
  drivers/block/brd.c | +1 -1
  drivers/block/loop.h | +2 -4
  drivers/block/nbd.c | +6 -0
  drivers/clocksource/numachip.c | +1 -1
  drivers/cpufreq/cpufreq-dt-platdev.c | +4 -0
  drivers/dma-buf/dma-buf.c | +1 -1
  drivers/dma-buf/reservation.c | +42 -14
  drivers/gpu/drm/amd/amdgpu/amdgpu.h | +3 -1
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | +9 -1
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | +9 -1
  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | +23 -0
  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | +8 -8
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | +61 -0
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | +26 -11
  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | +4 -0
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | +6 -1
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | +18 -2
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | +1 -0
  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | +41 -42
  drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | +6 -54
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | +0 -9
  drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | +5 -3
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | +20 -5
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | +3 -2
  drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c | +25 -29
  drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h | +4 -1
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | +13 -0
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | +35 -2
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | +6 -1
  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | +0 -1
  drivers/gpu/drm/amd/amdgpu/cik_ih.c | +17 -2
  drivers/gpu/drm/amd/amdgpu/cz_ih.c | +17 -2
  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | +2 -2
  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | +188 -1
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | +29 -58
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | +3 -3
  drivers/gpu/drm/amd/amdgpu/iceland_ih.c | +17 -2
  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | +0 -7
  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | +20 -24
  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h | +2 -2
  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | +20 -24
  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h | +2 -2
  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | +5 -15
  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | +5 -16
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | +1 -1

+ 0 - 2
Documentation/cpu-freq/index.txt

@@ -32,8 +32,6 @@ cpufreq-stats.txt -	General description of sysfs cpufreq stats.
 
 index.txt	-	File index, Mailing list and Links (this document)
 
-intel-pstate.txt -	Intel pstate cpufreq driver specific file.
-
 pcc-cpufreq.txt -	PCC cpufreq driver specific file.
 
 

+ 4 - 0
Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt

@@ -68,6 +68,8 @@ Optional properties:
 - adi,disable-timing-generator: Only for ADV7533. Disables the internal timing
   generator. The chip will rely on the sync signals in the DSI data lanes,
   rather than generate its own timings for HDMI output.
+- clocks: from common clock binding: reference to the CEC clock.
+- clock-names: from common clock binding: must be "cec".
 
 Required nodes:
 
@@ -89,6 +91,8 @@ Example
 		reg = <39>;
 		interrupt-parent = <&gpio3>;
 		interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+		clocks = <&cec_clock>;
+		clock-names = "cec";
 
 		adi,input-depth = <8>;
 		adi,input-colorspace = "rgb";

+ 49 - 0
Documentation/devicetree/bindings/display/bridge/sii9234.txt

@@ -0,0 +1,49 @@
+Silicon Image SiI9234 HDMI/MHL bridge bindings
+
+Required properties:
+	- compatible : "sil,sii9234".
+	- reg : I2C address for TPI interface, use 0x39
+	- avcc33-supply : MHL/USB Switch Supply Voltage (3.3V)
+	- iovcc18-supply : I/O Supply Voltage (1.8V)
+	- avcc12-supply : TMDS Analog Supply Voltage (1.2V)
+	- cvcc12-supply : Digital Core Supply Voltage (1.2V)
+	- interrupts, interrupt-parent: interrupt specifier of INT pin
+	- reset-gpios: gpio specifier of RESET pin (active low)
+	- video interfaces: Device node can contain two video interface port
+			    nodes for HDMI encoder and connector according to [1].
+			    - port@0 - MHL to HDMI
+			    - port@1 - MHL to connector
+
+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+
+Example:
+	sii9234@39 {
+		compatible = "sil,sii9234";
+		reg = <0x39>;
+		avcc33-supply = <&vcc33mhl>;
+		iovcc18-supply = <&vcc18mhl>;
+		avcc12-supply = <&vsil12>;
+		cvcc12-supply = <&vsil12>;
+		reset-gpios = <&gpf3 4 GPIO_ACTIVE_LOW>;
+		interrupt-parent = <&gpf3>;
+		interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				mhl_to_hdmi: endpoint {
+					remote-endpoint = <&hdmi_to_mhl>;
+				};
+			};
+			port@1 {
+				reg = <1>;
+				mhl_to_connector: endpoint {
+					remote-endpoint = <&connector_to_mhl>;
+				};
+			};
+		};
+	};

+ 49 - 0
Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt

@@ -0,0 +1,49 @@
+This binding covers the official 7" (800x480) Raspberry Pi touchscreen
+panel.
+
+This DSI panel contains:
+
+- TC358762 DSI->DPI bridge
+- Atmel microcontroller on I2C for power sequencing the DSI bridge and
+  controlling backlight
+- Touchscreen controller on I2C for touch input
+
+and this binding covers the DSI display parts but not its touch input.
+
+Required properties:
+- compatible:	Must be "raspberrypi,7inch-touchscreen-panel"
+- reg:		Must be "45"
+- port:		See panel-common.txt
+
+Example:
+
+dsi1: dsi@7e700000 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	<...>
+
+	port {
+		dsi_out_port: endpoint {
+			remote-endpoint = <&panel_dsi_port>;
+		};
+	};
+};
+
+i2c_dsi: i2c {
+	compatible = "i2c-gpio";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	gpios = <&gpio 28 0
+		 &gpio 29 0>;
+
+	lcd@45 {
+		compatible = "raspberrypi,7inch-touchscreen-panel";
+		reg = <0x45>;
+
+		port {
+			panel_dsi_port: endpoint {
+				remote-endpoint = <&dsi_out_port>;
+			};
+		};
+	};
+};

+ 3 - 0
Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt

@@ -41,14 +41,17 @@ CEC. It is one end of the pipeline.
 Required properties:
   - compatible: value must be one of:
     * allwinner,sun5i-a10s-hdmi
+    * allwinner,sun6i-a31-hdmi
   - reg: base address and size of memory-mapped region
   - interrupts: interrupt associated to this IP
   - clocks: phandles to the clocks feeding the HDMI encoder
     * ahb: the HDMI interface clock
     * mod: the HDMI module clock
+    * ddc: the HDMI ddc clock (A31 only)
     * pll-0: the first video PLL
     * pll-1: the second video PLL
   - clock-names: the clock names mentioned above
+  - resets: phandle to the reset control for the HDMI encoder (A31 only)
   - dmas: phandles to the DMA channels used by the HDMI encoder
     * ddc-tx: The channel for DDC transmission
     * ddc-rx: The channel for DDC reception

+ 18 - 10
Documentation/devicetree/bindings/leds/ams,as3645a.txt

@@ -15,11 +15,14 @@ Required properties
 
 compatible	: Must be "ams,as3645a".
 reg		: The I2C address of the device. Typically 0x30.
+#address-cells	: 1
+#size-cells	: 0
 
 
-Required properties of the "flash" child node
-=============================================
+Required properties of the flash child node (0)
+===============================================
 
+reg: 0
 flash-timeout-us: Flash timeout in microseconds. The value must be in
 		  the range [100000, 850000] and divisible by 50000.
 flash-max-microamp: Maximum flash current in microamperes. Has to be
@@ -33,20 +36,21 @@ ams,input-max-microamp: Maximum flash controller input current. The
 			and divisible by 50000.
 
 
-Optional properties of the "flash" child node
-=============================================
+Optional properties of the flash child node
+===========================================
 
 label		: The label of the flash LED.
 
 
-Required properties of the "indicator" child node
-=================================================
+Required properties of the indicator child node (1)
+===================================================
 
+reg: 1
 led-max-microamp: Maximum indicator current. The allowed values are
 		  2500, 5000, 7500 and 10000.
 
-Optional properties of the "indicator" child node
-=================================================
+Optional properties of the indicator child node
+===============================================
 
 label		: The label of the indicator LED.
 
@@ -55,16 +59,20 @@ Example
 =======
 
 	as3645a@30 {
+		#address-cells: 1
+		#size-cells: 0
 		reg = <0x30>;
 		compatible = "ams,as3645a";
-		flash {
+		flash@0 {
+			reg = <0x0>;
 			flash-timeout-us = <150000>;
 			flash-max-microamp = <320000>;
 			led-max-microamp = <60000>;
 			ams,input-max-microamp = <1750000>;
 			label = "as3645a:flash";
 		};
-		indicator {
+		indicator@1 {
+			reg = <0x1>;
 			led-max-microamp = <10000>;
 			label = "as3645a:indicator";
 		};

+ 10 - 7
Documentation/gpu/todo.rst

@@ -184,12 +184,6 @@ Contact: Sean Paul, Maintainer of the driver you plan to convert
 Core refactorings
 =================
 
-Use new IDR deletion interface to clean up drm_gem_handle_delete()
-------------------------------------------------------------------
-
-See the "This is gross" comment -- apparently the IDR system now can return an
-error code instead of oopsing.
-
 Clean up the DRM header mess
 ----------------------------
 
@@ -357,7 +351,16 @@ those drivers as simple as possible, so lots of room for refactoring:
 - backlight helpers, probably best to put them into a new drm_backlight.c.
   This is because drivers/video is de-facto unmaintained. We could also
   move drivers/video/backlight to drivers/gpu/backlight and take it all
-  over within drm-misc, but that's more work.
+  over within drm-misc, but that's more work. Backlight helpers require a fair
+  bit of reworking and refactoring. A simple example is the enabling of a backlight.
+  Tinydrm has helpers for this. It would be good if other drivers can also use the
+  helper. However, there are various cases we need to consider i.e different
+  drivers seem to have different ways of enabling/disabling a backlight.
+  We also need to consider the backlight drivers (like gpio_backlight). The situation
+  is further complicated by the fact that the backlight is tied to fbdev
+  via fb_notifier_callback() which has complicated logic. For further details, refer
+  to the following discussion thread:
+  https://groups.google.com/forum/#!topic/outreachy-kernel/8rBe30lwtdA
 
 - spi helpers, probably best put into spi core/helper code. Thierry said
   the spi maintainer is fast&reactive, so shouldn't be a big issue.

+ 7 - 0
MAINTAINERS

@@ -5461,6 +5461,7 @@ F:	drivers/net/wan/sdla.c
 
 FRAMEBUFFER LAYER
 M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+L:	dri-devel@lists.freedesktop.org
 L:	linux-fbdev@vger.kernel.org
 T:	git git://github.com/bzolnier/linux.git
 Q:	http://patchwork.kernel.org/project/linux-fbdev/list/
@@ -8603,6 +8604,12 @@ M:	Sean Wang <sean.wang@mediatek.com>
 S:	Maintained
 F:	drivers/media/rc/mtk-cir.c
 
+MEDIATEK PMIC LED DRIVER
+M:	Sean Wang <sean.wang@mediatek.com>
+S:	Maintained
+F:	drivers/leds/leds-mt6323.c
+F:	Documentation/devicetree/bindings/leds/leds-mt6323.txt
+
 MEDIATEK ETHERNET DRIVER
 M:	Felix Fietkau <nbd@openwrt.org>
 M:	John Crispin <john@phrozen.org>

+ 3 - 3
Makefile

@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -1172,11 +1172,11 @@ headers_check: headers_install
 
 PHONY += kselftest
 kselftest:
-	$(Q)$(MAKE) -C tools/testing/selftests run_tests
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
 
 PHONY += kselftest-clean
 kselftest-clean:
-	$(Q)$(MAKE) -C tools/testing/selftests clean
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
 
 PHONY += kselftest-merge
 kselftest-merge:

+ 7 - 3
arch/arm/boot/dts/omap3-n950-n9.dtsi

@@ -267,15 +267,19 @@
 	clock-frequency = <400000>;
 
 	as3645a@30 {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		reg = <0x30>;
 		compatible = "ams,as3645a";
-		flash {
+		flash@0 {
+			reg = <0x0>;
 			flash-timeout-us = <150000>;
 			flash-max-microamp = <320000>;
 			led-max-microamp = <60000>;
-			peak-current-limit = <1750000>;
+			ams,input-max-microamp = <1750000>;
 		};
-		indicator {
+		indicator@1 {
+			reg = <0x1>;
 			led-max-microamp = <10000>;
 		};
 	};

+ 1 - 1
arch/arm64/include/asm/pgtable.h

@@ -401,7 +401,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-#define pte_offset_phys(dir,addr)	(pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
 #define pte_offset_kernel(dir,addr)	((pte_t *)__va(pte_offset_phys((dir), (addr))))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))

+ 1 - 0
arch/arm64/kernel/head.S

@@ -384,6 +384,7 @@ ENTRY(kimage_vaddr)
  * booted in EL1 or EL2 respectively.
  */
 ENTRY(el2_setup)
+	msr	SPsel, #1			// We want to use SP_EL{1,2}
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
 	b.eq	1f

+ 1 - 1
arch/arm64/mm/fault.c

@@ -651,7 +651,7 @@ static const struct fault_info fault_info[] = {
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
-	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"unknown 8"			},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},

+ 1 - 1
arch/microblaze/Kconfig

@@ -39,7 +39,7 @@ config MICROBLAZE
 # Endianness selection
 choice
 	prompt "Endianness selection"
-	default CPU_BIG_ENDIAN
+	default CPU_LITTLE_ENDIAN
 	help
 	  microblaze architectures can be configured for either little or
 	  big endian formats. Be sure to select the appropriate mode.

+ 1 - 0
arch/microblaze/include/uapi/asm/Kbuild

@@ -7,6 +7,7 @@ generic-y += fcntl.h
 generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
+generic-y += kvm_para.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h

+ 1 - 1
arch/microblaze/kernel/dma.c

@@ -165,7 +165,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     unsigned long attrs)
 {
 #ifdef CONFIG_MMU
-	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;
 	unsigned long pfn;

+ 13 - 1
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 BEGIN_FTR_SECTION
 	mtspr	SPRN_PPR, r0
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+/* Move canary into DSISR to check for later */
+BEGIN_FTR_SECTION
+	li	r0, 0x7fff
+	mtspr	SPRN_HDSISR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
 	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r4, VCPU_GPR(R4)(r4)
 
@@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 kvmppc_hdsi:
 	ld	r3, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r3)
-	cmpwi	r0, 0
 	mfspr	r4, SPRN_HDAR
 	mfspr	r6, SPRN_HDSISR
+BEGIN_FTR_SECTION
+	/* Look for DSISR canary. If we find it, retry instruction */
+	cmpdi	r6, 0x7fff
+	beq	6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	cmpwi	r0, 0
 	bne	.Lradix_hdsi		/* on radix, just save DAR/DSISR/ASDR */
 	/* HPTE not found fault or protection fault? */
 	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h

+ 2 - 2
arch/um/kernel/time.c

@@ -98,7 +98,7 @@ static struct clocksource timer_clocksource = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init timer_setup(void)
+static void __init um_timer_setup(void)
 {
 	int err;
 
@@ -132,5 +132,5 @@ void read_persistent_clock(struct timespec *ts)
 void __init time_init(void)
 {
 	timer_set_signal_handler();
-	late_time_init = timer_setup;
+	late_time_init = um_timer_setup;
 }

+ 4 - 0
arch/x86/events/intel/cstate.c

@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

+ 3 - 0
arch/x86/events/intel/rapl.c

@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
 	{},
 };
 

+ 2 - 2
arch/x86/events/intel/uncore_snbep.c

@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
 static struct intel_uncore_type skx_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IIO0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
 static struct intel_uncore_type skx_uncore_irp = {
 	.name			= "irp",
 	.num_counters		= 2,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IRP0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IRP0_MSR_PMON_CTR0,

+ 8 - 0
arch/x86/events/msr.c

@@ -63,6 +63,14 @@ static bool test_intel(int idx)
 	case INTEL_FAM6_ATOM_SILVERMONT1:
 	case INTEL_FAM6_ATOM_SILVERMONT2:
 	case INTEL_FAM6_ATOM_AIRMONT:
+
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_DENVERTON:
+
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
 		if (idx == PERF_MSR_SMI)
 			return true;
 		break;

+ 1 - 1
arch/x86/ia32/ia32_signal.c

@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 		 ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		unsigned long fx_aligned, math_size;
 
 		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);

+ 5 - 3
arch/x86/include/asm/asm.h

@@ -11,10 +11,12 @@
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(a)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
+/* 64 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(b)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
@@ -139,8 +141,8 @@
  * gets set up by the containing function.  If you forget to do this, objtool
  * may print a "call without frame pointer save/setup" warning.
  */
-register unsigned int __asm_call_sp asm("esp");
-#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
 
 #endif /* _ASM_X86_ASM_H */
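
(Aside: a minimal sketch of how ASM_CALL_CONSTRAINT is meant to be used — listing it among the output constraints of any inline asm containing a "call" forces the compiler to set up the frame pointer first. The helper name is hypothetical, and the example assumes x86-64 with the argument passed in %rdi:)

	static inline int foo_call_helper(int arg)
	{
		int ret;

		/* "my_asm_helper" is a made-up asm routine; the point is the
		 * ASM_CALL_CONSTRAINT output operand next to "=a" (ret). */
		asm volatile("call my_asm_helper"
			     : "=a" (ret), ASM_CALL_CONSTRAINT
			     : "D" (arg));
		return ret;
	}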

+ 22 - 68
arch/x86/include/asm/fpu/internal.h

@@ -23,11 +23,9 @@
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__activate_curr(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
+extern void fpu__initialize(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
@@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
 	err;								\
 })
 
-#define check_insn(insn, output, input...)				\
-({									\
-	int err;							\
+#define kernel_insn(insn, output, input...)				\
 	asm volatile("1:" #insn "\n\t"					\
 		     "2:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3:  movl $-1,%[err]\n"				\
-		     "    jmp  2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : [err] "=r" (err), output				\
-		     : "0"(0), input);					\
-	err;								\
-})
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)	\
+		     : output : input)
 
 static inline int copy_fregs_to_user(struct fregs_state __user *fx)
 {
@@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 {
-	int err;
-
 	if (IS_ENABLED(CONFIG_X86_32)) {
-		err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+		kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 	} else {
 		if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
-			err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+			kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 		} else {
 			/* See comment in copy_fxregs_to_kernel() below. */
-			err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+			kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
 		}
 	}
-	/* Copying from a kernel buffer to FPU registers should never fail: */
-	WARN_ON_FPU(err);
 }
 
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 {
-	int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-	WARN_ON_FPU(err);
+	kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
  * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
  * XSAVE area format.
  */
-#define XSTATE_XRESTORE(st, lmask, hmask, err)				\
+#define XSTATE_XRESTORE(st, lmask, hmask)				\
 	asm volatile(ALTERNATIVE(XRSTOR,				\
 				 XRSTORS, X86_FEATURE_XSAVES)		\
 		     "\n"						\
-		     "xor %[err], %[err]\n"				\
 		     "3:\n"						\
-		     ".pushsection .fixup,\"ax\"\n"			\
-		     "4: movl $-2, %[err]\n"				\
-		     "jmp 3b\n"						\
-		     ".popsection\n"					\
-		     _ASM_EXTABLE(661b, 4b)				\
-		     : [err] "=r" (err)					\
+		     _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+		     :							\
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
 
@@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
 
-	/* We should never fault when copying from a kernel buffer: */
+	/*
+	 * We should never fault when copying from a kernel buffer, and the FPU
+	 * state we set at boot time should be valid.
+	 */
 	WARN_ON_FPU(err);
 }
 
@@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
 	u32 hmask = mask >> 32;
 	int err;
 
-	WARN_ON(!alternatives_patched);
+	WARN_ON_FPU(!alternatives_patched);
 
 	XSTATE_XSAVE(xstate, lmask, hmask, err);
 
@@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
 {
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
-	int err;
-
-	XSTATE_XRESTORE(xstate, lmask, hmask, err);
 
-	/* We should never fault when copying from a kernel buffer: */
-	WARN_ON_FPU(err);
+	XSTATE_XRESTORE(xstate, lmask, hmask);
 }
 
 /*
@@ -526,37 +503,16 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-	WARN_ON_FPU(!fpu->fpregs_active);
-
-	fpu->fpregs_active = 0;
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 	trace_x86_fpu_regs_deactivated(fpu);
 }
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	WARN_ON_FPU(fpu->fpregs_active);
-
-	fpu->fpregs_active = 1;
 	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
 	trace_x86_fpu_regs_activated(fpu);
 }
 
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
-	return current->thread.fpu.fpregs_active;
-}
-
 /*
  * FPU state switching for scheduling.
  *
@@ -571,14 +527,13 @@ static inline int fpregs_active(void)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->fpregs_active) {
+	if (old_fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
 			old_fpu->last_cpu = cpu;
 
 		/* But leave fpu_fpregs_owner_ctx! */
-		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
 	} else
 		old_fpu->last_cpu = -1;
@@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
 {
 	bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-		       new_fpu->fpstate_active;
+		       new_fpu->initialized;
 
 	if (preload) {
 		if (!fpregs_state_valid(new_fpu, cpu))
@@ -617,8 +572,7 @@ static inline void user_fpu_begin(void)
 	struct fpu *fpu = &current->thread.fpu;
 
 	preempt_disable();
-	if (!fpregs_active())
-		fpregs_activate(fpu);
+	fpregs_activate(fpu);
 	preempt_enable();
 }
 

+ 6 - 26
arch/x86/include/asm/fpu/types.h

@@ -68,6 +68,9 @@ struct fxregs_state {
 /* Default value for fxregs_state.mxcsr: */
 #define MXCSR_DEFAULT		0x1f80
 
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
 /*
  * Software based FPU emulation state. This is arbitrary really,
  * it matches the x87 format to make it easier to understand:
@@ -290,36 +293,13 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @fpstate_active:
+	 * @initialized:
 	 *
-	 * This flag indicates whether this context is active: if the task
+	 * This flag indicates whether this context is initialized: if the task
 	 * is not running then we can restore from this context, if the task
 	 * is running then we should save into this context.
 	 */
-	unsigned char			fpstate_active;
-
-	/*
-	 * @fpregs_active:
-	 *
-	 * This flag determines whether a given context is actively
-	 * loaded into the FPU's registers and that those registers
-	 * represent the task's current FPU state.
-	 *
-	 * Note the interaction with fpstate_active:
-	 *
-	 *   # task does not use the FPU:
-	 *   fpstate_active == 0
-	 *
-	 *   # task uses the FPU and regs are active:
-	 *   fpstate_active == 1 && fpregs_active == 1
-	 *
-	 *   # the regs are inactive but still match fpstate:
-	 *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
-	 *
-	 * The third state is what we use for the lazy restore optimization
-	 * on lazy-switching CPUs.
-	 */
-	unsigned char			fpregs_active;
+	unsigned char			initialized;
 
 	/*
 	 * @state:

+ 8 - 4
arch/x86/include/asm/fpu/xstate.h

@@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
 #endif

+ 0 - 11
arch/x86/include/asm/thread_info.h

@@ -158,17 +158,6 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-static inline unsigned long current_stack_pointer(void)
-{
-	unsigned long sp;
-#ifdef CONFIG_X86_64
-	asm("mov %%rsp,%0" : "=g" (sp));
-#else
-	asm("mov %%esp,%0" : "=g" (sp));
-#endif
-	return sp;
-}
-
 /*
  * Walks up the stack frames to make sure that the specified object is
  * entirely contained by a single stack frame.

+ 4 - 7
arch/x86/include/asm/trace/fpu.h

@@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_STRUCT__entry(
 		__field(struct fpu *, fpu)
-		__field(bool, fpregs_active)
-		__field(bool, fpstate_active)
+		__field(bool, initialized)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
 		),
 
 	TP_fast_assign(
 		__entry->fpu		= fpu;
-		__entry->fpregs_active	= fpu->fpregs_active;
-		__entry->fpstate_active	= fpu->fpstate_active;
+		__entry->initialized	= fpu->initialized;
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
 			__entry->fpu,
-			__entry->fpregs_active,
-			__entry->fpstate_active,
+			__entry->initialized,
 			__entry->xfeatures,
 			__entry->xcomp_bv
 	)

+ 1 - 1
arch/x86/include/asm/uaccess.h

@@ -337,7 +337,7 @@ do {									\
 		     _ASM_EXTABLE(1b, 4b)				\
 		     _ASM_EXTABLE(2b, 4b)				\
 		     : "=r" (retval), "=&A"(x)				\
-		     : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1),	\
+		     : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1),	\
 		       "i" (errret), "0" (retval));			\
 })
 

+ 2 - 2
arch/x86/include/asm/xen/hypercall.h

@@ -551,13 +551,13 @@ static inline void
 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
 			struct desc_struct desc)
 {
-	u32 *p = (u32 *) &desc;
-
 	mcl->op = __HYPERVISOR_update_descriptor;
 	if (sizeof(maddr) == sizeof(long)) {
 		mcl->args[0] = maddr;
 		mcl->args[1] = *(unsigned long *)&desc;
 	} else {
+		u32 *p = (u32 *)&desc;
+
 		mcl->args[0] = maddr;
 		mcl->args[1] = maddr >> 32;
 		mcl->args[2] = *p++;

+ 43 - 112
arch/x86/kernel/fpu/core.c

@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (fpu->fpregs_active) {
+	if (fpu->initialized) {
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (fpu->fpregs_active)
+	if (fpu->initialized)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
 
 	preempt_disable();
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->fpregs_active) {
+	if (fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
@@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
-	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	/*
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
-	 * In lazy mode, if the FPU context isn't loaded into
-	 * fpregs, CR0.TS will be set and do_device_not_available
-	 * will load the FPU context.
 	 *
-	 * We have to do all this with preemption disabled,
-	 * mostly because of the FNSAVE case, because in that
-	 * case we must not allow preemption in the window
-	 * between the FNSAVE and us marking the context lazy.
-	 *
-	 * It shouldn't be an issue as even FNSAVE is plenty
-	 * fast in terms of critical section length.
+	 * ( The function 'fails' in the FNSAVE case, which destroys
+	 *   register contents so we have to copy them back. )
 	 */
-	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state,
-		       fpu_kernel_xstate_size);
-
+		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
 		copy_kernel_to_fpregs(&src_fpu->state);
 	}
-	preempt_enable();
 
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
@@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	if (!fpu->fpstate_active) {
+	if (!fpu->initialized) {
 		fpstate_init(&fpu->state);
 		trace_x86_fpu_init_state(fpu);
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for the current task: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
  *
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ *   to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ *   registers were already saved by the context-switch code when
+ *   the task scheduled out - we only have to initialize the registers
+ *   if they've never been initialized.
  *
  * If the task has used the FPU before then save it.
  */
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
 {
-	/*
-	 * If fpregs are active (in the current CPU), then
-	 * copy them to the fpstate:
-	 */
-	if (fpu->fpregs_active) {
+	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
 	} else {
-		if (!fpu->fpstate_active) {
+		if (!fpu->initialized) {
 			fpstate_init(&fpu->state);
 			trace_x86_fpu_init_state(fpu);
 
 			trace_x86_fpu_activate_state(fpu);
 			/* Safe to do for current and for stopped child tasks: */
-			fpu->fpstate_active = 1;
+			fpu->initialized = 1;
 		}
 	}
 }
@@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
  * If the task has not used the FPU before then initialize its fpstate.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
  * restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
 {
 	/*
 	 * Only stopped child tasks can be used to modify the FPU
@@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	 */
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
-	if (fpu->fpstate_active) {
-		/* Invalidate any lazy state: */
+	if (fpu->initialized) {
+		/* Invalidate any cached state: */
 		__fpu_invalidate_fpregs_state(fpu);
 	} else {
 		fpstate_init(&fpu->state);
@@ -310,73 +300,10 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for stopped child tasks: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
 
-/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'.  Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * Ensure that the context-switching code does not write
-	 * over the fpstate while we are doing our update.
-	 */
-	preempt_disable();
-
-	/*
-	 * Move the fpregs in to the fpu's 'fpstate'.
-	 */
-	fpu__activate_fpstate_read(fpu);
-
-	/*
-	 * The caller is about to write to 'fpu'.  Ensure that no
-	 * CPU thinks that its fpregs match the fpstate.  This
-	 * ensures we will not be lazy and skip a XRSTOR in the
-	 * future.
-	 */
-	__fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * 'fpu' now has an updated copy of the state, but the
-	 * registers may still be out of date.  Update them with
-	 * an XRSTOR if they are active.
-	 */
-	if (fpregs_active())
-		copy_kernel_to_fpregs(&fpu->state);
-
-	/*
-	 * Our update is done and the fpregs/fpstate are in sync
-	 * if necessary.  Context switches can happen again.
-	 */
-	preempt_enable();
-}
-
 /*
  * 'fpu__restore()' is called to copy FPU registers from
  * the FPU fpstate to the live hw registers and to activate
@@ -389,7 +316,7 @@ void fpu__current_fpstate_write_end(void)
  */
 void fpu__restore(struct fpu *fpu)
 {
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
 	kernel_fpu_disable();
@@ -414,15 +341,17 @@ void fpu__drop(struct fpu *fpu)
 {
 	preempt_disable();
 
-	if (fpu->fpregs_active) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		fpregs_deactivate(fpu);
+	if (fpu == &current->thread.fpu) {
+		if (fpu->initialized) {
+			/* Ignore delayed exceptions from user space */
+			asm volatile("1: fwait\n"
+				     "2:\n"
+				     _ASM_EXTABLE(1b, 2b));
+			fpregs_deactivate(fpu);
+		}
 	}
 
-	fpu->fpstate_active = 0;
+	fpu->initialized = 0;
 
 	trace_x86_fpu_dropped(fpu);
 
@@ -462,9 +391,11 @@ void fpu__clear(struct fpu *fpu)
 	 * Make sure fpstate is cleared and initialized.
 	 */
 	if (static_cpu_has(X86_FEATURE_FPU)) {
-		fpu__activate_curr(fpu);
+		preempt_disable();
+		fpu__initialize(fpu);
 		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
+		preempt_enable();
 	}
 }
 

+ 1 - 1
arch/x86/kernel/fpu/init.c

@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 
-	WARN_ON_FPU(current->thread.fpu.fpstate_active);
+	WARN_ON_FPU(current->thread.fpu.initialized);
 }
 
 /*

+ 26 - 22
arch/x86/kernel/fpu/regset.c

@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
-	return target_fpu->fpstate_active ? regset->n : 0;
+	return target_fpu->initialized ? regset->n : 0;
 }
 
 int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
-	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
 		return regset->n;
 	else
 		return 0;
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,10 +89,13 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (using_compacted_format()) {
-		ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+		if (kbuf)
+			ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
+		else
+			ret = copy_xstate_to_user(ubuf, xsave, pos, count);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
@@ -129,28 +132,29 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		ret = copyin_to_xsaves(kbuf, ubuf, xsave);
-	else
+	if (using_compacted_format()) {
+		if (kbuf)
+			ret = copy_kernel_to_xstate(xsave, kbuf);
+		else
+			ret = copy_user_to_xstate(xsave, ubuf);
+	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
-	/*
-	 * In case of failure, mark all states as init:
-	 */
-	if (ret)
-		fpstate_init(&fpu->state);
+		if (!ret)
+			ret = validate_xstate_header(&xsave->header);
+	}
 
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
 	xsave->i387.mxcsr &= mxcsr_feature_mask;
-	xsave->header.xfeatures &= xfeatures_mask;
+
 	/*
-	 * These bits must be zero.
+	 * In case of failure, mark all states as init:
 	 */
-	memset(&xsave->header.reserved, 0, 48);
+	if (ret)
+		fpstate_init(&fpu->state);
 
 	return ret;
 }
@@ -299,7 +303,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	struct fpu *fpu = &target->thread.fpu;
 	struct user_i387_ia32_struct env;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -329,7 +333,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_i387_ia32_struct env;
 	int ret;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))
@@ -369,7 +373,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
 	struct fpu *fpu = &tsk->thread.fpu;
 	int fpvalid;
 
-	fpvalid = fpu->fpstate_active;
+	fpvalid = fpu->initialized;
 	if (fpvalid)
 		fpvalid = !fpregs_get(tsk, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),

+ 21 - 16
arch/x86/kernel/fpu/signal.c

@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  */
 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 {
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+	struct fpu *fpu = &current->thread.fpu;
+	struct xregs_state *xsave = &fpu->state.xsave;
 	struct task_struct *tsk = current;
 	int ia32_fxstate = (buf != buf_fx);
 
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active() || using_compacted_format()) {
+	if (fpu->initialized || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
 		/* Update the thread's fxstate to save the fsave header. */
 		if (ia32_fxstate)
-			copy_fxregs_to_kernel(&tsk->thread.fpu);
+			copy_fxregs_to_kernel(fpu);
 	} else {
 		/*
 		 * It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			return -1;
 		}
 
-		fpstate_sanitize_xstate(&tsk->thread.fpu);
+		fpstate_sanitize_xstate(fpu);
 		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
@@ -213,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
 	struct xstate_header *header = &xsave->header;
 
 	if (use_xsave()) {
-		/* These bits must be zero. */
-		memset(header->reserved, 0, 48);
+		/*
+		 * Note: we don't need to zero the reserved bits in the
+		 * xstate_header here because we either didn't copy them at all,
+		 * or we checked earlier that they aren't set.
+		 */
 
 		/*
 		 * Init the state that is not present in the memory
@@ -223,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
 		if (fx_only)
 			header->xfeatures = XFEATURE_MASK_FPSSE;
 		else
-			header->xfeatures &= (xfeatures_mask & xfeatures);
+			header->xfeatures &= xfeatures;
 	}
 
 	if (use_fxsr()) {
@@ -279,7 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	if (!access_ok(VERIFY_READ, buf, size))
 		return -EACCES;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(current, NULL,
@@ -307,28 +311,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		/*
 		 * For 32-bit frames with fxstate, copy the user state to the
 		 * thread's fpu state, reconstruct fxstate from the fsave
-		 * header. Sanitize the copied state etc.
+		 * header. Validate and sanitize the copied state.
 		 */
 		struct fpu *fpu = &tsk->thread.fpu;
 		struct user_i387_ia32_struct env;
 		int err = 0;
 
 		/*
-		 * Drop the current fpu which clears fpu->fpstate_active. This ensures
+		 * Drop the current fpu which clears fpu->initialized. This ensures
 		 * that any context-switch during the copy of the new state,
 		 * avoids the intermediate state from getting restored/saved.
 		 * Thus avoiding the new restored state from getting corrupted.
 		 * We will be ready to restore/save the state only after
-		 * fpu->fpstate_active is again set.
+		 * fpu->initialized is again set.
 		 */
 		fpu__drop(fpu);
 
 		if (using_compacted_format()) {
-			err = copyin_to_xsaves(NULL, buf_fx,
-					       &fpu->state.xsave);
+			err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		} else {
-			err = __copy_from_user(&fpu->state.xsave,
-					       buf_fx, state_size);
+			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+
+			if (!err && state_size > offsetof(struct xregs_state, header))
+				err = validate_xstate_header(&fpu->state.xsave.header);
 		}
 
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {
@@ -339,7 +344,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
 		}
 
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 		preempt_disable();
 		fpu__restore(fpu);
 		preempt_enable();

+ 213 - 51
arch/x86/kernel/fpu/xstate.c

@@ -483,6 +483,30 @@ int using_compacted_format(void)
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+	/* No unknown or supervisor features may be set */
+	if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+		return -EINVAL;
+
+	/* Userspace must use the uncompacted format */
+	if (hdr->xcomp_bv)
+		return -EINVAL;
+
+	/*
+	 * If 'reserved' is shrunken to add a new field, make sure to validate
+	 * that new field here!
+	 */
+	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+	/* No reserved bits may be set */
+	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+		return -EINVAL;
+
+	return 0;
+}
+
 static void __xstate_dump_leaves(void)
 {
 	int i;
@@ -867,7 +891,7 @@ const void *get_xsave_field_ptr(int xsave_state)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (!fpu->fpstate_active)
+	if (!fpu->initialized)
 		return NULL;
 	/*
 	 * fpu__save() takes the CPU's xstate registers
@@ -920,39 +944,130 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 }
 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
+/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+		return false;
+
+	if (xfeatures & XFEATURE_MASK_FP)
+		return false;
+
+	return true;
+}
+
 /*
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
-				 void *kbuf, void __user *ubuf,
-				 const void *data, const int start_pos,
-				 const int end_pos)
+static inline void
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+			unsigned int offset, unsigned int size, unsigned int size_total)
 {
-	if ((count == 0) || (pos < start_pos))
-		return 0;
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
 
-	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+		memcpy(kbuf + offset, data, copy);
+	}
+}
 
-		if (kbuf) {
-			memcpy(kbuf + pos, data, copy);
-		} else {
-			if (__copy_to_user(ubuf + pos, data, copy))
-				return -EFAULT;
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy but pos always starts from zero. This is called
+ * from xstateregs_get() and there we check the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+{
+	unsigned int offset, size;
+	struct xstate_header header;
+	int i;
+
+	/*
+	 * Currently copy_regset_to_user() starts from pos 0:
+	 */
+	if (unlikely(offset_start != 0))
+		return -EFAULT;
+
+	/*
+	 * The destination is a ptrace buffer; we put in only user xstates:
+	 */
+	memset(&header, 0, sizeof(header));
+	header.xfeatures = xsave->header.xfeatures;
+	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Copy xregs_state->header:
+	 */
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(header);
+
+	__copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		/*
+		 * Copy only in-use xstates:
+		 */
+		if ((header.xfeatures >> i) & 1) {
+			void *src = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
+
+			__copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 		}
+
+	}
+
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
+	/*
+	 * Fill xsave->i387.sw_reserved value for ptrace frame:
+	 */
+	offset = offsetof(struct fxregs_state, sw_reserved);
+	size = sizeof(xstate_fx_sw_bytes);
+
+	__copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
+
+	return 0;
+}
+
+static inline int
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
+{
+	if (!size)
+		return 0;
+
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
+
+		if (__copy_to_user(ubuf + offset, data, copy))
+			return -EFAULT;
 	}
 	return 0;
 }
 
 /*
  * Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy but pos always starts from
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -961,7 +1076,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
 	 */
-	if (unlikely(pos != 0))
+	if (unlikely(offset_start != 0))
 		return -EFAULT;
 
 	/*
@@ -977,8 +1092,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
-
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -992,25 +1106,30 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
 
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
-
-			if (offset + size >= count)
-				break;
 		}
 
 	}
 
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
 	/*
 	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 	 */
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -1018,55 +1137,98 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 }
 
 /*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set().
  */
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 xfeatures;
-	u64 allowed_features;
+	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
-	size = sizeof(xfeatures);
+	size = sizeof(hdr);
 
-	if (kbuf) {
-		memcpy(&xfeatures, kbuf + offset, size);
-	} else {
-		if (__copy_from_user(&xfeatures, ubuf + offset, size))
-			return -EFAULT;
+	memcpy(&hdr, kbuf + offset, size);
+
+	if (validate_xstate_header(&hdr))
+		return -EINVAL;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		u64 mask = ((u64)1 << i);
+
+		if (hdr.xfeatures & mask) {
+			void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			memcpy(dst, kbuf + offset, size);
+		}
+	}
+
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
 	}
 
 	/*
-	 * Reject if the user sets any disabled or supervisor features:
+	 * The state that came in from userspace was user-state only.
+	 * Mask all the user states out of 'xfeatures':
+	 */
+	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Add back in the features that came in from userspace:
 	 */
-	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
-	if (xfeatures & ~allowed_features)
+	return 0;
+}
+
+/*
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
+ */
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
+{
+	unsigned int offset, size;
+	int i;
+	struct xstate_header hdr;
+
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(hdr);
+
+	if (__copy_from_user(&hdr, ubuf + offset, size))
+		return -EFAULT;
+
+	if (validate_xstate_header(&hdr))
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
-		if (xfeatures & mask) {
+		if (hdr.xfeatures & mask) {
 			void *dst = __raw_xsave_addr(xsave, 1 << i);
 
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			if (kbuf) {
-				memcpy(dst, kbuf + offset, size);
-			} else {
-				if (__copy_from_user(dst, ubuf + offset, size))
-					return -EFAULT;
-			}
+			if (__copy_from_user(dst, ubuf + offset, size))
+				return -EFAULT;
 		}
 	}
 
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+			return -EFAULT;
+	}
+
 	/*
 	 * The state that came in from userspace was user-state only.
 	 * Mask all the user states out of 'xfeatures':
@@ -1076,7 +1238,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
 	/*
 	 * Add back in the features that came in from userspace:
 	 */
-	xsave->header.xfeatures |= xfeatures;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
 	return 0;
 }
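
Both copy-in paths above now delegate their header checks to validate_xstate_header(), replacing the open-coded "disabled or supervisor features" test that the old copyin_to_xsaves() carried. A minimal sketch of what that validator presumably does, based on the rejection semantics visible here (the exact body lives elsewhere in xstate.c):

int validate_xstate_header(const struct xstate_header *hdr)
{
	/* Reject unknown or supervisor-only feature bits: */
	if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
		return -EINVAL;

	/* Userspace must hand in the standard (uncompacted) format: */
	if (hdr->xcomp_bv)
		return -EINVAL;

	/* All reserved header bytes must be zero: */
	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
		return -EINVAL;

	return 0;
}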

+ 3 - 3
arch/x86/kernel/irq_32.c

@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack)
 
 static inline void *current_stack(void)
 {
-	return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+	return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
 }
 
 static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 
 	/* Save the next esp at the bottom of the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer();
+	*prev_esp = current_stack_pointer;
 
 	if (unlikely(overflow))
 		call_on_stack(print_stack_overflow, isp);
@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
 
 	/* Push the previous esp onto the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer();
+	*prev_esp = current_stack_pointer;
 
 	call_on_stack(__do_softirq, isp);
 }
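
The dropped parentheses in these three hunks reflect that current_stack_pointer is no longer a helper function but a global register variable. Its definition in asm/asm.h is presumably a one-liner of this shape:

/* A named register, not a function call — reads compile to a direct use of %esp/%rsp: */
register unsigned long current_stack_pointer asm(_ASM_SP);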

+ 1 - 1
arch/x86/kernel/ksysfs.c

@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
 	return 0;
 
 out_clean_nodes:
-	for (j = i - 1; j > 0; j--)
+	for (j = i - 1; j >= 0; j--)
 		cleanup_setup_data_node(*(kobjp + j));
 	kfree(kobjp);
 out_setup_data_kobj:
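
The changed bound fixes an off-by-one in the error unwind: nodes exist for indices 0..i-1, so cleanup must reach index 0. Spelled out for a failure after three successful creations (i == 3):

/*
 * for (j = i - 1; j >  0; j--)  -> cleans up j = 2, 1       (leaks node 0)
 * for (j = i - 1; j >= 0; j--)  -> cleans up j = 2, 1, 0    (correct)
 */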

+ 2 - 1
arch/x86/kernel/kvm.c

@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
 
 	n.token = token;
 	n.cpu = smp_processor_id();
-	n.halted = is_idle_task(current) || preempt_count() > 1;
+	n.halted = is_idle_task(current) || preempt_count() > 1 ||
+		   rcu_preempt_depth();
 	init_swait_queue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
 	raw_spin_unlock(&b->lock);

+ 3 - 3
arch/x86/kernel/signal.c

@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
 					  &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		return (void __user *)-1L;
 
 	/* save i387 and extended state */
-	if (fpu->fpstate_active &&
+	if (fpu->initialized &&
 	    copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
 		return (void __user *)-1L;
 
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		/*
 		 * Ensure the signal handler starts with the new fpu state.
 		 */
-		if (fpu->fpstate_active)
+		if (fpu->initialized)
 			fpu__clear(fpu);
 	}
 	signal_setup_done(failed, ksig, stepping);

+ 1 - 1
arch/x86/kernel/traps.c

@@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	 * from double_fault.
 	 */
 	BUG_ON((unsigned long)(current_top_of_stack() -
-			       current_stack_pointer()) >= THREAD_SIZE);
+			       current_stack_pointer) >= THREAD_SIZE);
 
 	preempt_enable_no_resched();
 }

+ 101 - 105
arch/x86/kvm/vmx.c

@@ -200,6 +200,8 @@ struct loaded_vmcs {
 	int cpu;
 	bool launched;
 	bool nmi_known_unmasked;
+	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
+	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -600,8 +602,6 @@ struct vcpu_vmx {
 		int           gs_ldt_reload_needed;
 		int           fs_reload_needed;
 		u64           msr_host_bndcfgs;
-		unsigned long vmcs_host_cr3;	/* May not match real cr3 */
-		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	} host_state;
 	struct {
 		int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	struct pi_desc old, new;
 	unsigned int dest;
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	/*
+	 * In case of hot-plug or hot-unplug, we may have to undo
+	 * vmx_vcpu_pi_put even if there is no assigned device.  And we
+	 * always keep PI.NDST up to date for simplicity: it makes the
+	 * code easier, and CPU migration is not a fast path.
+	 */
+	if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
 		return;
 
+	/*
+	 * First handle the simple case where no cmpxchg is necessary; just
+	 * allow posting non-urgent interrupts.
+	 *
+	 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+	 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+	 * expects the VCPU to be on the blocked_vcpu_list that matches
+	 * PI.NDST.
+	 */
+	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+	    vcpu->cpu == cpu) {
+		pi_clear_sn(pi_desc);
+		return;
+	}
+
+	/* The full case.  */
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
-		 * are two possible cases:
-		 * 1. After running 'pre_block', context switch
-		 *    happened. For this case, 'sn' was set in
-		 *    vmx_vcpu_put(), so we need to clear it here.
-		 * 2. After running 'pre_block', we were blocked,
-		 *    and woken up by some other guy. For this case,
-		 *    we don't need to do anything, 'pi_post_block'
-		 *    will do everything for us. However, we cannot
-		 *    check whether it is case #1 or case #2 here
-		 *    (maybe, not needed), so we also clear sn here,
-		 *    I think it is not a big deal.
-		 */
-		if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
-			if (vcpu->cpu != cpu) {
-				dest = cpu_physical_id(cpu);
-
-				if (x2apic_enabled())
-					new.ndst = dest;
-				else
-					new.ndst = (dest << 8) & 0xFF00;
-			}
+		dest = cpu_physical_id(cpu);
 
-			/* set 'NV' to 'notification vector' */
-			new.nv = POSTED_INTR_VECTOR;
-		}
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
 
-		/* Allow posting non-urgent interrupts */
 		new.sn = 0;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 }
 
 static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5178,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	 */
 	cr3 = __read_cr3();
 	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
-	vmx->host_state.vmcs_host_cr3 = cr3;
+	vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
 	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
-	vmx->host_state.vmcs_host_cr4 = cr4;
+	vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
@@ -9273,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
 	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+	if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
 		vmcs_writel(HOST_CR3, cr3);
-		vmx->host_state.vmcs_host_cr3 = cr3;
+		vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 	}
 
 	cr4 = cr4_read_shadow();
-	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+	if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
-		vmx->host_state.vmcs_host_cr4 = cr4;
+		vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 	}
 
 	/* When single-stepping over STI and MOV SS, we must clear the
@@ -9591,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
+	/*
+	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+	 * or POSTED_INTR_WAKEUP_VECTOR.
+	 */
+	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+	vmx->pi_desc.sn = 1;
+
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -9839,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
 	WARN_ON(!is_guest_mode(vcpu));
 
-	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+		!to_vmx(vcpu)->nested.nested_run_pending) {
 		vmcs12->vm_exit_intr_error_code = fault->error_code;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -11704,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+
+	do {
+		old.control = new.control = pi_desc->control;
+		WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+		     "Wakeup handler not enabled while the VCPU is blocked\n");
+
+		dest = cpu_physical_id(vcpu->cpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* set 'NV' to 'notification vector' */
+		new.nv = POSTED_INTR_VECTOR;
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
+
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		list_del(&vcpu->blocked_vcpu_list);
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		vcpu->pre_pcpu = -1;
+	}
+}
+
 /*
  * This routine does the following things for vCPU which is going
  * to be blocked if VT-d PI is enabled.
@@ -11719,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
  */
 static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-	unsigned long flags;
 	unsigned int dest;
 	struct pi_desc old, new;
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11729,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 		!kvm_vcpu_apicv_active(vcpu))
 		return 0;
 
-	vcpu->pre_pcpu = vcpu->cpu;
-	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-			  vcpu->pre_pcpu), flags);
-	list_add_tail(&vcpu->blocked_vcpu_list,
-		      &per_cpu(blocked_vcpu_on_cpu,
-		      vcpu->pre_pcpu));
-	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
-			       vcpu->pre_pcpu), flags);
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+		vcpu->pre_pcpu = vcpu->cpu;
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		list_add_tail(&vcpu->blocked_vcpu_list,
+			      &per_cpu(blocked_vcpu_on_cpu,
+				       vcpu->pre_pcpu));
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+	}
 
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * We should not block the vCPU if
-		 * an interrupt is posted for it.
-		 */
-		if (pi_test_on(pi_desc) == 1) {
-			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-					  vcpu->pre_pcpu), flags);
-			list_del(&vcpu->blocked_vcpu_list);
-			spin_unlock_irqrestore(
-					&per_cpu(blocked_vcpu_on_cpu_lock,
-					vcpu->pre_pcpu), flags);
-			vcpu->pre_pcpu = -1;
-
-			return 1;
-		}
-
 		WARN((pi_desc->sn == 1),
 		     "Warning: SN field of posted-interrupts "
 		     "is set before blocking\n");
@@ -11778,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 
 		/* set 'NV' to 'wakeup vector' */
 		new.nv = POSTED_INTR_WAKEUP_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 
-	return 0;
+	/* We should not block the vCPU if an interrupt is posted for it.  */
+	if (pi_test_on(pi_desc) == 1)
+		__pi_post_block(vcpu);
+
+	local_irq_enable();
+	return (vcpu->pre_pcpu == -1);
 }
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11797,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct pi_desc old, new;
-	unsigned int dest;
-	unsigned long flags;
-
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	if (vcpu->pre_pcpu == -1)
 		return;
 
-	do {
-		old.control = new.control = pi_desc->control;
-
-		dest = cpu_physical_id(vcpu->cpu);
-
-		if (x2apic_enabled())
-			new.ndst = dest;
-		else
-			new.ndst = (dest << 8) & 0xFF00;
-
-		/* Allow posting non-urgent interrupts */
-		new.sn = 0;
-
-		/* set 'NV' to 'notification vector' */
-		new.nv = POSTED_INTR_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
-
-	if(vcpu->pre_pcpu != -1) {
-		spin_lock_irqsave(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		list_del(&vcpu->blocked_vcpu_list);
-		spin_unlock_irqrestore(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		vcpu->pre_pcpu = -1;
-	}
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
+	__pi_post_block(vcpu);
+	local_irq_enable();
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)
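
A detail that recurs through this file is the switch from cmpxchg() to cmpxchg64(): 'control' overlays the notification fields of the posted-interrupt descriptor as a single 64-bit word, which a plain cmpxchg would only partially cover on 32-bit hosts. A simplified sketch of the descriptor layout these loops assume:

struct pi_desc {
	u32 pir[8];			/* one bit per posted vector */
	union {
		struct {
			u16	on : 1,		/* outstanding notification */
				sn : 1,		/* suppress notification */
				rsvd_1 : 14;
			u8	nv;		/* notification vector */
			u8	rsvd_2;
			u32	ndst;		/* notification destination */
		};
		u64	control;	/* updated atomically via cmpxchg64() */
	};
	u32 rsvd[6];
} __aligned(64);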

+ 1 - 1
arch/x86/kvm/x86.c

@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

+ 1 - 1
arch/x86/math-emu/fpu_entry.c

@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
 	struct desc_struct code_descriptor;
 	struct fpu *fpu = &current->thread.fpu;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 #ifdef RE_ENTRANT_CHECKING
 	if (emulating) {

+ 24 - 0
arch/x86/mm/extable.c

@@ -2,6 +2,7 @@
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
 
+#include <asm/fpu/internal.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_refcount);
 
+/*
+ * Handler for when we fail to restore a task's FPU state.  We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid.  However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU.  Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+			  struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+
+	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+		  (void *)instruction_pointer(regs));
+
+	__copy_kernel_to_fpregs(&init_fpstate, -1);
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
 		   struct pt_regs *regs, int trapnr)
 {
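
For ex_handler_fprestore() to run, the XRSTOR(S) instruction needs an exception-table entry naming it. A hedged sketch of the wiring at the restore site (the real code hides this inside the FPU-internal XSTATE_* macros; the operand names here are illustrative):

asm volatile("1: xrstor %[xa]\n"
	     "2:\n"
	     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)
	     : : [xa] "m" (*xstate), "a" (lmask), "d" (hmask));

On an XRSTOR fault the fixup resumes at label 2, and the handler above reloads init_fpstate instead of leaking the previous task's registers.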

+ 24 - 23
arch/x86/mm/fault.c

@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *	     faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
-		struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * valid VMA, so we should never reach this without a
 	 * valid VMA.
 	 */
-	if (!vma) {
+	if (!pkey) {
 		WARN_ONCE(1, "PKU fault with no VMA passed in");
 		info->si_pkey = 0;
 		return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * absolutely guaranteed to be 100% accurate because of
 	 * the race explained above.
 	 */
-	info->si_pkey = vma_pkey(vma);
+	info->si_pkey = *pkey;
 }
 
 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
-		     struct task_struct *tsk, struct vm_area_struct *vma,
-		     int fault)
+		     struct task_struct *tsk, u32 *pkey, int fault)
 {
 	unsigned lsb = 0;
 	siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
-	fill_sig_info_pkey(si_code, &info, vma);
+	fill_sig_info_pkey(si_code, &info, pkey);
 
 	force_sig_info(si_signo, &info, tsk);
 }
@@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 	unsigned long flags;
 	int sig;
-	/* No context means no VMA to pass down */
-	struct vm_area_struct *vma = NULL;
 
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
 			/* XXX: hwpoison faults will set the wrong code. */
 			force_sig_info_fault(signal, si_code, address,
-					     tsk, vma, 0);
+					     tsk, NULL, 0);
 		}
 
 		/*
@@ -896,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 static void
 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		       unsigned long address, struct vm_area_struct *vma,
-		       int si_code)
+		       unsigned long address, u32 *pkey, int si_code)
 {
 	struct task_struct *tsk = current;
 
@@ -945,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		tsk->thread.error_code	= error_code;
 		tsk->thread.trap_nr	= X86_TRAP_PF;
 
-		force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
 
 		return;
 	}
@@ -958,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 static noinline void
 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		     unsigned long address, struct vm_area_struct *vma)
+		     unsigned long address, u32 *pkey)
 {
-	__bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+	__bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
 }
 
 static void
@@ -968,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	   unsigned long address,  struct vm_area_struct *vma, int si_code)
 {
 	struct mm_struct *mm = current->mm;
+	u32 pkey;
+
+	if (vma)
+		pkey = vma_pkey(vma);
 
 	/*
 	 * Something tried to access memory that isn't in our memory map..
@@ -975,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	 */
 	up_read(&mm->mmap_sem);
 
-	__bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+	__bad_area_nosemaphore(regs, error_code, address,
+			       (vma) ? &pkey : NULL, si_code);
 }
 
 static noinline void
@@ -1018,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-	  struct vm_area_struct *vma, unsigned int fault)
+	  u32 *pkey, unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	int code = BUS_ADRERR;
@@ -1045,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 		code = BUS_MCEERR_AR;
 	}
 #endif
-	force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+	force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
 }
 
 static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-	       unsigned long address, struct vm_area_struct *vma,
-	       unsigned int fault)
+	       unsigned long address, u32 *pkey, unsigned int fault)
 {
 	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
@@ -1075,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
 			     VM_FAULT_HWPOISON_LARGE))
-			do_sigbus(regs, error_code, address, vma, fault);
+			do_sigbus(regs, error_code, address, pkey, fault);
 		else if (fault & VM_FAULT_SIGSEGV)
-			bad_area_nosemaphore(regs, error_code, address, vma);
+			bad_area_nosemaphore(regs, error_code, address, pkey);
 		else
 			BUG();
 	}
@@ -1267,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	struct mm_struct *mm;
 	int fault, major = 0;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	u32 pkey;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1467,9 +1467,10 @@ good_area:
 		return;
 	}
 
+	pkey = vma_pkey(vma);
 	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mm_fault_error(regs, error_code, address, vma, fault);
+		mm_fault_error(regs, error_code, address, &pkey, fault);
 		return;
 	}
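
The common thread in these hunks is object lifetime: a vm_area_struct is only stable while mmap_sem is held, so the protection key is snapshotted into a plain u32 before the lock is dropped, and only the copy travels down the signal path. The pattern, in miniature:

u32 pkey = vma_pkey(vma);	/* vma still valid under mmap_sem */
up_read(&mm->mmap_sem);		/* vma may now be freed or reused */
force_sig_info_fault(SIGSEGV, si_code, address, tsk, &pkey, 0);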
 

+ 2 - 0
arch/x86/mm/mem_encrypt.c

@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#define DISABLE_BRANCH_PROFILING
+
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/mm.h>

+ 1 - 2
arch/x86/mm/pkeys.c

@@ -18,7 +18,6 @@
 
 #include <asm/cpufeature.h>             /* boot_cpu_has, ...            */
 #include <asm/mmu_context.h>            /* vma_pkey()                   */
-#include <asm/fpu/internal.h>           /* fpregs_active()              */
 
 int __execute_only_pkey(struct mm_struct *mm)
 {
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    fpregs_active() &&
+	    current->thread.fpu.initialized &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;

+ 1 - 1
arch/x86/mm/tlb.c

@@ -191,7 +191,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			 * mapped in the new pgd, we'll double-fault.  Forcibly
 			 * map it.
 			 */
-			unsigned int index = pgd_index(current_stack_pointer());
+			unsigned int index = pgd_index(current_stack_pointer);
 			pgd_t *pgd = next->pgd + index;
 
 			if (unlikely(pgd_none(*pgd)))

+ 4 - 9
arch/x86/xen/mmu_pv.c

@@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void)
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
 	 * the ramdisk). We continue on, erasing PMD entries that point to page
 	 * tables - do note that they are accessible at this stage via __va.
-	 * For good measure we also round up to the PMD - which means that if
+	 * As Xen is aligning the memory end to a 4MB boundary, for good
+	 * measure we also round up to PMD_SIZE * 2 - which means that if
 	 * anybody is using a __ka address for the initial boot-stack - and tries
 	 * to use it - they are going to crash. The xen_start_info has been
 	 * taken care of already in xen_setup_kernel_pagetable. */
 	addr = xen_start_info->pt_base;
-	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+	size = xen_start_info->nr_pt_frames * PAGE_SIZE;
 
-	xen_cleanhighmap(addr, addr + size);
+	xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
 	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
-	/* This is superfluous and is not necessary, but you know what
-	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
-	 * anything at this stage. */
-	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
 }
 #endif
 

+ 3 - 0
block/blk-core.c

@@ -854,6 +854,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+	mutex_init(&q->blk_trace_mutex);
+#endif
 	mutex_init(&q->sysfs_lock);
 	spin_lock_init(&q->__queue_lock);
 

+ 0 - 1
block/bsg-lib.c

@@ -154,7 +154,6 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
 failjob_rls_rqst_payload:
 	kfree(job->request_payload.sg_list);
 failjob_rls_job:
-	kfree(job);
 	return -ENOMEM;
 }
 

+ 1 - 1
block/partition-generic.c

@@ -112,7 +112,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	struct request_queue *q = dev_to_disk(dev)->queue;
+	struct request_queue *q = part_to_disk(p)->queue;
 	unsigned int inflight[2];
 	int cpu;
 

+ 9 - 7
drivers/acpi/apei/ghes.c

@@ -743,17 +743,19 @@ static int ghes_proc(struct ghes *ghes)
 	}
 	ghes_do_proc(ghes, ghes->estatus);
 
+out:
+	ghes_clear_estatus(ghes);
+
+	if (rc == -ENOENT)
+		return rc;
+
 	/*
 	 * GHESv2 type HEST entries introduce support for error acknowledgment,
 	 * so only acknowledge the error if this support is present.
 	 */
-	if (is_hest_type_generic_v2(ghes)) {
-		rc = ghes_ack_error(ghes->generic_v2);
-		if (rc)
-			return rc;
-	}
-out:
-	ghes_clear_estatus(ghes);
+	if (is_hest_type_generic_v2(ghes))
+		return ghes_ack_error(ghes->generic_v2);
+
 	return rc;
 }
 

+ 7 - 0
drivers/base/power/opp/core.c

@@ -1581,6 +1581,9 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 
 	opp->available = availability_req;
 
+	dev_pm_opp_get(opp);
+	mutex_unlock(&opp_table->lock);
+
 	/* Notify the change of the OPP availability */
 	if (availability_req)
 		blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE,
@@ -1589,8 +1592,12 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 		blocking_notifier_call_chain(&opp_table->head,
 					     OPP_EVENT_DISABLE, opp);
 
+	dev_pm_opp_put(opp);
+	goto put_table;
+
 unlock:
 	mutex_unlock(&opp_table->lock);
+put_table:
 	dev_pm_opp_put_opp_table(opp_table);
 	return r;
 }

+ 1 - 1
drivers/block/brd.c

@@ -342,7 +342,7 @@ static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
 
 	if (!brd)
 		return -ENODEV;
-	page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
+	page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
 	if (!page)
 		return -ENOSPC;
 	*kaddr = page_address(page);
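
The new expression derives the starting sector straight from the page index. With 4 KiB pages and 512-byte sectors, PAGE_SECTORS_SHIFT is PAGE_SHIFT - SECTOR_SHIFT = 12 - 9 = 3, so page n begins at sector n * 8; widening pgoff to sector_t before the shift avoids the truncation the old PFN_PHYS()-based arithmetic could hit on 32-bit physical addresses. A worked example:

/* pgoff 0x100000 (byte offset 4 GiB with 4 KiB pages): */
sector_t sector = (sector_t)0x100000 << 3;	/* == 0x800000, sector 8388608 */
/* PFN_PHYS(0x100000) / 512 gives the same value, but only if phys_addr_t
 * is wide enough to hold the intermediate 4 GiB address. */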

+ 2 - 4
drivers/block/loop.h

@@ -67,10 +67,8 @@ struct loop_device {
 struct loop_cmd {
 	struct kthread_work work;
 	struct request *rq;
-	union {
-		bool use_aio; /* use AIO interface to handle I/O */
-		atomic_t ref; /* only for aio */
-	};
+	bool use_aio; /* use AIO interface to handle I/O */
+	atomic_t ref; /* only for aio */
 	long ret;
 	struct kiocb iocb;
 	struct bio_vec *bvec;

+ 6 - 0
drivers/block/nbd.c

@@ -1194,6 +1194,12 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	/* The block layer will pass back some non-nbd ioctls in case we have
+	 * special handling for them, but we don't, so just return an error.
+	 */
+	if (_IOC_TYPE(cmd) != 0xab)
+		return -EINVAL;
+
 	mutex_lock(&nbd->config_lock);
 
 	/* Don't allow ioctl operations on a nbd device that was created with
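
The magic 0xab is the NBD ioctl type byte: every NBD command in the UAPI header is built as _IO(0xab, n), and _IOC_TYPE() recovers that byte from a command number. An illustration of what the filter accepts and rejects:

/* _IOC_TYPE(NBD_SET_SOCK) == 0xab  (NBD_SET_SOCK is _IO(0xab, 0))  -> handled
 * _IOC_TYPE(BLKROSET)     == 0x12  (a generic block-layer ioctl)   -> -EINVAL
 */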

+ 1 - 1
drivers/clocksource/numachip.c

@@ -43,7 +43,7 @@ static int numachip2_set_next_event(unsigned long delta, struct clock_event_devi
 	return 0;
 }
 
-static struct clock_event_device numachip2_clockevent = {
+static const struct clock_event_device numachip2_clockevent __initconst = {
 	.name            = "numachip2",
 	.rating          = 400,
 	.set_next_event  = numachip2_set_next_event,

+ 4 - 0
drivers/cpufreq/cpufreq-dt-platdev.c

@@ -118,6 +118,10 @@ static const struct of_device_id blacklist[] __initconst = {
 
 	{ .compatible = "sigma,tango4", },
 
+	{ .compatible = "ti,am33xx", },
+	{ .compatible = "ti,am43", },
+	{ .compatible = "ti,dra7", },
+
 	{ }
 };
 

+ 1 - 1
drivers/dma-buf/dma-buf.c

@@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(dma_buf_detach);
 struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 					enum dma_data_direction direction)
 {
-	struct sg_table *sg_table = ERR_PTR(-EINVAL);
+	struct sg_table *sg_table;
 
 	might_sleep();
 

+ 42 - 14
drivers/dma-buf/reservation.c

@@ -266,8 +266,7 @@ EXPORT_SYMBOL(reservation_object_add_excl_fence);
 * @dst: the destination reservation object
 * @src: the source reservation object
 *
-* Copy all fences from src to dst. Both src->lock as well as dst-lock must be
-* held.
+* Copy all fences from src to dst. dst-lock must be held.
 */
 int reservation_object_copy_fences(struct reservation_object *dst,
 				   struct reservation_object *src)
@@ -277,33 +276,62 @@ int reservation_object_copy_fences(struct reservation_object *dst,
 	size_t size;
 	unsigned i;
 
-	src_list = reservation_object_get_list(src);
+	rcu_read_lock();
+	src_list = rcu_dereference(src->fence);
 
+retry:
 	if (src_list) {
-		size = offsetof(typeof(*src_list),
-				shared[src_list->shared_count]);
+		unsigned shared_count = src_list->shared_count;
+
+		size = offsetof(typeof(*src_list), shared[shared_count]);
+		rcu_read_unlock();
+
 		dst_list = kmalloc(size, GFP_KERNEL);
 		if (!dst_list)
 			return -ENOMEM;
 
-		dst_list->shared_count = src_list->shared_count;
-		dst_list->shared_max = src_list->shared_count;
-		for (i = 0; i < src_list->shared_count; ++i)
-			dst_list->shared[i] =
-				dma_fence_get(src_list->shared[i]);
+		rcu_read_lock();
+		src_list = rcu_dereference(src->fence);
+		if (!src_list || src_list->shared_count > shared_count) {
+			kfree(dst_list);
+			goto retry;
+		}
+
+		dst_list->shared_count = 0;
+		dst_list->shared_max = shared_count;
+		for (i = 0; i < src_list->shared_count; ++i) {
+			struct dma_fence *fence;
+
+			fence = rcu_dereference(src_list->shared[i]);
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &fence->flags))
+				continue;
+
+			if (!dma_fence_get_rcu(fence)) {
+				kfree(dst_list);
+				src_list = rcu_dereference(src->fence);
+				goto retry;
+			}
+
+			if (dma_fence_is_signaled(fence)) {
+				dma_fence_put(fence);
+				continue;
+			}
+
+			dst_list->shared[dst_list->shared_count++] = fence;
+		}
 	} else {
 		dst_list = NULL;
 	}
 
+	new = dma_fence_get_rcu_safe(&src->fence_excl);
+	rcu_read_unlock();
+
 	kfree(dst->staged);
 	dst->staged = NULL;
 
 	src_list = reservation_object_get_list(dst);
-
 	old = reservation_object_get_excl(dst);
-	new = reservation_object_get_excl(src);
-
-	dma_fence_get(new);
 
 	preempt_disable();
 	write_seqcount_begin(&dst->seq);
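
The retry loop above leans on dma_fence_get_rcu(), which only takes a reference while the fence's refcount is still live; a failed grab means the shared list mutated under the RCU read lock and the whole copy restarts. Its shape, roughly, per the dma-fence API:

static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
{
	/* Succeeds only if the refcount has not already dropped to zero: */
	if (kref_get_unless_zero(&fence->refcount))
		return fence;
	return NULL;
}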

+ 3 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -121,6 +121,7 @@ extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
 extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
+extern int amdgpu_compute_multipipe;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -1310,6 +1311,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *filp);
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *filp);
 int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
@@ -1524,7 +1527,6 @@ struct amdgpu_device {
 
 	/* powerplay */
 	struct amd_powerplay		powerplay;
-	bool				pp_enabled;
 	bool				pp_force_state_enabled;
 
 	/* dpm */

+ 9 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -338,6 +338,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	struct cik_mqd *m;
 	uint32_t *mqd_hqd;
 	uint32_t reg, wptr_val, data;
+	bool valid_wptr = false;
 
 	m = get_mqd(mqd);
 
@@ -356,7 +357,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
 
-	if (read_user_wptr(mm, wptr, wptr_val))
+	/* read_user_wptr() may take the mm->mmap_sem.
+	 * release srbm_mutex to avoid circular dependency between
+	 * srbm_mutex->mmap_sem->reservation_ww_class_mutex->srbm_mutex.
+	 */
+	release_queue(kgd);
+	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+	acquire_queue(kgd, pipe_id, queue_id);
+	if (valid_wptr)
 		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
 
 	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);

+ 9 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -292,6 +292,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	struct vi_mqd *m;
 	uint32_t *mqd_hqd;
 	uint32_t reg, wptr_val, data;
+	bool valid_wptr = false;
 
 	m = get_mqd(mqd);
 
@@ -339,7 +340,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
 
-	if (read_user_wptr(mm, wptr, wptr_val))
+	/* read_user_wptr() may take the mm->mmap_sem.
+	 * release srbm_mutex to avoid circular dependency between
+	 * srbm_mutex->mmap_sem->reservation_ww_class_mutex->srbm_mutex.
+	 */
+	release_queue(kgd);
+	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+	acquire_queue(kgd, pipe_id, queue_id);
+	if (valid_wptr)
 		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
 
 	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
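
The CIK and VI variants reorder the locks identically; the cycle being broken has the classic ABBA shape:

/* Deadlock cycle avoided by dropping srbm_mutex around the user copy:
 *   kgd_hqd_load:  srbm_mutex -> mmap_sem        (via read_user_wptr)
 *   other paths:   mmap_sem -> reservation_ww_class_mutex -> srbm_mutex
 */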

+ 23 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

@@ -42,6 +42,28 @@ struct amdgpu_cgs_device {
 	struct amdgpu_device *adev =					\
 		((struct amdgpu_cgs_device *)cgs_device)->adev
 
+static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device,
+			int (*call_back_func)(struct amd_pp_init *, void **))
+{
+	CGS_FUNC_ADEV;
+	struct amd_pp_init pp_init;
+	struct amd_powerplay *amd_pp;
+
+	if (call_back_func == NULL)
+		return NULL;
+
+	amd_pp = &(adev->powerplay);
+	pp_init.chip_family = adev->family;
+	pp_init.chip_id = adev->asic_type;
+	pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
+	pp_init.feature_mask = amdgpu_pp_feature_mask;
+	pp_init.device = cgs_device;
+	if (call_back_func(&pp_init, &(amd_pp->pp_handle)))
+		return NULL;
+
+	return adev->powerplay.pp_handle;
+}
+
 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 				    enum cgs_gpu_mem_type type,
 				    uint64_t size, uint64_t align,
@@ -1179,6 +1201,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
 	.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
 	.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
+	.register_pp_handle = amdgpu_cgs_register_pp_handle,
 };
 
 static const struct cgs_os_ops amdgpu_cgs_os_ops = {

+ 8 - 8
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

@@ -231,7 +231,7 @@ amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
 		if (connector->encoder_ids[i] == 0)
 			break;
 
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 		if (!encoder)
 			continue;
@@ -256,7 +256,7 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
 		if (connector->encoder_ids[i] == 0)
 			break;
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 		if (!encoder)
 			continue;
@@ -372,7 +372,7 @@ amdgpu_connector_best_single_encoder(struct drm_connector *connector)
 
 	/* pick the encoder ids */
 	if (enc_id)
-		return drm_encoder_find(connector->dev, enc_id);
+		return drm_encoder_find(connector->dev, NULL, enc_id);
 	return NULL;
 }
 
@@ -1077,7 +1077,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 			if (connector->encoder_ids[i] == 0)
 				break;
 
-			encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
+			encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
 			if (!encoder)
 				continue;
 
@@ -1134,7 +1134,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 		if (connector->encoder_ids[i] == 0)
 			break;
 
-		encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
+		encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
 		if (!encoder)
 			continue;
 
@@ -1153,7 +1153,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 	/* then check for digital */
 	/* pick the first one */
 	if (enc_id)
-		return drm_encoder_find(connector->dev, enc_id);
+		return drm_encoder_find(connector->dev, NULL, enc_id);
 	return NULL;
 }
 
@@ -1294,7 +1294,7 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn
 		if (connector->encoder_ids[i] == 0)
 			break;
 
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 		if (!encoder)
 			continue;
@@ -1323,7 +1323,7 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
 		if (connector->encoder_ids[i] == 0)
 			break;
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 		if (!encoder)
 			continue;

+ 61 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -25,6 +25,7 @@
  *    Jerome Glisse <glisse@freedesktop.org>
  */
 #include <linux/pagemap.h>
+#include <linux/sync_file.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
@@ -1330,6 +1331,66 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	return fence;
 }
 
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *filp)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	union drm_amdgpu_fence_to_handle *info = data;
+	struct dma_fence *fence;
+	struct drm_syncobj *syncobj;
+	struct sync_file *sync_file;
+	int fd, r;
+
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
+
+	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	switch (info->in.what) {
+	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
+		r = drm_syncobj_create(&syncobj, 0, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
+		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
+		drm_syncobj_put(syncobj);
+		return r;
+
+	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
+		r = drm_syncobj_create(&syncobj, 0, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
+		r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
+		drm_syncobj_put(syncobj);
+		return r;
+
+	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
+		fd = get_unused_fd_flags(O_CLOEXEC);
+		if (fd < 0) {
+			dma_fence_put(fence);
+			return fd;
+		}
+
+		sync_file = sync_file_create(fence);
+		dma_fence_put(fence);
+		if (!sync_file) {
+			put_unused_fd(fd);
+			return -ENOMEM;
+		}
+
+		fd_install(fd, sync_file->file);
+		info->out.handle = fd;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
 /**
  * amdgpu_cs_wait_all_fence - wait on all fences to signal
  *
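
From userspace, the new ioctl converts a submission fence into a waitable handle. A hedged usage sketch for the sync-file case — drm_fd, ctx_id, seq_no and poll_on_fd() are stand-ins, while the field names follow the amdgpu UAPI union:

union drm_amdgpu_fence_to_handle fth = {};

fth.in.fence.ctx_id  = ctx_id;		/* which context submitted the CS */
fth.in.fence.ip_type = AMDGPU_HW_IP_GFX;
fth.in.fence.ring    = 0;
fth.in.fence.seq_no  = seq_no;
fth.in.what          = AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD;

if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE, &fth) == 0)
	poll_on_fd((int)fth.out.handle);	/* an ordinary pollable sync_file fd */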

+ 26 - 11
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -56,6 +56,7 @@
 #include "amdgpu_vf_error.h"
 
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_pm.h"
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -1603,6 +1604,7 @@ static int amdgpu_init(struct amdgpu_device *adev)
 			return r;
 		}
 		adev->ip_blocks[i].status.sw = true;
+
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_vram_scratch_init(adev);
@@ -1633,6 +1635,11 @@ static int amdgpu_init(struct amdgpu_device *adev)
 		}
 	}
 
+	mutex_lock(&adev->firmware.mutex);
+	if (amdgpu_ucode_init_bo(adev))
+		adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
+	mutex_unlock(&adev->firmware.mutex);
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
@@ -1768,6 +1775,8 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 
 		adev->ip_blocks[i].status.hw = false;
 	}
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
+		amdgpu_ucode_fini_bo(adev);
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.sw)
@@ -2040,6 +2049,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->srbm_mutex);
 	mutex_init(&adev->grbm_idx_mutex);
 	mutex_init(&adev->mn_lock);
+	mutex_init(&adev->virt.vf_errors.lock);
 	hash_init(adev->mn_hash);
 
 	amdgpu_check_arguments(adev);
@@ -2125,7 +2135,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_atombios_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
 		goto failed;
 	}
 
@@ -2136,7 +2146,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (amdgpu_vpost_needed(adev)) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "no vBIOS found\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
 			r = -EINVAL;
 			goto failed;
 		}
@@ -2144,7 +2154,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 		if (r) {
 			dev_err(adev->dev, "gpu post error!\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
 			goto failed;
 		}
 	} else {
@@ -2156,7 +2166,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		r = amdgpu_atomfirmware_get_clock_info(adev);
 		if (r) {
 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
 			goto failed;
 		}
 	} else {
@@ -2164,7 +2174,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		r = amdgpu_atombios_get_clock_info(adev);
 		if (r) {
 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
 			goto failed;
 		}
 		/* init i2c buses */
@@ -2175,7 +2185,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
 		goto failed;
 	}
 
@@ -2185,7 +2195,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
 		amdgpu_fini(adev);
 		goto failed;
 	}
@@ -2205,7 +2215,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
 		goto failed;
 	}
 
@@ -2215,6 +2225,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	amdgpu_fbdev_init(adev);
 
+	r = amdgpu_pm_sysfs_init(adev);
+	if (r)
+		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
+
 	r = amdgpu_gem_debugfs_init(adev);
 	if (r)
 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2254,7 +2268,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_late_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
 		goto failed;
 	}
 
@@ -2311,6 +2325,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
 	amdgpu_doorbell_fini(adev);
+	amdgpu_pm_sysfs_fini(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 }
 
@@ -2936,7 +2951,7 @@ out:
 		}
 	} else {
 		dev_err(adev->dev, "asic resume failed (%d).\n", r);
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			if (adev->rings[i] && adev->rings[i]->sched.thread) {
 				kthread_unpark(adev->rings[i]->sched.thread);
@@ -2950,7 +2965,7 @@ out:
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
 	}
 	else {
 		dev_info(adev->dev, "GPU reset succeeded!\n");

+ 4 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h

@@ -356,6 +356,10 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->switch_power_profile(\
 			(adev)->powerplay.pp_handle, type))
 
+#define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
+		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
+			(adev)->powerplay.pp_handle, msg_id))
+
 struct amdgpu_dpm {
 	struct amdgpu_ps        *ps;
 	/* number of valid power states */

+ 6 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -70,9 +70,10 @@
  * - 3.18.0 - Export gpu always on cu bitmap
  * - 3.19.0 - Add support for UVD MJPEG decode
  * - 3.20.0 - Add support for local BOs
+ * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	20
+#define KMS_DRIVER_MINOR	21
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -122,6 +123,7 @@ int amdgpu_cntl_sb_buf_per_se = 0;
 int amdgpu_param_buf_per_se = 0;
 int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
+int amdgpu_compute_multipipe = -1;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -265,6 +267,9 @@ module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
 MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 
+MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
 
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)

+ 18 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

@@ -109,9 +109,26 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
 	}
 }
 
+static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
+{
+	if (amdgpu_compute_multipipe != -1) {
+		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
+			 amdgpu_compute_multipipe);
+		return amdgpu_compute_multipipe == 1;
+	}
+
+	/* FIXME: spreading the queues across pipes causes perf regressions
+	 * on POLARIS11 compute workloads */
+	if (adev->asic_type == CHIP_POLARIS11)
+		return false;
+
+	return adev->gfx.mec.num_mec > 1;
+}
+
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
 	int i, queue, pipe, mec;
+	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
 	/* policy for amdgpu compute queue ownership */
 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
@@ -125,8 +142,7 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 		if (mec >= adev->gfx.mec.num_mec)
 			break;
 
-		/* FIXME: spreading the queues across pipes causes perf regressions */
-		if (0) {
+		if (multipipe_policy) {
 			/* policy: amdgpu owns the first two queues of the first MEC */
 			if (mec == 0 && queue < 2)
 				set_bit(i, adev->gfx.mec.queue_bitmap);
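
The acquire loop decodes the flat index i into (mec, pipe, queue) coordinates before the policy check above. With, say, 8 queues per pipe and 4 pipes per MEC, i = 13 lands on queue 5 of pipe 1 in MEC 0:

queue = i % adev->gfx.mec.num_queue_per_pipe;		/* 13 % 8     = 5 */
pipe  = (i / adev->gfx.mec.num_queue_per_pipe)
	% adev->gfx.mec.num_pipe_per_mec;		/* (13/8) % 4 = 1 */
mec   = (i / adev->gfx.mec.num_queue_per_pipe)
	/ adev->gfx.mec.num_pipe_per_mec;		/* (13/8) / 4 = 0 */

so in that configuration the multipipe policy's "first two queues of the first MEC" test sees mec == 0 for i = 0..31.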

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -1024,6 +1024,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	/* KMS */
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),

+ 41 - 42
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

@@ -64,10 +64,6 @@ static const struct cg_flag_name clocks[] = {
 
 void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
 {
-	if (adev->pp_enabled)
-		/* TODO */
-		return;
-
 	if (adev->pm.dpm_enabled) {
 		mutex_lock(&adev->pm.mutex);
 		if (power_supply_is_system_supplied() > 0)
@@ -118,7 +114,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 		goto fail;
 	}
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
 	} else {
 		mutex_lock(&adev->pm.mutex);
@@ -303,7 +299,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
-	else if (adev->pp_enabled) {
+	else if (adev->powerplay.pp_funcs->dispatch_tasks &&
+			adev->powerplay.pp_funcs->get_pp_num_states) {
 		struct pp_states_info data;
 
 		ret = kstrtoul(buf, 0, &idx);
@@ -531,7 +528,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 	if (adev->powerplay.pp_funcs->set_sclk_od)
 		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
@@ -575,7 +572,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 	if (adev->powerplay.pp_funcs->set_mclk_od)
 		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
@@ -959,9 +956,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;
 
-	if (adev->pp_enabled)
-		return effective_mode;
-
 	/* Skip fan attributes if fan is not present */
 	if (adev->pm.no_fan &&
 	    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
@@ -1317,6 +1311,9 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 	if (adev->pm.sysfs_initialized)
 		return 0;
 
+	if (adev->pm.dpm_enabled == 0)
+		return 0;
+
 	if (adev->powerplay.pp_funcs->get_temperature == NULL)
 		return 0;
 
@@ -1341,27 +1338,26 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 		return ret;
 	}
 
-	if (adev->pp_enabled) {
-		ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_num_states\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_cur_state\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_force_state\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_table);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_table\n");
-			return ret;
-		}
+
+	ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_num_states\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_cur_state\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_force_state\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_table);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_table\n");
+		return ret;
 	}
 
 	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
@@ -1417,16 +1413,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 
 void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 {
+	if (adev->pm.dpm_enabled == 0)
+		return;
+
 	if (adev->pm.int_hwmon_dev)
 		hwmon_device_unregister(adev->pm.int_hwmon_dev);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_state);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
-	if (adev->pp_enabled) {
-		device_remove_file(adev->dev, &dev_attr_pp_num_states);
-		device_remove_file(adev->dev, &dev_attr_pp_cur_state);
-		device_remove_file(adev->dev, &dev_attr_pp_force_state);
-		device_remove_file(adev->dev, &dev_attr_pp_table);
-	}
+
+	device_remove_file(adev->dev, &dev_attr_pp_num_states);
+	device_remove_file(adev->dev, &dev_attr_pp_cur_state);
+	device_remove_file(adev->dev, &dev_attr_pp_force_state);
+	device_remove_file(adev->dev, &dev_attr_pp_table);
+
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
@@ -1457,7 +1456,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 			amdgpu_fence_wait_empty(ring);
 	}
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL);
 	} else {
 		mutex_lock(&adev->pm.mutex);
@@ -1592,15 +1591,15 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 	if ((adev->flags & AMD_IS_PX) &&
 	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
 		seq_printf(m, "PX asic powered off\n");
-	} else if (adev->pp_enabled) {
-		return amdgpu_debugfs_pm_info_pp(m, adev);
-	} else {
+	} else if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
 		mutex_lock(&adev->pm.mutex);
 		if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
 			adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
 		else
 			seq_printf(m, "Debugfs support not implemented for this asic\n");
 		mutex_unlock(&adev->pm.mutex);
+	} else {
+		return amdgpu_debugfs_pm_info_pp(m, adev);
 	}
 
 	return 0;

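The change above replaces the global adev->pp_enabled flag with NULL checks on individual pp_funcs callbacks, so each sysfs handler keys off exactly the capability it needs rather than one coarse boolean. A minimal userspace sketch of that capability-check pattern (the struct and function names here are illustrative, not the driver's):

#include <stdio.h>

struct pm_funcs {
	void (*dispatch_task)(int task);	/* may be NULL */
	int  (*get_num_states)(void);		/* may be NULL */
};

static void pp_dispatch(int task)
{
	printf("powerplay path: task %d\n", task);
}

/* Each caller tests only the callback it needs, not a global flag. */
static void set_state(const struct pm_funcs *funcs, int task)
{
	if (funcs->dispatch_task)
		funcs->dispatch_task(task);
	else
		printf("legacy dpm path: task %d\n", task);
}

int main(void)
{
	struct pm_funcs pp = { .dispatch_task = pp_dispatch };
	struct pm_funcs legacy = { 0 };

	set_state(&pp, 1);	/* powerplay backend present */
	set_state(&legacy, 1);	/* falls back to the legacy path */
	return 0;
}
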
+ 6 - 54
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -34,24 +34,6 @@
 #include "cik_dpm.h"
 #include "vi_dpm.h"
 
-static int amdgpu_create_pp_handle(struct amdgpu_device *adev)
-{
-	struct amd_pp_init pp_init;
-	struct amd_powerplay *amd_pp;
-	int ret;
-
-	amd_pp = &(adev->powerplay);
-	pp_init.chip_family = adev->family;
-	pp_init.chip_id = adev->asic_type;
-	pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
-	pp_init.feature_mask = amdgpu_pp_feature_mask;
-	pp_init.device = amdgpu_cgs_create_device(adev);
-	ret = amd_powerplay_create(&pp_init, &(amd_pp->pp_handle));
-	if (ret)
-		return -EINVAL;
-	return 0;
-}
-
 static int amdgpu_pp_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -59,7 +41,6 @@ static int amdgpu_pp_early_init(void *handle)
 	int ret = 0;
 
 	amd_pp = &(adev->powerplay);
-	adev->pp_enabled = false;
 	amd_pp->pp_handle = (void *)adev;
 
 	switch (adev->asic_type) {
@@ -73,9 +54,7 @@ static int amdgpu_pp_early_init(void *handle)
 	case CHIP_STONEY:
 	case CHIP_VEGA10:
 	case CHIP_RAVEN:
-		adev->pp_enabled = true;
-		if (amdgpu_create_pp_handle(adev))
-			return -EINVAL;
+		amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
 		amd_pp->ip_funcs = &pp_ip_funcs;
 		amd_pp->pp_funcs = &pp_dpm_funcs;
 		break;
@@ -97,9 +76,7 @@ static int amdgpu_pp_early_init(void *handle)
 			amd_pp->ip_funcs = &ci_dpm_ip_funcs;
 			amd_pp->pp_funcs = &ci_dpm_funcs;
 		} else {
-			adev->pp_enabled = true;
-			if (amdgpu_create_pp_handle(adev))
-				return -EINVAL;
+			amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
 			amd_pp->ip_funcs = &pp_ip_funcs;
 			amd_pp->pp_funcs = &pp_dpm_funcs;
 		}
@@ -118,12 +95,9 @@ static int amdgpu_pp_early_init(void *handle)
 
 	if (adev->powerplay.ip_funcs->early_init)
 		ret = adev->powerplay.ip_funcs->early_init(
-					adev->powerplay.pp_handle);
+					amd_pp->cgs_device ? amd_pp->cgs_device :
+					amd_pp->pp_handle);
 
-	if (ret == PP_DPM_DISABLED) {
-		adev->pm.dpm_enabled = false;
-		return 0;
-	}
 	return ret;
 }
 
@@ -137,11 +111,6 @@ static int amdgpu_pp_late_init(void *handle)
 		ret = adev->powerplay.ip_funcs->late_init(
 					adev->powerplay.pp_handle);
 
-	if (adev->pp_enabled && adev->pm.dpm_enabled) {
-		amdgpu_pm_sysfs_init(adev);
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_COMPLETE_INIT, NULL, NULL);
-	}
-
 	return ret;
 }
 
@@ -176,21 +145,11 @@ static int amdgpu_pp_hw_init(void *handle)
 	int ret = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_init_bo(adev);
 
 	if (adev->powerplay.ip_funcs->hw_init)
 		ret = adev->powerplay.ip_funcs->hw_init(
 					adev->powerplay.pp_handle);
 
-	if (ret == PP_DPM_DISABLED) {
-		adev->pm.dpm_enabled = false;
-		return 0;
-	}
-
-	if ((amdgpu_dpm != 0) && !amdgpu_sriov_vf(adev))
-		adev->pm.dpm_enabled = true;
-
 	return ret;
 }
 
@@ -199,16 +158,10 @@ static int amdgpu_pp_hw_fini(void *handle)
 	int ret = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->pp_enabled && adev->pm.dpm_enabled)
-		amdgpu_pm_sysfs_fini(adev);
-
 	if (adev->powerplay.ip_funcs->hw_fini)
 		ret = adev->powerplay.ip_funcs->hw_fini(
 					adev->powerplay.pp_handle);
 
-	if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_fini_bo(adev);
-
 	return ret;
 }
 
@@ -220,9 +173,8 @@ static void amdgpu_pp_late_fini(void *handle)
 		adev->powerplay.ip_funcs->late_fini(
 			  adev->powerplay.pp_handle);
 
-
-	if (adev->pp_enabled)
-		amd_powerplay_destroy(adev->powerplay.pp_handle);
+	if (adev->powerplay.cgs_device)
+		amdgpu_cgs_destroy_device(adev->powerplay.cgs_device);
 }
 
 static int amdgpu_pp_suspend(void *handle)

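With amdgpu_create_pp_handle() gone, early_init stores a CGS device only for powerplay-capable asics and hands the IP callback whichever handle exists, with pp_handle defaulting to the device itself. A small sketch of that fallback shape (types and pointer values are illustrative):

#include <stdio.h>

struct dev {
	void *cgs_device;	/* set only on powerplay-capable asics */
	void *pp_handle;	/* defaults to the device itself */
};

static int early_init(void *handle)
{
	printf("early_init(%p)\n", handle);
	return 0;
}

int main(void)
{
	struct dev adev = { .pp_handle = &adev };

	/* Prefer the CGS device when it was created, else the raw handle. */
	return early_init(adev.cgs_device ? adev.cgs_device : adev.pp_handle);
}
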
+ 0 - 9
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -411,13 +411,6 @@ static int psp_hw_init(void *handle)
 		return 0;
 
 	mutex_lock(&adev->firmware.mutex);
-	/*
-	 * This sequence is just used on hw_init only once, no need on
-	 * resume.
-	 */
-	ret = amdgpu_ucode_init_bo(adev);
-	if (ret)
-		goto failed;
 
 	ret = psp_load_fw(adev);
 	if (ret) {
@@ -442,8 +435,6 @@ static int psp_hw_fini(void *handle)
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		return 0;
 
-	amdgpu_ucode_fini_bo(adev);
-
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
 	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);

+ 5 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

@@ -121,7 +121,7 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
 
 static int amdgpu_lru_map(struct amdgpu_device *adev,
 			  struct amdgpu_queue_mapper *mapper,
-			  int user_ring,
+			  int user_ring, bool lru_pipe_order,
 			  struct amdgpu_ring **out_ring)
 {
 	int r, i, j;
@@ -139,7 +139,7 @@ static int amdgpu_lru_map(struct amdgpu_device *adev,
 	}
 
 	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
-				j, out_ring);
+				j, lru_pipe_order, out_ring);
 	if (r)
 		return r;
 
@@ -284,8 +284,10 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
 		break;
 	case AMDGPU_HW_IP_DMA:
+		r = amdgpu_lru_map(adev, mapper, ring, false, out_ring);
+		break;
 	case AMDGPU_HW_IP_COMPUTE:
-		r = amdgpu_lru_map(adev, mapper, ring, out_ring);
+		r = amdgpu_lru_map(adev, mapper, ring, true, out_ring);
 		break;
 	default:
 		*out_ring = NULL;

+ 20 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -315,14 +315,16 @@ static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
  * @type: amdgpu_ring_type enum
  * @blacklist: blacklisted ring ids array
  * @num_blacklist: number of entries in @blacklist
+ * @lru_pipe_order: find a ring from the least recently used pipe
  * @ring: output ring
  *
  * Retrieve the amdgpu_ring structure for the least recently used ring of
  * a specific IP block (all asics).
  * Returns 0 on success, error on failure.
  */
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
-			int num_blacklist, struct amdgpu_ring **ring)
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
+			int *blacklist, int num_blacklist,
+			bool lru_pipe_order, struct amdgpu_ring **ring)
 {
 	struct amdgpu_ring *entry;
 
@@ -337,10 +339,23 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
 		if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
 			continue;
 
-		*ring = entry;
-		amdgpu_ring_lru_touch_locked(adev, *ring);
-		break;
+		if (!*ring) {
+			*ring = entry;
+
+			/* For plain ring LRU the first match is enough */
+			if (!lru_pipe_order)
+				break;
+		}
+
+		/* Move all rings on the same pipe to the end of the list */
+		if (entry->pipe == (*ring)->pipe)
+			amdgpu_ring_lru_touch_locked(adev, entry);
 	}
+
+	/* Move the ring we found to the end of the list */
+	if (*ring)
+		amdgpu_ring_lru_touch_locked(adev, *ring);
+
 	spin_unlock(&adev->ring_lru_list_lock);
 
 	if (!*ring) {

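The new lru_pipe_order mode demotes every ring on the winning pipe, not just the winner, so back-to-back compute picks land on different pipes while SDMA keeps plain ring-level LRU. A self-contained model of that selection policy (an array stands in for the kernel's ring_lru_list; blacklist filtering and locking are omitted for brevity):

#include <stdio.h>
#include <string.h>

#define NRINGS 4

struct ring { int id; int pipe; };

/* Front of the array is the least recently used ring. */
static struct ring lru[NRINGS] = {
	{ 0, 0 }, { 1, 0 }, { 2, 1 }, { 3, 1 },
};

static int lru_get(int pipe_order)
{
	struct ring out[NRINGS];
	int id = lru[0].id, pipe = lru[0].pipe;
	int n = 0, i;

	if (!pipe_order) {
		/* Plain ring LRU: rotate only the winner to the tail. */
		struct ring tmp = lru[0];

		memmove(&lru[0], &lru[1], (NRINGS - 1) * sizeof(tmp));
		lru[NRINGS - 1] = tmp;
		return id;
	}

	/* Pipe-order LRU: stable partition that keeps other pipes in
	 * front and demotes the whole winning pipe, winner last. */
	for (i = 0; i < NRINGS; i++)
		if (lru[i].pipe != pipe)
			out[n++] = lru[i];
	for (i = 0; i < NRINGS; i++)
		if (lru[i].pipe == pipe && lru[i].id != id)
			out[n++] = lru[i];
	out[n] = (struct ring){ id, pipe };
	memcpy(lru, out, sizeof(lru));
	return id;
}

int main(void)
{
	int i;

	/* Compute-style picks alternate pipes: 0, 2, 1, 3. */
	for (i = 0; i < 4; i++)
		printf("picked ring %d\n", lru_get(1));
	return 0;
}
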
+ 3 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -201,8 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
-			int num_blacklist, struct amdgpu_ring **ring);
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
+			int *blacklist, int num_blacklist,
+			bool lru_pipe_order, struct amdgpu_ring **ring);
 void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {

+ 25 - 29
drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c

@@ -25,30 +25,21 @@
 #include "amdgpu_vf_error.h"
 #include "mxgpu_ai.h"
 
-#define AMDGPU_VF_ERROR_ENTRY_SIZE    16 
-
-/* struct error_entry - amdgpu VF error information. */
-struct amdgpu_vf_error_buffer {
-	int read_count;
-	int write_count;
-	uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
-	uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
-	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
-};
-
-struct amdgpu_vf_error_buffer admgpu_vf_errors;
-
-
-void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data)
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+			 uint16_t sub_error_code,
+			 uint16_t error_flags,
+			 uint64_t error_data)
 {
 	int index;
 	uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
 
-	index = admgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
-	admgpu_vf_errors.code [index] = error_code;
-	admgpu_vf_errors.flags [index] = error_flags;
-	admgpu_vf_errors.data [index] = error_data;
-	admgpu_vf_errors.write_count ++;
+	mutex_lock(&adev->virt.vf_errors.lock);
+	index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+	adev->virt.vf_errors.code[index] = error_code;
+	adev->virt.vf_errors.flags[index] = error_flags;
+	adev->virt.vf_errors.data[index] = error_data;
+	adev->virt.vf_errors.write_count++;
+	mutex_unlock(&adev->virt.vf_errors.lock);
 }
 
 
@@ -58,7 +49,8 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
 	u32 data1, data2, data3;
 	int index;
 
-	if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
+	if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) ||
+	    (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
 		return;
 	}
 /*
@@ -68,18 +60,22 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
 		return;
 	}
 */
+
+	mutex_lock(&adev->virt.vf_errors.lock);
 	/* The buffer may have wrapped; clamp read_count so at most one full buffer is read. */
-	if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
-		admgpu_vf_errors.read_count = admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
+	if (adev->virt.vf_errors.write_count - adev->virt.vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
+		adev->virt.vf_errors.read_count = adev->virt.vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
 	}
 
-	while (admgpu_vf_errors.read_count < admgpu_vf_errors.write_count) {
-		index =admgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
-		data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]);
-		data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF;
-		data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF;
+	while (adev->virt.vf_errors.read_count < adev->virt.vf_errors.write_count) {
+		index = adev->virt.vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+		data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(adev->virt.vf_errors.code[index],
+							   adev->virt.vf_errors.flags[index]);
+		data2 = adev->virt.vf_errors.data[index] & 0xFFFFFFFF;
+		data3 = (adev->virt.vf_errors.data[index] >> 32) & 0xFFFFFFFF;
 
 		adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
-		admgpu_vf_errors.read_count ++;
+		adev->virt.vf_errors.read_count++;
 	}
+	mutex_unlock(&adev->virt.vf_errors.lock);
 }

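Moving the buffer from a file-static global (note the old "admgpu_" typo) into per-device adev->virt.vf_errors also adds the locking the global never had; the write_count/read_count pair with modulo indexing is a classic overwriting ring buffer. A standalone model of the counter and wrap-around logic (compile with -pthread; the buffer size matches the diff, the error codes are arbitrary):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NENTRIES 16

struct err_buf {
	pthread_mutex_t lock;
	int read_count;
	int write_count;
	uint16_t code[NENTRIES];
};

static struct err_buf errors = { .lock = PTHREAD_MUTEX_INITIALIZER };

static void err_put(uint16_t code)
{
	pthread_mutex_lock(&errors.lock);
	errors.code[errors.write_count % NENTRIES] = code;
	errors.write_count++;
	pthread_mutex_unlock(&errors.lock);
}

static void err_drain(void)
{
	pthread_mutex_lock(&errors.lock);
	/* On overflow the oldest entries were overwritten; clamp the
	 * reader so it sees at most one full buffer. */
	if (errors.write_count - errors.read_count > NENTRIES)
		errors.read_count = errors.write_count - NENTRIES;

	while (errors.read_count < errors.write_count) {
		printf("error 0x%04x\n",
		       errors.code[errors.read_count % NENTRIES]);
		errors.read_count++;
	}
	pthread_mutex_unlock(&errors.lock);
}

int main(void)
{
	int i;

	for (i = 0; i < 20; i++)
		err_put(0x100 + i);
	err_drain();	/* prints only the newest 16 entries */
	return 0;
}
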
+ 4 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h

@@ -56,7 +56,10 @@ enum AMDGIM_ERROR_CATEGORY {
 	AMDGIM_ERROR_CATEGORY_MAX
 };
 
-void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data);
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+			 uint16_t sub_error_code,
+			 uint16_t error_flags,
+			 uint64_t error_data);
 void amdgpu_vf_error_trans_all (struct amdgpu_device *adev);
 
 #endif /* __VF_ERROR_H__ */

+ 13 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

@@ -36,6 +36,18 @@ struct amdgpu_mm_table {
 	uint64_t		gpu_addr;
 };
 
+#define AMDGPU_VF_ERROR_ENTRY_SIZE    16
+
+/* struct amdgpu_vf_error_buffer - amdgpu VF error information. */
+struct amdgpu_vf_error_buffer {
+	struct mutex lock;
+	int read_count;
+	int write_count;
+	uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
+	uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
+	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
+};
+
 /**
  * struct amdgpu_virt_ops - amdgpu device virt operations
  */
@@ -59,6 +71,7 @@ struct amdgpu_virt {
 	struct work_struct		flr_work;
 	struct amdgpu_mm_table		mm_table;
 	const struct amdgpu_virt_ops	*ops;
+	struct amdgpu_vf_error_buffer   vf_errors;
 };
 
 #define AMDGPU_CSA_SIZE    (8 * 1024)

+ 35 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -2541,7 +2541,8 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
  * @adev: amdgpu_device pointer
  * @fragment_size_default: the default fragment size if it's set auto
  */
-void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
+void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
+				 uint32_t fragment_size_default)
 {
 	if (amdgpu_vm_fragment_size == -1)
 		adev->vm_manager.fragment_size = fragment_size_default;
@@ -2555,7 +2556,8 @@ void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_s
  * @adev: amdgpu_device pointer
  * @vm_size: the default vm size if it's set auto
  */
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
+			   uint32_t fragment_size_default)
 {
 	/* adjust vm size firstly */
 	if (amdgpu_vm_size == -1)
@@ -2682,6 +2684,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	INIT_KFIFO(vm->faults);
+	vm->fault_credit = 16;
 
 	return 0;
 
@@ -2776,6 +2779,36 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
 }
 
+/**
+ * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: PASID to identify the VM
+ *
+ * This function is expected to be called in interrupt context. Returns
+ * true if there was fault credit, false otherwise.
+ */
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+				  unsigned int pasid)
+{
+	struct amdgpu_vm *vm;
+
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	if (!vm)
+		/* VM not found, can't track fault credit */
+		return true;
+
+	/* No lock needed, only accessed by IRQ handler */
+	if (!vm->fault_credit)
+		/* Too many faults in this VM */
+		return false;
+
+	vm->fault_credit--;
+	return true;
+}
+
 /**
  * amdgpu_vm_manager_init - init the VM manager
  *

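amdgpu_vm_init() above seeds fault_credit with 16 and the interrupt path decrements it per fault, so a misbehaving VM can flood the handler with at most a handful of non-retry faults before the rest are dropped. A compact model of the credit check (the initial 16 comes from the diff; the storm loop is illustrative):

#include <stdbool.h>
#include <stdio.h>

struct vm { unsigned int fault_credit; };

/* Mirrors the check above: each VM starts with a small budget of
 * non-retry faults; once it is spent, further faults are dropped. */
static bool fault_credit_ok(struct vm *vm)
{
	if (!vm)
		return true;	/* unknown VM: nothing to throttle */
	if (!vm->fault_credit)
		return false;	/* storm: drop this VM's faults */
	vm->fault_credit--;
	return true;
}

int main(void)
{
	struct vm vm = { .fault_credit = 16 };	/* seeded as in amdgpu_vm_init() */
	int handled = 0, i;

	for (i = 0; i < 100; i++)	/* simulated fault storm */
		if (fault_credit_ok(&vm))
			handled++;
	printf("handled %d of 100 faults\n", handled);	/* 16 */
	return 0;
}
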
+ 6 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -165,8 +165,11 @@ struct amdgpu_vm {
 	/* Flag to indicate ATS support from PTE for GFX9 */
 	bool			pte_support_ats;
 
-	/* Up to 128 pending page faults */
+	/* Up to 128 pending retry page faults */
 	DECLARE_KFIFO(faults, u64, 128);
+
+	/* Limit non-retry fault storms */
+	unsigned int		fault_credit;
 };
 
 struct amdgpu_vm_id {
@@ -244,6 +247,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+				  unsigned int pasid);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);

+ 0 - 1
drivers/gpu/drm/amd/amdgpu/ci_dpm.c

@@ -6365,7 +6365,6 @@ static int ci_dpm_sw_fini(void *handle)
 	flush_work(&adev->pm.dpm.thermal.work);
 
 	mutex_lock(&adev->pm.mutex);
-	amdgpu_pm_sysfs_fini(adev);
 	ci_dpm_fini(adev);
 	mutex_unlock(&adev->pm.mutex);
 

+ 17 - 2
drivers/gpu/drm/amd/amdgpu/cik_ih.c

@@ -237,8 +237,23 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
  /**

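cik_ih, cz_ih and iceland_ih all gain the same prescreen: IV ring entries are 16 bytes (four dwords), rptr >> 2 turns the byte read pointer into a dword index, the low byte of dword 0 carries the source id (146/147 appear to be the VM-fault sources on these parts), and the PASID sits in the top half of dword 2. Dropping an entry just advances rptr past it. A host-side decode of that layout (assumes a little-endian host, so the le32_to_cpu conversions are dropped; the credit callback is a stand-in):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t ih_ring[1024];	/* IV ring: 16-byte entries, 4 dwords each */
static uint32_t rptr;		/* byte offset into the ring */

static bool pasid_has_credit(uint16_t pasid)
{
	return pasid != 7;	/* stand-in: pretend PASID 7 is throttled */
}

static bool prescreen_iv(void)
{
	uint32_t ring_index = rptr >> 2;	/* bytes -> dwords */
	uint16_t pasid;

	switch (ih_ring[ring_index] & 0xff) {	/* source id, low byte */
	case 146:				/* VM fault sources */
	case 147:
		pasid = ih_ring[ring_index + 2] >> 16;
		if (!pasid || pasid_has_credit(pasid))
			return true;
		break;
	default:
		return true;			/* not a VM fault */
	}

	rptr += 16;	/* drop the fault: step over the whole entry */
	return false;
}

int main(void)
{
	ih_ring[0] = 146;		/* VM fault ...               */
	ih_ring[2] = 7u << 16;		/* ... from throttled PASID 7 */

	printf("process entry? %s\n", prescreen_iv() ? "yes" : "no");
	printf("rptr now %u\n", rptr);	/* 16: the entry was skipped */
	return 0;
}
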
+ 17 - 2
drivers/gpu/drm/amd/amdgpu/cz_ih.c

@@ -216,8 +216,23 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
 /**

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/dce_virtual.c

@@ -288,7 +288,7 @@ dce_virtual_encoder(struct drm_connector *connector)
 		if (connector->encoder_ids[i] == 0)
 			break;
 
-		encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
+		encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
 		if (!encoder)
 			continue;
 
@@ -298,7 +298,7 @@ dce_virtual_encoder(struct drm_connector *connector)
 
 	/* pick the first one */
 	if (enc_id)
-		return drm_encoder_find(connector->dev, enc_id);
+		return drm_encoder_find(connector->dev, NULL, enc_id);
 	return NULL;
 }
 

+ 188 - 1
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

@@ -636,7 +636,194 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 				NUM_BANKS(ADDR_SURF_2_BANK);
 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
 			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
-	} else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) {
+	} else if (adev->asic_type == CHIP_OLAND) {
+		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[1] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[2] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[3] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[4] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[5] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[6] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[7] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[8] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[9] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[10] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[11] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[12] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[13] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[14] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[15] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[16] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[17] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[21] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[22] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[23] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[24] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[25] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				NUM_BANKS(ADDR_SURF_8_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1);
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+	} else if (adev->asic_type == CHIP_HAINAN) {
 		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 				PIPE_CONFIG(ADDR_SURF_P2) |

+ 29 - 58
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -4132,18 +4132,12 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
 	gfx_v8_0_rlc_reset(adev);
 	gfx_v8_0_init_pg(adev);
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
-			/* legacy rlc firmware loading */
-			r = gfx_v8_0_rlc_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_RLC_G);
-			if (r)
-				return -EINVAL;
-		}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		/* legacy rlc firmware loading */
+		r = gfx_v8_0_rlc_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	gfx_v8_0_rlc_start(adev);
@@ -4959,43 +4953,15 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
 	if (!(adev->flags & AMD_IS_APU))
 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 			/* legacy firmware loading */
-			r = gfx_v8_0_cp_gfx_load_microcode(adev);
-			if (r)
-				return r;
+		r = gfx_v8_0_cp_gfx_load_microcode(adev);
+		if (r)
+			return r;
 
-			r = gfx_v8_0_cp_compute_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_CP_CE);
-			if (r)
-				return -EINVAL;
-
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_CP_PFP);
-			if (r)
-				return -EINVAL;
-
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_CP_ME);
-			if (r)
-				return -EINVAL;
-
-			if (adev->asic_type == CHIP_TOPAZ) {
-				r = gfx_v8_0_cp_compute_load_microcode(adev);
-				if (r)
-					return r;
-			} else {
-				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-										 AMDGPU_UCODE_ID_CP_MEC1);
-				if (r)
-					return -EINVAL;
-			}
-		}
+		r = gfx_v8_0_cp_compute_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	r = gfx_v8_0_cp_gfx_resume(adev);
@@ -6018,7 +5984,6 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
 {
 	uint32_t msg_id, pp_state = 0;
 	uint32_t pp_support_state = 0;
-	void *pp_handle = adev->powerplay.pp_handle;
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
@@ -6036,7 +6001,8 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_CG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
@@ -6057,7 +6023,8 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_MG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	return 0;
@@ -6069,7 +6036,6 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 
 	uint32_t msg_id, pp_state = 0;
 	uint32_t pp_support_state = 0;
-	void *pp_handle = adev->powerplay.pp_handle;
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
@@ -6087,7 +6053,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_CG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
@@ -6106,7 +6073,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_3D,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
@@ -6127,7 +6095,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_MG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
@@ -6142,7 +6111,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_RLC,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
@@ -6156,7 +6126,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 			PP_BLOCK_GFX_CP,
 			pp_support_state,
 			pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	return 0;
@@ -7076,7 +7047,7 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 {
 	uint64_t ce_payload_addr;
 	int cnt_ce;
-	static union {
+	union {
 		struct vi_ce_ib_state regular;
 		struct vi_ce_ib_state_chained_ib chained;
 	} ce_payload = {};
@@ -7105,7 +7076,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 {
 	uint64_t de_payload_addr, gds_addr, csa_addr;
 	int cnt_de;
-	static union {
+	union {
 		struct vi_de_ib_state regular;
 		struct vi_de_ib_state_chained_ib chained;
 	} de_payload = {};

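Dropping static from the ce_payload/de_payload unions (and from the v9 structs below) is more than style: a function-local static is one shared instance, so two rings emitting metadata concurrently would scribble over each other's payload, while a stack copy is private to each call. A deliberately racy demo of the difference (compile with -pthread; the usleep() only widens the race window, and the concurrent access to the static is undefined behaviour by design, so the clobber message may or may not fire on a given run):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Broken shape: one shared instance for every caller. */
static int *emit_static(int ring_id)
{
	static int payload;	/* single copy, shared by all threads */

	payload = ring_id;
	usleep(1000);		/* widen the race window */
	return &payload;
}

/* Fixed shape from the diff: each call gets its own stack copy. */
static int emit_stack(int ring_id)
{
	int payload = ring_id;

	usleep(1000);
	return payload;
}

static void *worker(void *arg)
{
	int id = (int)(long)arg;

	if (*emit_static(id) != id)
		printf("ring %d: static payload clobbered\n", id);
	if (emit_stack(id) != id)
		printf("ring %d: stack payload clobbered\n", id);	/* never */
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
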
+ 3 - 3
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

@@ -3583,7 +3583,7 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	u32 ref_and_mask, reg_mem_engine;
-	struct nbio_hdp_flush_reg *nbio_hf_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg;
 
 	if (ring->adev->flags & AMD_IS_APU)
 		nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;
@@ -3806,7 +3806,7 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
 
 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 {
-	static struct v9_ce_ib_state ce_payload = {0};
+	struct v9_ce_ib_state ce_payload = {0};
 	uint64_t csa_addr;
 	int cnt;
 
@@ -3825,7 +3825,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 
 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 {
-	static struct v9_de_ib_state de_payload = {0};
+	struct v9_de_ib_state de_payload = {0};
 	uint64_t csa_addr, gds_addr;
 	int cnt;
 

+ 17 - 2
drivers/gpu/drm/amd/amdgpu/iceland_ih.c

@@ -216,8 +216,23 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
 /**

+ 0 - 7
drivers/gpu/drm/amd/amdgpu/kv_dpm.c

@@ -2969,16 +2969,10 @@ static int kv_dpm_late_init(void *handle)
 {
 	/* powerdown unused blocks for now */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int ret;
 
 	if (!amdgpu_dpm)
 		return 0;
 
-	/* init the sysfs and debugfs files late */
-	ret = amdgpu_pm_sysfs_init(adev);
-	if (ret)
-		return ret;
-
 	kv_dpm_powergate_acp(adev, true);
 	kv_dpm_powergate_samu(adev, true);
 
@@ -3040,7 +3034,6 @@ static int kv_dpm_sw_fini(void *handle)
 	flush_work(&adev->pm.dpm.thermal.work);
 
 	mutex_lock(&adev->pm.mutex);
-	amdgpu_pm_sysfs_fini(adev);
 	kv_dpm_fini(adev);
 	mutex_unlock(&adev->pm.mutex);
 

+ 20 - 24
drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c

@@ -215,31 +215,27 @@ void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags)
 		*flags |= AMD_CG_SUPPORT_BIF_LS;
 }
 
-struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
-struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
+const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
+	.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ),
+	.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE),
+	.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+	.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+	.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+	.ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+	.ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+	.ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+	.ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+	.ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+	.ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+	.ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+	.ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+	.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK
+};
 
-int nbio_v6_1_init(struct amdgpu_device *adev)
-{
-	nbio_v6_1_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
-	nbio_v6_1_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK;
-
-	nbio_v6_1_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX);
-	nbio_v6_1_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA);
-
-	return 0;
-}
+const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data = {
+	.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX),
+	.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA),
+};
 
 void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev)
 {

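Both nbio blocks trade a runtime *_init() that filled mutable globals for const objects built with designated initializers: the tables are complete at compile time, land in .rodata, and there is no init call to forget or misorder. (This works because SOC15_REG_OFFSET expands to a constant expression.) The shape in miniature, with made-up offsets:

#include <stdio.h>

struct hdp_flush_reg {
	unsigned int req_offset;
	unsigned int done_offset;
	unsigned int ref_and_mask_cp0;
};

/* One const object, fully built at compile time; nothing to run at
 * boot and nothing a stray write can corrupt. */
static const struct hdp_flush_reg hdp_flush_reg = {
	.req_offset       = 0x1234,	/* illustrative offsets */
	.done_offset      = 0x1238,
	.ref_and_mask_cp0 = 1u << 0,
};

int main(void)
{
	printf("req=0x%x done=0x%x cp0=0x%x\n",
	       hdp_flush_reg.req_offset,
	       hdp_flush_reg.done_offset,
	       hdp_flush_reg.ref_and_mask_cp0);
	return 0;
}
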
+ 2 - 2
drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h

@@ -26,8 +26,8 @@
 
 #include "soc15_common.h"
 
-extern struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
-extern struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
+extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
+extern const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
 int nbio_v6_1_init(struct amdgpu_device *adev);
 u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev,
                                         uint32_t idx);

+ 20 - 24
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c

@@ -185,28 +185,24 @@ void nbio_v7_0_ih_control(struct amdgpu_device *adev)
 	WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
 }
 
-struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
-struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
+const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
+	.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ),
+	.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE),
+	.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
+	.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
+	.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
+	.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
+	.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
+	.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
+	.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
+	.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
+	.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
+	.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
+	.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+	.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
 
-int nbio_v7_0_init(struct amdgpu_device *adev)
-{
-	nbio_v7_0_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
-	nbio_v7_0_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
-
-	nbio_v7_0_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
-	nbio_v7_0_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
-
-	return 0;
-}
+const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data = {
+	.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2),
+	.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)
+};

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h

@@ -26,8 +26,8 @@
 
 #include "soc15_common.h"
 
-extern struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
-extern struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
+extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
+extern const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
 int nbio_v7_0_init(struct amdgpu_device *adev);
 u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
                                         uint32_t idx);

+ 5 - 15
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c

@@ -561,21 +561,11 @@ static int sdma_v2_4_start(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
-			r = sdma_v2_4_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_SDMA0);
-			if (r)
-				return -EINVAL;
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_SDMA1);
-			if (r)
-				return -EINVAL;
-		}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		r = sdma_v2_4_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	/* halt the engine before programming */

+ 5 - 16
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

@@ -819,23 +819,12 @@ static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
  */
 static int sdma_v3_0_start(struct amdgpu_device *adev)
 {
-	int r, i;
+	int r;
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
-			r = sdma_v3_0_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			for (i = 0; i < adev->sdma.num_instances; i++) {
-				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-										 (i == 0) ?
-										 AMDGPU_UCODE_ID_SDMA0 :
-										 AMDGPU_UCODE_ID_SDMA1);
-				if (r)
-					return -EINVAL;
-			}
-		}
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		r = sdma_v3_0_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	/* disable sdma engine before programming it */

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

@@ -371,7 +371,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
 static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	u32 ref_and_mask = 0;
-	struct nbio_hdp_flush_reg *nbio_hf_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg;
 
 	if (ring->adev->flags & AMD_IS_APU)
 		nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;

Some files were not shown because too many files changed in this diff