
Merge drm-upstream/drm-next into drm-intel-next-queued

Needed for timer_setup() and drm_dev_{get,put}() conversions in i915.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Jani Nikula · 7 years ago · commit 526b96c4f8
100 changed files with 1441 additions and 875 deletions
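For context, the timer_setup() conversion the commit message refers to replaces the old setup_timer()/"unsigned long data" callback style with a callback that receives the timer itself and recovers its container via from_timer(). A minimal sketch of that pattern, using a made-up driver struct rather than actual i915 code:

	#include <linux/timer.h>

	struct my_dev {
		struct timer_list hangcheck_timer;	/* hypothetical example field */
	};

	/* New-style callback: gets the timer, derives the owning struct. */
	static void my_dev_hangcheck(struct timer_list *t)
	{
		struct my_dev *dev = from_timer(dev, t, hangcheck_timer);
		/* ... handle the timeout for 'dev' ... */
	}

	static void my_dev_init_timers(struct my_dev *dev)
	{
		/* Was: setup_timer(&dev->hangcheck_timer, fn, (unsigned long)dev); */
		timer_setup(&dev->hangcheck_timer, my_dev_hangcheck, 0);
	}

The drm_dev_{get,put}() part of the message refers to the DRM device reference-counting helpers that supersede drm_dev_ref()/drm_dev_unref(); only the names change, the get/put semantics stay the same.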
  1. 0 2
      Documentation/cpu-freq/index.txt
  2. 4 0
      Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
  3. 49 0
      Documentation/devicetree/bindings/display/bridge/sii9234.txt
  4. 49 0
      Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt
  5. 3 0
      Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
  6. 18 10
      Documentation/devicetree/bindings/leds/ams,as3645a.txt
  7. 10 7
      Documentation/gpu/todo.rst
  8. 7 0
      MAINTAINERS
  9. 3 3
      Makefile
  10. 7 3
      arch/arm/boot/dts/omap3-n950-n9.dtsi
  11. 1 1
      arch/arm64/include/asm/pgtable.h
  12. 1 0
      arch/arm64/kernel/head.S
  13. 1 1
      arch/arm64/mm/fault.c
  14. 1 1
      arch/microblaze/Kconfig
  15. 1 0
      arch/microblaze/include/uapi/asm/Kbuild
  16. 1 1
      arch/microblaze/kernel/dma.c
  17. 13 1
      arch/powerpc/kvm/book3s_hv_rmhandlers.S
  18. 2 2
      arch/um/kernel/time.c
  19. 4 0
      arch/x86/events/intel/cstate.c
  20. 3 0
      arch/x86/events/intel/rapl.c
  21. 2 2
      arch/x86/events/intel/uncore_snbep.c
  22. 8 0
      arch/x86/events/msr.c
  23. 1 1
      arch/x86/ia32/ia32_signal.c
  24. 5 3
      arch/x86/include/asm/asm.h
  25. 22 68
      arch/x86/include/asm/fpu/internal.h
  26. 6 26
      arch/x86/include/asm/fpu/types.h
  27. 8 4
      arch/x86/include/asm/fpu/xstate.h
  28. 0 11
      arch/x86/include/asm/thread_info.h
  29. 4 7
      arch/x86/include/asm/trace/fpu.h
  30. 1 1
      arch/x86/include/asm/uaccess.h
  31. 2 2
      arch/x86/include/asm/xen/hypercall.h
  32. 43 112
      arch/x86/kernel/fpu/core.c
  33. 1 1
      arch/x86/kernel/fpu/init.c
  34. 26 22
      arch/x86/kernel/fpu/regset.c
  35. 21 16
      arch/x86/kernel/fpu/signal.c
  36. 213 51
      arch/x86/kernel/fpu/xstate.c
  37. 3 3
      arch/x86/kernel/irq_32.c
  38. 1 1
      arch/x86/kernel/ksysfs.c
  39. 2 1
      arch/x86/kernel/kvm.c
  40. 3 3
      arch/x86/kernel/signal.c
  41. 1 1
      arch/x86/kernel/traps.c
  42. 101 105
      arch/x86/kvm/vmx.c
  43. 1 1
      arch/x86/kvm/x86.c
  44. 1 1
      arch/x86/math-emu/fpu_entry.c
  45. 24 0
      arch/x86/mm/extable.c
  46. 24 23
      arch/x86/mm/fault.c
  47. 2 0
      arch/x86/mm/mem_encrypt.c
  48. 1 2
      arch/x86/mm/pkeys.c
  49. 1 1
      arch/x86/mm/tlb.c
  50. 4 9
      arch/x86/xen/mmu_pv.c
  51. 3 0
      block/blk-core.c
  52. 0 1
      block/bsg-lib.c
  53. 1 1
      block/partition-generic.c
  54. 9 7
      drivers/acpi/apei/ghes.c
  55. 7 0
      drivers/base/power/opp/core.c
  56. 1 1
      drivers/block/brd.c
  57. 2 4
      drivers/block/loop.h
  58. 6 0
      drivers/block/nbd.c
  59. 1 1
      drivers/clocksource/numachip.c
  60. 4 0
      drivers/cpufreq/cpufreq-dt-platdev.c
  61. 1 1
      drivers/dma-buf/dma-buf.c
  62. 42 14
      drivers/dma-buf/reservation.c
  63. 3 1
      drivers/gpu/drm/amd/amdgpu/amdgpu.h
  64. 9 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
  65. 9 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
  66. 23 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
  67. 8 8
      drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
  68. 61 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
  69. 26 11
      drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
  70. 4 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
  71. 6 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
  72. 18 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
  73. 1 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
  74. 41 42
      drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
  75. 6 54
      drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
  76. 0 9
      drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
  77. 5 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
  78. 20 5
      drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
  79. 3 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
  80. 25 29
      drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
  81. 4 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
  82. 13 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
  83. 35 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
  84. 6 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
  85. 0 1
      drivers/gpu/drm/amd/amdgpu/ci_dpm.c
  86. 17 2
      drivers/gpu/drm/amd/amdgpu/cik_ih.c
  87. 17 2
      drivers/gpu/drm/amd/amdgpu/cz_ih.c
  88. 2 2
      drivers/gpu/drm/amd/amdgpu/dce_virtual.c
  89. 188 1
      drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
  90. 29 58
      drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
  91. 3 3
      drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
  92. 17 2
      drivers/gpu/drm/amd/amdgpu/iceland_ih.c
  93. 0 7
      drivers/gpu/drm/amd/amdgpu/kv_dpm.c
  94. 20 24
      drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
  95. 2 2
      drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
  96. 20 24
      drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
  97. 2 2
      drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
  98. 5 15
      drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
  99. 5 16
      drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
  100. 1 1
      drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

+ 0 - 2
Documentation/cpu-freq/index.txt

@@ -32,8 +32,6 @@ cpufreq-stats.txt -	General description of sysfs cpufreq stats.
 
 index.txt	-	File index, Mailing list and Links (this document)
 
-intel-pstate.txt -	Intel pstate cpufreq driver specific file.
-
 pcc-cpufreq.txt -	PCC cpufreq driver specific file.
 
 

+ 4 - 0
Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt

@@ -68,6 +68,8 @@ Optional properties:
 - adi,disable-timing-generator: Only for ADV7533. Disables the internal timing
   generator. The chip will rely on the sync signals in the DSI data lanes,
   rather than generate its own timings for HDMI output.
+- clocks: from common clock binding: reference to the CEC clock.
+- clock-names: from common clock binding: must be "cec".
 
 Required nodes:
 
@@ -89,6 +91,8 @@ Example
 		reg = <39>;
 		interrupt-parent = <&gpio3>;
 		interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+		clocks = <&cec_clock>;
+		clock-names = "cec";
 
 		adi,input-depth = <8>;
 		adi,input-colorspace = "rgb";

+ 49 - 0
Documentation/devicetree/bindings/display/bridge/sii9234.txt

@@ -0,0 +1,49 @@
+Silicon Image SiI9234 HDMI/MHL bridge bindings
+
+Required properties:
+	- compatible : "sil,sii9234".
+	- reg : I2C address for TPI interface, use 0x39
+	- avcc33-supply : MHL/USB Switch Supply Voltage (3.3V)
+	- iovcc18-supply : I/O Supply Voltage (1.8V)
+	- avcc12-supply : TMDS Analog Supply Voltage (1.2V)
+	- cvcc12-supply : Digital Core Supply Voltage (1.2V)
+	- interrupts, interrupt-parent: interrupt specifier of INT pin
+	- reset-gpios: gpio specifier of RESET pin (active low)
+	- video interfaces: Device node can contain two video interface port
+			    nodes for HDMI encoder and connector according to [1].
+			    - port@0 - MHL to HDMI
+			    - port@1 - MHL to connector
+
+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+
+Example:
+	sii9234@39 {
+		compatible = "sil,sii9234";
+		reg = <0x39>;
+		avcc33-supply = <&vcc33mhl>;
+		iovcc18-supply = <&vcc18mhl>;
+		avcc12-supply = <&vsil12>;
+		cvcc12-supply = <&vsil12>;
+		reset-gpios = <&gpf3 4 GPIO_ACTIVE_LOW>;
+		interrupt-parent = <&gpf3>;
+		interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				mhl_to_hdmi: endpoint {
+					remote-endpoint = <&hdmi_to_mhl>;
+				};
+			};
+			port@1 {
+				reg = <1>;
+				mhl_to_connector: endpoint {
+					remote-endpoint = <&connector_to_mhl>;
+				};
+			};
+		};
+	};

+ 49 - 0
Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt

@@ -0,0 +1,49 @@
+This binding covers the official 7" (800x480) Raspberry Pi touchscreen
+panel.
+
+This DSI panel contains:
+
+- TC358762 DSI->DPI bridge
+- Atmel microcontroller on I2C for power sequencing the DSI bridge and
+  controlling backlight
+- Touchscreen controller on I2C for touch input
+
+and this binding covers the DSI display parts but not its touch input.
+
+Required properties:
+- compatible:	Must be "raspberrypi,7inch-touchscreen-panel"
+- reg:		Must be "45"
+- port:		See panel-common.txt
+
+Example:
+
+dsi1: dsi@7e700000 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	<...>
+
+	port {
+		dsi_out_port: endpoint {
+			remote-endpoint = <&panel_dsi_port>;
+		};
+	};
+};
+
+i2c_dsi: i2c {
+	compatible = "i2c-gpio";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	gpios = <&gpio 28 0
+		 &gpio 29 0>;
+
+	lcd@45 {
+		compatible = "raspberrypi,7inch-touchscreen-panel";
+		reg = <0x45>;
+
+		port {
+			panel_dsi_port: endpoint {
+				remote-endpoint = <&dsi_out_port>;
+			};
+		};
+	};
+};

+ 3 - 0
Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt

@@ -41,14 +41,17 @@ CEC. It is one end of the pipeline.
 Required properties:
   - compatible: value must be one of:
     * allwinner,sun5i-a10s-hdmi
+    * allwinner,sun6i-a31-hdmi
   - reg: base address and size of memory-mapped region
   - interrupts: interrupt associated to this IP
   - clocks: phandles to the clocks feeding the HDMI encoder
     * ahb: the HDMI interface clock
     * mod: the HDMI module clock
+    * ddc: the HDMI ddc clock (A31 only)
     * pll-0: the first video PLL
     * pll-1: the second video PLL
   - clock-names: the clock names mentioned above
+  - resets: phandle to the reset control for the HDMI encoder (A31 only)
   - dmas: phandles to the DMA channels used by the HDMI encoder
     * ddc-tx: The channel for DDC transmission
     * ddc-rx: The channel for DDC reception

+ 18 - 10
Documentation/devicetree/bindings/leds/ams,as3645a.txt

@@ -15,11 +15,14 @@ Required properties
 
 compatible	: Must be "ams,as3645a".
 reg		: The I2C address of the device. Typically 0x30.
+#address-cells	: 1
+#size-cells	: 0
 
 
-Required properties of the "flash" child node
-=============================================
+Required properties of the flash child node (0)
+===============================================
 
+reg: 0
 flash-timeout-us: Flash timeout in microseconds. The value must be in
 		  the range [100000, 850000] and divisible by 50000.
 flash-max-microamp: Maximum flash current in microamperes. Has to be
@@ -33,20 +36,21 @@ ams,input-max-microamp: Maximum flash controller input current. The
 			and divisible by 50000.
 
 
-Optional properties of the "flash" child node
-=============================================
+Optional properties of the flash child node
+===========================================
 
 label		: The label of the flash LED.
 
 
-Required properties of the "indicator" child node
-=================================================
+Required properties of the indicator child node (1)
+===================================================
 
+reg: 1
 led-max-microamp: Maximum indicator current. The allowed values are
 		  2500, 5000, 7500 and 10000.
 
-Optional properties of the "indicator" child node
-=================================================
+Optional properties of the indicator child node
+===============================================
 
 label		: The label of the indicator LED.
 
@@ -55,16 +59,20 @@ Example
 =======
 
 	as3645a@30 {
+		#address-cells: 1
+		#size-cells: 0
 		reg = <0x30>;
 		compatible = "ams,as3645a";
-		flash {
+		flash@0 {
+			reg = <0x0>;
 			flash-timeout-us = <150000>;
 			flash-max-microamp = <320000>;
 			led-max-microamp = <60000>;
 			ams,input-max-microamp = <1750000>;
 			label = "as3645a:flash";
 		};
-		indicator {
+		indicator@1 {
+			reg = <0x1>;
 			led-max-microamp = <10000>;
 			label = "as3645a:indicator";
 		};

+ 10 - 7
Documentation/gpu/todo.rst

@@ -184,12 +184,6 @@ Contact: Sean Paul, Maintainer of the driver you plan to convert
 Core refactorings
 =================
 
-Use new IDR deletion interface to clean up drm_gem_handle_delete()
-------------------------------------------------------------------
-
-See the "This is gross" comment -- apparently the IDR system now can return an
-error code instead of oopsing.
-
 Clean up the DRM header mess
 ----------------------------
 
@@ -357,7 +351,16 @@ those drivers as simple as possible, so lots of room for refactoring:
 - backlight helpers, probably best to put them into a new drm_backlight.c.
   This is because drivers/video is de-facto unmaintained. We could also
   move drivers/video/backlight to drivers/gpu/backlight and take it all
-  over within drm-misc, but that's more work.
+  over within drm-misc, but that's more work. Backlight helpers require a fair
+  bit of reworking and refactoring. A simple example is the enabling of a backlight.
+  Tinydrm has helpers for this. It would be good if other drivers can also use the
+  helper. However, there are various cases we need to consider i.e different
+  drivers seem to have different ways of enabling/disabling a backlight.
+  We also need to consider the backlight drivers (like gpio_backlight). The situation
+  is further complicated by the fact that the backlight is tied to fbdev
+  via fb_notifier_callback() which has complicated logic. For further details, refer
+  to the following discussion thread:
+  https://groups.google.com/forum/#!topic/outreachy-kernel/8rBe30lwtdA
 
 - spi helpers, probably best put into spi core/helper code. Thierry said
   the spi maintainer is fast&reactive, so shouldn't be a big issue.

+ 7 - 0
MAINTAINERS

@@ -5461,6 +5461,7 @@ F:	drivers/net/wan/sdla.c
 
 FRAMEBUFFER LAYER
 M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+L:	dri-devel@lists.freedesktop.org
 L:	linux-fbdev@vger.kernel.org
 T:	git git://github.com/bzolnier/linux.git
 Q:	http://patchwork.kernel.org/project/linux-fbdev/list/
@@ -8603,6 +8604,12 @@ M:	Sean Wang <sean.wang@mediatek.com>
 S:	Maintained
 F:	drivers/media/rc/mtk-cir.c
 
+MEDIATEK PMIC LED DRIVER
+M:	Sean Wang <sean.wang@mediatek.com>
+S:	Maintained
+F:	drivers/leds/leds-mt6323.c
+F:	Documentation/devicetree/bindings/leds/leds-mt6323.txt
+
 MEDIATEK ETHERNET DRIVER
 M:	Felix Fietkau <nbd@openwrt.org>
 M:	John Crispin <john@phrozen.org>

+ 3 - 3
Makefile

@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -1172,11 +1172,11 @@ headers_check: headers_install
 
 PHONY += kselftest
 kselftest:
-	$(Q)$(MAKE) -C tools/testing/selftests run_tests
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
 
 PHONY += kselftest-clean
 kselftest-clean:
-	$(Q)$(MAKE) -C tools/testing/selftests clean
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
 
 PHONY += kselftest-merge
 kselftest-merge:

+ 7 - 3
arch/arm/boot/dts/omap3-n950-n9.dtsi

@@ -267,15 +267,19 @@
 	clock-frequency = <400000>;
 
 	as3645a@30 {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		reg = <0x30>;
 		compatible = "ams,as3645a";
-		flash {
+		flash@0 {
+			reg = <0x0>;
 			flash-timeout-us = <150000>;
 			flash-max-microamp = <320000>;
 			led-max-microamp = <60000>;
-			peak-current-limit = <1750000>;
+			ams,input-max-microamp = <1750000>;
 		};
-		indicator {
+		indicator@1 {
+			reg = <0x1>;
 			led-max-microamp = <10000>;
 		};
 	};

+ 1 - 1
arch/arm64/include/asm/pgtable.h

@@ -401,7 +401,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-#define pte_offset_phys(dir,addr)	(pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
 #define pte_offset_kernel(dir,addr)	((pte_t *)__va(pte_offset_phys((dir), (addr))))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))

+ 1 - 0
arch/arm64/kernel/head.S

@@ -384,6 +384,7 @@ ENTRY(kimage_vaddr)
  * booted in EL1 or EL2 respectively.
  */
 ENTRY(el2_setup)
+	msr	SPsel, #1			// We want to use SP_EL{1,2}
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
 	b.eq	1f

+ 1 - 1
arch/arm64/mm/fault.c

@@ -651,7 +651,7 @@ static const struct fault_info fault_info[] = {
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
-	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"unknown 8"			},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},

+ 1 - 1
arch/microblaze/Kconfig

@@ -39,7 +39,7 @@ config MICROBLAZE
 # Endianness selection
 choice
 	prompt "Endianness selection"
-	default CPU_BIG_ENDIAN
+	default CPU_LITTLE_ENDIAN
 	help
 	  microblaze architectures can be configured for either little or
 	  big endian formats. Be sure to select the appropriate mode.

+ 1 - 0
arch/microblaze/include/uapi/asm/Kbuild

@@ -7,6 +7,7 @@ generic-y += fcntl.h
 generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
+generic-y += kvm_para.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h

+ 1 - 1
arch/microblaze/kernel/dma.c

@@ -165,7 +165,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     unsigned long attrs)
 {
 #ifdef CONFIG_MMU
-	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;
 	unsigned long pfn;

+ 13 - 1
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 BEGIN_FTR_SECTION
 	mtspr	SPRN_PPR, r0
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+/* Move canary into DSISR to check for later */
+BEGIN_FTR_SECTION
+	li	r0, 0x7fff
+	mtspr	SPRN_HDSISR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
 	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r4, VCPU_GPR(R4)(r4)
 
@@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 kvmppc_hdsi:
 	ld	r3, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r3)
-	cmpwi	r0, 0
 	mfspr	r4, SPRN_HDAR
 	mfspr	r6, SPRN_HDSISR
+BEGIN_FTR_SECTION
+	/* Look for DSISR canary. If we find it, retry instruction */
+	cmpdi	r6, 0x7fff
+	beq	6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	cmpwi	r0, 0
 	bne	.Lradix_hdsi		/* on radix, just save DAR/DSISR/ASDR */
 	/* HPTE not found fault or protection fault? */
 	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h

+ 2 - 2
arch/um/kernel/time.c

@@ -98,7 +98,7 @@ static struct clocksource timer_clocksource = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init timer_setup(void)
+static void __init um_timer_setup(void)
 {
 	int err;
 
@@ -132,5 +132,5 @@ void read_persistent_clock(struct timespec *ts)
 void __init time_init(void)
 {
 	timer_set_signal_handler();
-	late_time_init = timer_setup;
+	late_time_init = um_timer_setup;
 }

+ 4 - 0
arch/x86/events/intel/cstate.c

@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

+ 3 - 0
arch/x86/events/intel/rapl.c

@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
 	{},
 };
 

+ 2 - 2
arch/x86/events/intel/uncore_snbep.c

@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
 static struct intel_uncore_type skx_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IIO0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
 static struct intel_uncore_type skx_uncore_irp = {
 	.name			= "irp",
 	.num_counters		= 2,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IRP0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IRP0_MSR_PMON_CTR0,

+ 8 - 0
arch/x86/events/msr.c

@@ -63,6 +63,14 @@ static bool test_intel(int idx)
 	case INTEL_FAM6_ATOM_SILVERMONT1:
 	case INTEL_FAM6_ATOM_SILVERMONT2:
 	case INTEL_FAM6_ATOM_AIRMONT:
+
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_DENVERTON:
+
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
 		if (idx == PERF_MSR_SMI)
 			return true;
 		break;

+ 1 - 1
arch/x86/ia32/ia32_signal.c

@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 		 ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		unsigned long fx_aligned, math_size;
 
 		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);

+ 5 - 3
arch/x86/include/asm/asm.h

@@ -11,10 +11,12 @@
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(a)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
+/* 64 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(b)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
@@ -139,8 +141,8 @@
  * gets set up by the containing function.  If you forget to do this, objtool
  * may print a "call without frame pointer save/setup" warning.
  */
-register unsigned int __asm_call_sp asm("esp");
-#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
 
 #endif /* _ASM_X86_ASM_H */

+ 22 - 68
arch/x86/include/asm/fpu/internal.h

@@ -23,11 +23,9 @@
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__activate_curr(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
+extern void fpu__initialize(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
@@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
 	err;								\
 })
 
-#define check_insn(insn, output, input...)				\
-({									\
-	int err;							\
+#define kernel_insn(insn, output, input...)				\
 	asm volatile("1:" #insn "\n\t"					\
 		     "2:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3:  movl $-1,%[err]\n"				\
-		     "    jmp  2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : [err] "=r" (err), output				\
-		     : "0"(0), input);					\
-	err;								\
-})
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)	\
+		     : output : input)
 
 static inline int copy_fregs_to_user(struct fregs_state __user *fx)
 {
@@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 {
-	int err;
-
 	if (IS_ENABLED(CONFIG_X86_32)) {
-		err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+		kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 	} else {
 		if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
-			err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+			kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 		} else {
 			/* See comment in copy_fxregs_to_kernel() below. */
-			err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+			kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
 		}
 	}
-	/* Copying from a kernel buffer to FPU registers should never fail: */
-	WARN_ON_FPU(err);
 }
 
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 {
-	int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-	WARN_ON_FPU(err);
+	kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
  * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
  * XSAVE area format.
  */
-#define XSTATE_XRESTORE(st, lmask, hmask, err)				\
+#define XSTATE_XRESTORE(st, lmask, hmask)				\
 	asm volatile(ALTERNATIVE(XRSTOR,				\
 				 XRSTORS, X86_FEATURE_XSAVES)		\
 		     "\n"						\
-		     "xor %[err], %[err]\n"				\
 		     "3:\n"						\
-		     ".pushsection .fixup,\"ax\"\n"			\
-		     "4: movl $-2, %[err]\n"				\
-		     "jmp 3b\n"						\
-		     ".popsection\n"					\
-		     _ASM_EXTABLE(661b, 4b)				\
-		     : [err] "=r" (err)					\
+		     _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+		     :							\
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
 
@@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
 
-	/* We should never fault when copying from a kernel buffer: */
+	/*
+	 * We should never fault when copying from a kernel buffer, and the FPU
+	 * state we set at boot time should be valid.
+	 */
 	WARN_ON_FPU(err);
 }
 
@@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
 	u32 hmask = mask >> 32;
 	int err;
 
-	WARN_ON(!alternatives_patched);
+	WARN_ON_FPU(!alternatives_patched);
 
 	XSTATE_XSAVE(xstate, lmask, hmask, err);
 
@@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
 {
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
-	int err;
-
-	XSTATE_XRESTORE(xstate, lmask, hmask, err);
 
-	/* We should never fault when copying from a kernel buffer: */
-	WARN_ON_FPU(err);
+	XSTATE_XRESTORE(xstate, lmask, hmask);
 }
 
 /*
@@ -526,37 +503,16 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-	WARN_ON_FPU(!fpu->fpregs_active);
-
-	fpu->fpregs_active = 0;
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 	trace_x86_fpu_regs_deactivated(fpu);
 }
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	WARN_ON_FPU(fpu->fpregs_active);
-
-	fpu->fpregs_active = 1;
 	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
 	trace_x86_fpu_regs_activated(fpu);
 }
 
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
-	return current->thread.fpu.fpregs_active;
-}
-
 /*
  * FPU state switching for scheduling.
  *
@@ -571,14 +527,13 @@ static inline int fpregs_active(void)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->fpregs_active) {
+	if (old_fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
 			old_fpu->last_cpu = cpu;
 
 		/* But leave fpu_fpregs_owner_ctx! */
-		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
 	} else
 		old_fpu->last_cpu = -1;
@@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
 {
 	bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-		       new_fpu->fpstate_active;
+		       new_fpu->initialized;
 
 	if (preload) {
 		if (!fpregs_state_valid(new_fpu, cpu))
@@ -617,8 +572,7 @@ static inline void user_fpu_begin(void)
 	struct fpu *fpu = &current->thread.fpu;
 
 	preempt_disable();
-	if (!fpregs_active())
-		fpregs_activate(fpu);
+	fpregs_activate(fpu);
 	preempt_enable();
 }
 

+ 6 - 26
arch/x86/include/asm/fpu/types.h

@@ -68,6 +68,9 @@ struct fxregs_state {
 /* Default value for fxregs_state.mxcsr: */
 #define MXCSR_DEFAULT		0x1f80
 
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
 /*
  * Software based FPU emulation state. This is arbitrary really,
  * it matches the x87 format to make it easier to understand:
@@ -290,36 +293,13 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @fpstate_active:
+	 * @initialized:
 	 *
-	 * This flag indicates whether this context is active: if the task
+	 * This flag indicates whether this context is initialized: if the task
 	 * is not running then we can restore from this context, if the task
 	 * is running then we should save into this context.
 	 */
-	unsigned char			fpstate_active;
-
-	/*
-	 * @fpregs_active:
-	 *
-	 * This flag determines whether a given context is actively
-	 * loaded into the FPU's registers and that those registers
-	 * represent the task's current FPU state.
-	 *
-	 * Note the interaction with fpstate_active:
-	 *
-	 *   # task does not use the FPU:
-	 *   fpstate_active == 0
-	 *
-	 *   # task uses the FPU and regs are active:
-	 *   fpstate_active == 1 && fpregs_active == 1
-	 *
-	 *   # the regs are inactive but still match fpstate:
-	 *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
-	 *
-	 * The third state is what we use for the lazy restore optimization
-	 * on lazy-switching CPUs.
-	 */
-	unsigned char			fpregs_active;
+	unsigned char			initialized;
 
 	/*
 	 * @state:

+ 8 - 4
arch/x86/include/asm/fpu/xstate.h

@@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
 #endif

+ 0 - 11
arch/x86/include/asm/thread_info.h

@@ -158,17 +158,6 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-static inline unsigned long current_stack_pointer(void)
-{
-	unsigned long sp;
-#ifdef CONFIG_X86_64
-	asm("mov %%rsp,%0" : "=g" (sp));
-#else
-	asm("mov %%esp,%0" : "=g" (sp));
-#endif
-	return sp;
-}
-
 /*
  * Walks up the stack frames to make sure that the specified object is
  * entirely contained by a single stack frame.

+ 4 - 7
arch/x86/include/asm/trace/fpu.h

@@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_STRUCT__entry(
 		__field(struct fpu *, fpu)
-		__field(bool, fpregs_active)
-		__field(bool, fpstate_active)
+		__field(bool, initialized)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
 		),
 
 	TP_fast_assign(
 		__entry->fpu		= fpu;
-		__entry->fpregs_active	= fpu->fpregs_active;
-		__entry->fpstate_active	= fpu->fpstate_active;
+		__entry->initialized	= fpu->initialized;
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
 			__entry->fpu,
-			__entry->fpregs_active,
-			__entry->fpstate_active,
+			__entry->initialized,
 			__entry->xfeatures,
 			__entry->xcomp_bv
 	)

+ 1 - 1
arch/x86/include/asm/uaccess.h

@@ -337,7 +337,7 @@ do {									\
 		     _ASM_EXTABLE(1b, 4b)				\
 		     _ASM_EXTABLE(2b, 4b)				\
 		     : "=r" (retval), "=&A"(x)				\
-		     : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1),	\
+		     : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1),	\
 		       "i" (errret), "0" (retval));			\
 })
 

+ 2 - 2
arch/x86/include/asm/xen/hypercall.h

@@ -551,13 +551,13 @@ static inline void
 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
 			struct desc_struct desc)
 {
-	u32 *p = (u32 *) &desc;
-
 	mcl->op = __HYPERVISOR_update_descriptor;
 	if (sizeof(maddr) == sizeof(long)) {
 		mcl->args[0] = maddr;
 		mcl->args[1] = *(unsigned long *)&desc;
 	} else {
+		u32 *p = (u32 *)&desc;
+
 		mcl->args[0] = maddr;
 		mcl->args[1] = maddr >> 32;
 		mcl->args[2] = *p++;

+ 43 - 112
arch/x86/kernel/fpu/core.c

@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (fpu->fpregs_active) {
+	if (fpu->initialized) {
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (fpu->fpregs_active)
+	if (fpu->initialized)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
 
 	preempt_disable();
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->fpregs_active) {
+	if (fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
@@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
-	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	/*
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
-	 * In lazy mode, if the FPU context isn't loaded into
-	 * fpregs, CR0.TS will be set and do_device_not_available
-	 * will load the FPU context.
 	 *
-	 * We have to do all this with preemption disabled,
-	 * mostly because of the FNSAVE case, because in that
-	 * case we must not allow preemption in the window
-	 * between the FNSAVE and us marking the context lazy.
-	 *
-	 * It shouldn't be an issue as even FNSAVE is plenty
-	 * fast in terms of critical section length.
+	 * ( The function 'fails' in the FNSAVE case, which destroys
+	 *   register contents so we have to copy them back. )
 	 */
-	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state,
-		       fpu_kernel_xstate_size);
-
+		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
 		copy_kernel_to_fpregs(&src_fpu->state);
 	}
-	preempt_enable();
 
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
@@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	if (!fpu->fpstate_active) {
+	if (!fpu->initialized) {
 		fpstate_init(&fpu->state);
 		trace_x86_fpu_init_state(fpu);
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for the current task: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
  *
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ *   to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ *   registers were already saved by the context-switch code when
+ *   the task scheduled out - we only have to initialize the registers
+ *   if they've never been initialized.
  *
  * If the task has used the FPU before then save it.
  */
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
 {
-	/*
-	 * If fpregs are active (in the current CPU), then
-	 * copy them to the fpstate:
-	 */
-	if (fpu->fpregs_active) {
+	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
 	} else {
-		if (!fpu->fpstate_active) {
+		if (!fpu->initialized) {
 			fpstate_init(&fpu->state);
 			trace_x86_fpu_init_state(fpu);
 
 			trace_x86_fpu_activate_state(fpu);
 			/* Safe to do for current and for stopped child tasks: */
-			fpu->fpstate_active = 1;
+			fpu->initialized = 1;
 		}
 	}
 }
@@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
  * If the task has not used the FPU before then initialize its fpstate.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
  * restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
 {
 	/*
 	 * Only stopped child tasks can be used to modify the FPU
@@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	 */
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
-	if (fpu->fpstate_active) {
-		/* Invalidate any lazy state: */
+	if (fpu->initialized) {
+		/* Invalidate any cached state: */
 		__fpu_invalidate_fpregs_state(fpu);
 	} else {
 		fpstate_init(&fpu->state);
@@ -310,73 +300,10 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for stopped child tasks: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
 
-/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'.  Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * Ensure that the context-switching code does not write
-	 * over the fpstate while we are doing our update.
-	 */
-	preempt_disable();
-
-	/*
-	 * Move the fpregs in to the fpu's 'fpstate'.
-	 */
-	fpu__activate_fpstate_read(fpu);
-
-	/*
-	 * The caller is about to write to 'fpu'.  Ensure that no
-	 * CPU thinks that its fpregs match the fpstate.  This
-	 * ensures we will not be lazy and skip a XRSTOR in the
-	 * future.
-	 */
-	__fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * 'fpu' now has an updated copy of the state, but the
-	 * registers may still be out of date.  Update them with
-	 * an XRSTOR if they are active.
-	 */
-	if (fpregs_active())
-		copy_kernel_to_fpregs(&fpu->state);
-
-	/*
-	 * Our update is done and the fpregs/fpstate are in sync
-	 * if necessary.  Context switches can happen again.
-	 */
-	preempt_enable();
-}
-
 /*
 /*
  * 'fpu__restore()' is called to copy FPU registers from
  * the FPU fpstate to the live hw registers and to activate
  */
  */
 void fpu__restore(struct fpu *fpu)
 {
+	fpu__initialize(fpu);
 
 
 	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
 	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
 	kernel_fpu_disable();
 	kernel_fpu_disable();
@@ -414,15 +341,17 @@ void fpu__drop(struct fpu *fpu)
 {
 {
 	preempt_disable();
 
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		fpregs_deactivate(fpu);
+	if (fpu == &current->thread.fpu) {
+		if (fpu->initialized) {
+			/* Ignore delayed exceptions from user space */
+			asm volatile("1: fwait\n"
+				     "2:\n"
+				     _ASM_EXTABLE(1b, 2b));
+			fpregs_deactivate(fpu);
+		}
 	}
 	}
 
+	fpu->initialized = 0;
 
 
 	trace_x86_fpu_dropped(fpu);
 	trace_x86_fpu_dropped(fpu);
 
 
@@ -462,9 +391,11 @@ void fpu__clear(struct fpu *fpu)
 	 * Make sure fpstate is cleared and initialized.
 	 * Make sure fpstate is cleared and initialized.
 	 */
 	if (static_cpu_has(X86_FEATURE_FPU)) {
+		preempt_disable();
+		fpu__initialize(fpu);
 		user_fpu_begin();
 		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
 	}
 	}
 }
 
+ 1 - 1
arch/x86/kernel/fpu/init.c

@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
 	WARN_ON_FPU(!on_boot_cpu);
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 
+	WARN_ON_FPU(current->thread.fpu.initialized);
 }
 }
 
 /*
+ 26 - 22
arch/x86/kernel/fpu/regset.c

@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
 {
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
+	return target_fpu->initialized ? regset->n : 0;
 }
 }
 
 int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
+	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
 		return regset->n;
 		return regset->n;
 	else
 	else
 		return 0;
 		return 0;
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,10 +89,13 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (using_compacted_format()) {
-		ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+		if (kbuf)
+			ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
+		else
+			ret = copy_xstate_to_user(ubuf, xsave, pos, count);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
@@ -129,28 +132,29 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		ret = copyin_to_xsaves(kbuf, ubuf, xsave);
-	else
+	if (using_compacted_format()) {
+		if (kbuf)
+			ret = copy_kernel_to_xstate(xsave, kbuf);
+		else
+			ret = copy_user_to_xstate(xsave, ubuf);
+	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
-	/*
-	 * In case of failure, mark all states as init:
-	 */
-	if (ret)
-		fpstate_init(&fpu->state);
+		if (!ret)
+			ret = validate_xstate_header(&xsave->header);
+	}
 
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
 	xsave->i387.mxcsr &= mxcsr_feature_mask;
-	xsave->header.xfeatures &= xfeatures_mask;
+
 	/*
-	 * These bits must be zero.
+	 * In case of failure, mark all states as init:
 	 */
-	memset(&xsave->header.reserved, 0, 48);
+	if (ret)
+		fpstate_init(&fpu->state);
 
 	return ret;
 }
@@ -299,7 +303,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	struct fpu *fpu = &target->thread.fpu;
 	struct user_i387_ia32_struct env;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -329,7 +333,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_i387_ia32_struct env;
 	int ret;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))
@@ -369,7 +373,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
 	struct fpu *fpu = &tsk->thread.fpu;
 	int fpvalid;
 
-	fpvalid = fpu->fpstate_active;
+	fpvalid = fpu->initialized;
 	if (fpvalid)
 		fpvalid = !fpregs_get(tsk, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),

+ 21 - 16
arch/x86/kernel/fpu/signal.c

@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  */
 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 {
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+	struct fpu *fpu = &current->thread.fpu;
+	struct xregs_state *xsave = &fpu->state.xsave;
 	struct task_struct *tsk = current;
 	int ia32_fxstate = (buf != buf_fx);
 
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active() || using_compacted_format()) {
+	if (fpu->initialized || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
 		/* Update the thread's fxstate to save the fsave header. */
 		if (ia32_fxstate)
-			copy_fxregs_to_kernel(&tsk->thread.fpu);
+			copy_fxregs_to_kernel(fpu);
 	} else {
 		/*
 		 * It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			return -1;
 		}
 
-		fpstate_sanitize_xstate(&tsk->thread.fpu);
+		fpstate_sanitize_xstate(fpu);
 		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
@@ -213,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
 	struct xstate_header *header = &xsave->header;
 
 	if (use_xsave()) {
-		/* These bits must be zero. */
-		memset(header->reserved, 0, 48);
+		/*
+		 * Note: we don't need to zero the reserved bits in the
+		 * xstate_header here because we either didn't copy them at all,
+		 * or we checked earlier that they aren't set.
+		 */
 
 		/*
 		 * Init the state that is not present in the memory
@@ -223,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
 		if (fx_only)
 			header->xfeatures = XFEATURE_MASK_FPSSE;
 		else
-			header->xfeatures &= (xfeatures_mask & xfeatures);
+			header->xfeatures &= xfeatures;
 	}
 
 	if (use_fxsr()) {
@@ -279,7 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	if (!access_ok(VERIFY_READ, buf, size))
 		return -EACCES;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(current, NULL,
@@ -307,28 +311,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		/*
 		 * For 32-bit frames with fxstate, copy the user state to the
 		 * thread's fpu state, reconstruct fxstate from the fsave
-		 * header. Sanitize the copied state etc.
+		 * header. Validate and sanitize the copied state.
 		 */
 		struct fpu *fpu = &tsk->thread.fpu;
 		struct user_i387_ia32_struct env;
 		int err = 0;
 
 		/*
-		 * Drop the current fpu which clears fpu->fpstate_active. This ensures
+		 * Drop the current fpu which clears fpu->initialized. This ensures
 		 * that any context-switch during the copy of the new state,
 		 * avoids the intermediate state from getting restored/saved.
 		 * Thus avoiding the new restored state from getting corrupted.
 		 * We will be ready to restore/save the state only after
-		 * fpu->fpstate_active is again set.
+		 * fpu->initialized is again set.
 		 */
 		fpu__drop(fpu);
 
 		if (using_compacted_format()) {
-			err = copyin_to_xsaves(NULL, buf_fx,
-					       &fpu->state.xsave);
+			err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		} else {
-			err = __copy_from_user(&fpu->state.xsave,
-					       buf_fx, state_size);
+			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+
+			if (!err && state_size > offsetof(struct xregs_state, header))
+				err = validate_xstate_header(&fpu->state.xsave.header);
 		}
 
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {
@@ -339,7 +344,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
 		}
 
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 		preempt_disable();
 		fpu__restore(fpu);
 		preempt_enable();

+ 213 - 51
arch/x86/kernel/fpu/xstate.c

@@ -483,6 +483,30 @@ int using_compacted_format(void)
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+	/* No unknown or supervisor features may be set */
+	if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+		return -EINVAL;
+
+	/* Userspace must use the uncompacted format */
+	if (hdr->xcomp_bv)
+		return -EINVAL;
+
+	/*
+	 * If 'reserved' is shrunken to add a new field, make sure to validate
+	 * that new field here!
+	 */
+	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+	/* No reserved bits may be set */
+	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+		return -EINVAL;
+
+	return 0;
+}
+
 static void __xstate_dump_leaves(void)
 {
 	int i;
@@ -867,7 +891,7 @@ const void *get_xsave_field_ptr(int xsave_state)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (!fpu->fpstate_active)
+	if (!fpu->initialized)
 		return NULL;
 	/*
 	 * fpu__save() takes the CPU's xstate registers
@@ -920,39 +944,130 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 }
 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
+/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+		return false;
+
+	if (xfeatures & XFEATURE_MASK_FP)
+		return false;
+
+	return true;
+}
+
 /*
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
-				 void *kbuf, void __user *ubuf,
-				 const void *data, const int start_pos,
-				 const int end_pos)
+static inline void
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+			unsigned int offset, unsigned int size, unsigned int size_total)
 {
-	if ((count == 0) || (pos < start_pos))
-		return 0;
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
 
-	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+		memcpy(kbuf + offset, data, copy);
+	}
+}
 
-		if (kbuf) {
-			memcpy(kbuf + pos, data, copy);
-		} else {
-			if (__copy_to_user(ubuf + pos, data, copy))
-				return -EFAULT;
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy but pos always starts from zero. This is called
+ * from xstateregs_get() and there we check the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+{
+	unsigned int offset, size;
+	struct xstate_header header;
+	int i;
+
+	/*
+	 * Currently copy_regset_to_user() starts from pos 0:
+	 */
+	if (unlikely(offset_start != 0))
+		return -EFAULT;
+
+	/*
+	 * The destination is a ptrace buffer; we put in only user xstates:
+	 */
+	memset(&header, 0, sizeof(header));
+	header.xfeatures = xsave->header.xfeatures;
+	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Copy xregs_state->header:
+	 */
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(header);
+
+	__copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		/*
+		 * Copy only in-use xstates:
+		 */
+		if ((header.xfeatures >> i) & 1) {
+			void *src = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
+
+			__copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 		}
+
+	}
+
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
+	/*
+	 * Fill xsave->i387.sw_reserved value for ptrace frame:
+	 */
+	offset = offsetof(struct fxregs_state, sw_reserved);
+	size = sizeof(xstate_fx_sw_bytes);
+
+	__copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
+
+	return 0;
+}
+
+static inline int
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
+{
+	if (!size)
+		return 0;
+
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
+
+		if (__copy_to_user(ubuf + offset, data, copy))
+			return -EFAULT;
 	}
 	return 0;
 }
 
 /*
  * Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy but pos always starts from
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -961,7 +1076,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
 	 */
-	if (unlikely(pos != 0))
+	if (unlikely(offset_start != 0))
 		return -EFAULT;
 
 	/*
@@ -977,8 +1092,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
-
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -992,25 +1106,30 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
 
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
-
-			if (offset + size >= count)
-				break;
 		}
 
 	}
 
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
 	/*
 	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 	 */
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -1018,55 +1137,98 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 }
 
 /*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set().
  */
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 xfeatures;
-	u64 allowed_features;
+	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
-	size = sizeof(xfeatures);
+	size = sizeof(hdr);
 
-	if (kbuf) {
-		memcpy(&xfeatures, kbuf + offset, size);
-	} else {
-		if (__copy_from_user(&xfeatures, ubuf + offset, size))
-			return -EFAULT;
+	memcpy(&hdr, kbuf + offset, size);
+
+	if (validate_xstate_header(&hdr))
+		return -EINVAL;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		u64 mask = ((u64)1 << i);
+
+		if (hdr.xfeatures & mask) {
+			void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			memcpy(dst, kbuf + offset, size);
+		}
+	}
+
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
 	}
 
 	/*
-	 * Reject if the user sets any disabled or supervisor features:
+	 * The state that came in from userspace was user-state only.
+	 * Mask all the user states out of 'xfeatures':
+	 */
+	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Add back in the features that came in from userspace:
 	 */
-	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
-	if (xfeatures & ~allowed_features)
+	return 0;
+}
+
+/*
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
+ */
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
+{
+	unsigned int offset, size;
+	int i;
+	struct xstate_header hdr;
+
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(hdr);
+
+	if (__copy_from_user(&hdr, ubuf + offset, size))
+		return -EFAULT;
+
+	if (validate_xstate_header(&hdr))
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
-		if (xfeatures & mask) {
+		if (hdr.xfeatures & mask) {
 			void *dst = __raw_xsave_addr(xsave, 1 << i);
 
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			if (kbuf) {
-				memcpy(dst, kbuf + offset, size);
-			} else {
-				if (__copy_from_user(dst, ubuf + offset, size))
-					return -EFAULT;
-			}
+			if (__copy_from_user(dst, ubuf + offset, size))
+				return -EFAULT;
 		}
 	}
 
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+			return -EFAULT;
+	}
+
 	/*
 	 * The state that came in from userspace was user-state only.
 	 * Mask all the user states out of 'xfeatures':
@@ -1076,7 +1238,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
 	/*
 	 * Add back in the features that came in from userspace:
 	 */
-	xsave->header.xfeatures |= xfeatures;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
 	return 0;
 }
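
For readers following the new validate_xstate_header() logic above: a minimal user-space sketch of the same three checks, assuming an invented struct layout and feature masks purely for illustration (the kernel uses memchr_inv() where the sketch uses a memcmp() against zeroes):

#include <stdint.h>
#include <string.h>

struct xstate_header_example {
	uint64_t xfeatures;
	uint64_t xcomp_bv;
	uint8_t  reserved[48];
};

#define SUPPORTED_FEATURES   0x1fULL  /* stand-in for xfeatures_mask            */
#define SUPERVISOR_FEATURES  0x08ULL  /* stand-in for supervisor-only state bits */

static int example_validate_header(const struct xstate_header_example *hdr)
{
	uint8_t zero[48] = { 0 };

	/* no unknown or supervisor-only feature bits may be set */
	if (hdr->xfeatures & (~SUPPORTED_FEATURES | SUPERVISOR_FEATURES))
		return -1;
	/* user-supplied buffers must use the standard (uncompacted) format */
	if (hdr->xcomp_bv)
		return -1;
	/* every reserved byte must be zero */
	if (memcmp(hdr->reserved, zero, sizeof(zero)))
		return -1;
	return 0;
}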

+ 3 - 3
arch/x86/kernel/irq_32.c

@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack)
 
 static inline void *current_stack(void)
 {
-	return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+	return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
 }
 
 static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 
 	/* Save the next esp at the bottom of the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer();
+	*prev_esp = current_stack_pointer;
 
 	if (unlikely(overflow))
 		call_on_stack(print_stack_overflow, isp);
@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
 
 	/* Push the previous esp onto the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer();
+	*prev_esp = current_stack_pointer;
 
 	call_on_stack(__do_softirq, isp);
 }

+ 1 - 1
arch/x86/kernel/ksysfs.c

@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
 	return 0;
 
 out_clean_nodes:
-	for (j = i - 1; j > 0; j--)
+	for (j = i - 1; j >= 0; j--)
 		cleanup_setup_data_node(*(kobjp + j));
 	kfree(kobjp);
 out_setup_data_kobj:
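
A minimal stand-alone sketch (plain user-space C, array and names invented for the example) of why the one-character change above matters: with "j > 0" the cleanup loop never reaches index 0, which is exactly the element the fixed loop now releases.

#include <assert.h>
#include <stdbool.h>

#define N 4

int main(void)
{
	bool allocated[N] = { true, true, true, false };  /* setup failed at i == 3 */
	int i = 3, j;

	for (j = i - 1; j >= 0; j--)   /* "j > 0" would leave allocated[0] set */
		allocated[j] = false;

	for (j = 0; j < i; j++)
		assert(!allocated[j]);
	return 0;
}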

+ 2 - 1
arch/x86/kernel/kvm.c

@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
 
 	n.token = token;
 	n.cpu = smp_processor_id();
-	n.halted = is_idle_task(current) || preempt_count() > 1;
+	n.halted = is_idle_task(current) || preempt_count() > 1 ||
+		   rcu_preempt_depth();
 	init_swait_queue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
 	raw_spin_unlock(&b->lock);

+ 3 - 3
arch/x86/kernel/signal.c

@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
 					  &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		return (void __user *)-1L;
 
 	/* save i387 and extended state */
-	if (fpu->fpstate_active &&
+	if (fpu->initialized &&
 	    copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
 		return (void __user *)-1L;
 
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		/*
 		 * Ensure the signal handler starts with the new fpu state.
 		 */
-		if (fpu->fpstate_active)
+		if (fpu->initialized)
 			fpu__clear(fpu);
 	}
 	signal_setup_done(failed, ksig, stepping);

+ 1 - 1
arch/x86/kernel/traps.c

@@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	 * from double_fault.
 	 */
 	BUG_ON((unsigned long)(current_top_of_stack() -
-			       current_stack_pointer()) >= THREAD_SIZE);
+			       current_stack_pointer) >= THREAD_SIZE);
 
 	preempt_enable_no_resched();
 }

+ 101 - 105
arch/x86/kvm/vmx.c

@@ -200,6 +200,8 @@ struct loaded_vmcs {
 	int cpu;
 	bool launched;
 	bool nmi_known_unmasked;
+	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
+	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -600,8 +602,6 @@ struct vcpu_vmx {
 		int           gs_ldt_reload_needed;
 		int           fs_reload_needed;
 		u64           msr_host_bndcfgs;
-		unsigned long vmcs_host_cr3;	/* May not match real cr3 */
-		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	} host_state;
 	struct {
 		int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	struct pi_desc old, new;
 	unsigned int dest;
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	/*
+	 * In case of hot-plug or hot-unplug, we may have to undo
+	 * vmx_vcpu_pi_put even if there is no assigned device.  And we
+	 * always keep PI.NDST up to date for simplicity: it makes the
+	 * code easier, and CPU migration is not a fast path.
+	 */
+	if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
 		return;
 
+	/*
+	 * First handle the simple case where no cmpxchg is necessary; just
+	 * allow posting non-urgent interrupts.
+	 *
+	 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+	 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+	 * expects the VCPU to be on the blocked_vcpu_list that matches
+	 * PI.NDST.
+	 */
+	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+	    vcpu->cpu == cpu) {
+		pi_clear_sn(pi_desc);
+		return;
+	}
+
+	/* The full case.  */
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
-		 * are two possible cases:
-		 * 1. After running 'pre_block', context switch
-		 *    happened. For this case, 'sn' was set in
-		 *    vmx_vcpu_put(), so we need to clear it here.
-		 * 2. After running 'pre_block', we were blocked,
-		 *    and woken up by some other guy. For this case,
-		 *    we don't need to do anything, 'pi_post_block'
-		 *    will do everything for us. However, we cannot
-		 *    check whether it is case #1 or case #2 here
-		 *    (maybe, not needed), so we also clear sn here,
-		 *    I think it is not a big deal.
-		 */
-		if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
-			if (vcpu->cpu != cpu) {
-				dest = cpu_physical_id(cpu);
-
-				if (x2apic_enabled())
-					new.ndst = dest;
-				else
-					new.ndst = (dest << 8) & 0xFF00;
-			}
+			dest = cpu_physical_id(cpu);
 
-			/* set 'NV' to 'notification vector' */
-			new.nv = POSTED_INTR_VECTOR;
-		}
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
 
-		/* Allow posting non-urgent interrupts */
 		new.sn = 0;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 }
 
 static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5178,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	 */
 	cr3 = __read_cr3();
 	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
-	vmx->host_state.vmcs_host_cr3 = cr3;
+	vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
 	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
-	vmx->host_state.vmcs_host_cr4 = cr4;
+	vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
@@ -9273,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
 	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+	if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
 		vmcs_writel(HOST_CR3, cr3);
-		vmx->host_state.vmcs_host_cr3 = cr3;
+		vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 	}
 
 	cr4 = cr4_read_shadow();
-	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+	if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
-		vmx->host_state.vmcs_host_cr4 = cr4;
+		vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 	}
 
 	/* When single-stepping over STI and MOV SS, we must clear the
@@ -9591,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
+	/*
+	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+	 * or POSTED_INTR_WAKEUP_VECTOR.
+	 */
+	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+	vmx->pi_desc.sn = 1;
+
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -9839,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
 	WARN_ON(!is_guest_mode(vcpu));
 
-	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+		!to_vmx(vcpu)->nested.nested_run_pending) {
 		vmcs12->vm_exit_intr_error_code = fault->error_code;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -11704,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+
+	do {
+		old.control = new.control = pi_desc->control;
+		WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+		     "Wakeup handler not enabled while the VCPU is blocked\n");
+
+		dest = cpu_physical_id(vcpu->cpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* set 'NV' to 'notification vector' */
+		new.nv = POSTED_INTR_VECTOR;
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
+
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		list_del(&vcpu->blocked_vcpu_list);
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		vcpu->pre_pcpu = -1;
+	}
+}
+
 /*
  * This routine does the following things for vCPU which is going
  * to be blocked if VT-d PI is enabled.
@@ -11719,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
  */
 static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-	unsigned long flags;
 	unsigned int dest;
 	struct pi_desc old, new;
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11729,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 		!kvm_vcpu_apicv_active(vcpu))
 		return 0;
 
-	vcpu->pre_pcpu = vcpu->cpu;
-	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-			  vcpu->pre_pcpu), flags);
-	list_add_tail(&vcpu->blocked_vcpu_list,
-		      &per_cpu(blocked_vcpu_on_cpu,
-		      vcpu->pre_pcpu));
-	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
-			       vcpu->pre_pcpu), flags);
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
+	if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+		vcpu->pre_pcpu = vcpu->cpu;
+		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+		list_add_tail(&vcpu->blocked_vcpu_list,
+			      &per_cpu(blocked_vcpu_on_cpu,
+				       vcpu->pre_pcpu));
+		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+	}
 
 	do {
 		old.control = new.control = pi_desc->control;
 
-		/*
-		 * We should not block the vCPU if
-		 * an interrupt is posted for it.
-		 */
-		if (pi_test_on(pi_desc) == 1) {
-			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-					  vcpu->pre_pcpu), flags);
-			list_del(&vcpu->blocked_vcpu_list);
-			spin_unlock_irqrestore(
-					&per_cpu(blocked_vcpu_on_cpu_lock,
-					vcpu->pre_pcpu), flags);
-			vcpu->pre_pcpu = -1;
-
-			return 1;
-		}
-
 		WARN((pi_desc->sn == 1),
 		     "Warning: SN field of posted-interrupts "
 		     "is set before blocking\n");
@@ -11778,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 
 		/* set 'NV' to 'wakeup vector' */
 		new.nv = POSTED_INTR_WAKEUP_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
+	} while (cmpxchg64(&pi_desc->control, old.control,
+			   new.control) != old.control);
 
-	return 0;
+	/* We should not block the vCPU if an interrupt is posted for it.  */
+	if (pi_test_on(pi_desc) == 1)
+		__pi_post_block(vcpu);
+
+	local_irq_enable();
+	return (vcpu->pre_pcpu == -1);
 }
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11797,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct pi_desc old, new;
-	unsigned int dest;
-	unsigned long flags;
-
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	if (vcpu->pre_pcpu == -1)
 		return;
 
-	do {
-		old.control = new.control = pi_desc->control;
-
-		dest = cpu_physical_id(vcpu->cpu);
-
-		if (x2apic_enabled())
-			new.ndst = dest;
-		else
-			new.ndst = (dest << 8) & 0xFF00;
-
-		/* Allow posting non-urgent interrupts */
-		new.sn = 0;
-
-		/* set 'NV' to 'notification vector' */
-		new.nv = POSTED_INTR_VECTOR;
-	} while (cmpxchg(&pi_desc->control, old.control,
-			new.control) != old.control);
-
-	if(vcpu->pre_pcpu != -1) {
-		spin_lock_irqsave(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		list_del(&vcpu->blocked_vcpu_list);
-		spin_unlock_irqrestore(
-			&per_cpu(blocked_vcpu_on_cpu_lock,
-			vcpu->pre_pcpu), flags);
-		vcpu->pre_pcpu = -1;
-	}
+	WARN_ON(irqs_disabled());
+	local_irq_disable();
+	__pi_post_block(vcpu);
+	local_irq_enable();
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)
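
The posted-interrupt changes above repeatedly use the same read-modify-retry shape around cmpxchg64() on pi_desc->control. A hedged user-space analogy (GCC atomic builtins, bit layout invented for the example, not the kernel's cmpxchg64()):

#include <stdint.h>
#include <stdbool.h>

#define SN_BIT (1ULL << 1)

/* Clear a "suppress notification" style bit in a shared 64-bit control word. */
static void clear_sn(uint64_t *control)
{
	uint64_t old = __atomic_load_n(control, __ATOMIC_RELAXED);
	uint64_t new;

	do {
		new = old & ~SN_BIT;
		/* on failure, 'old' is refreshed with the current value */
	} while (!__atomic_compare_exchange_n(control, &old, new, false,
					      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
}

The loop only succeeds when no other CPU has modified the word between the read and the exchange, which is the property the VT-d posted-interrupt descriptor updates rely on.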

+ 1 - 1
arch/x86/kvm/x86.c

@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

+ 1 - 1
arch/x86/math-emu/fpu_entry.c

@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
 	struct desc_struct code_descriptor;
 	struct fpu *fpu = &current->thread.fpu;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 #ifdef RE_ENTRANT_CHECKING
 	if (emulating) {

+ 24 - 0
arch/x86/mm/extable.c

@@ -2,6 +2,7 @@
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
 
+#include <asm/fpu/internal.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_refcount);
 
+/*
+ * Handler for when we fail to restore a task's FPU state.  We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid.  However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU.  Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+			  struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+
+	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+		  (void *)instruction_pointer(regs));
+
+	__copy_kernel_to_fpregs(&init_fpstate, -1);
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
 		   struct pt_regs *regs, int trapnr)
 {

+ 24 - 23
arch/x86/mm/fault.c

@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *	     faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
-		struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * valid VMA, so we should never reach this without a
 	 * valid VMA.
 	 */
-	if (!vma) {
+	if (!pkey) {
 		WARN_ONCE(1, "PKU fault with no VMA passed in");
 		info->si_pkey = 0;
 		return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * absolutely guranteed to be 100% accurate because of
 	 * the race explained above.
 	 */
-	info->si_pkey = vma_pkey(vma);
+	info->si_pkey = *pkey;
 }
 
 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
-		     struct task_struct *tsk, struct vm_area_struct *vma,
-		     int fault)
+		     struct task_struct *tsk, u32 *pkey, int fault)
 {
 	unsigned lsb = 0;
 	siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
-	fill_sig_info_pkey(si_code, &info, vma);
+	fill_sig_info_pkey(si_code, &info, pkey);
 
 	force_sig_info(si_signo, &info, tsk);
 }
@@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 	unsigned long flags;
 	int sig;
-	/* No context means no VMA to pass down */
-	struct vm_area_struct *vma = NULL;
 
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
 			/* XXX: hwpoison faults will set the wrong code. */
 			force_sig_info_fault(signal, si_code, address,
-					     tsk, vma, 0);
+					     tsk, NULL, 0);
 		}
 
 		/*
@@ -896,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 static void
 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		       unsigned long address, struct vm_area_struct *vma,
-		       int si_code)
+		       unsigned long address, u32 *pkey, int si_code)
 {
 	struct task_struct *tsk = current;
 
@@ -945,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		tsk->thread.error_code	= error_code;
 		tsk->thread.trap_nr	= X86_TRAP_PF;
 
-		force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
 
 		return;
 	}
@@ -958,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 static noinline void
 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		     unsigned long address, struct vm_area_struct *vma)
+		     unsigned long address, u32 *pkey)
 {
-	__bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+	__bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
 }
 
 static void
@@ -968,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	   unsigned long address,  struct vm_area_struct *vma, int si_code)
 {
 	struct mm_struct *mm = current->mm;
+	u32 pkey;
+
+	if (vma)
+		pkey = vma_pkey(vma);
 
 	/*
 	 * Something tried to access memory that isn't in our memory map..
@@ -975,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
 	 */
 	up_read(&mm->mmap_sem);
 
-	__bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+	__bad_area_nosemaphore(regs, error_code, address,
+			       (vma) ? &pkey : NULL, si_code);
 }
 
 static noinline void
@@ -1018,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-	  struct vm_area_struct *vma, unsigned int fault)
+	  u32 *pkey, unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	int code = BUS_ADRERR;
@@ -1045,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 		code = BUS_MCEERR_AR;
 	}
 #endif
-	force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+	force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
 }
 
 static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-	       unsigned long address, struct vm_area_struct *vma,
-	       unsigned int fault)
+	       unsigned long address, u32 *pkey, unsigned int fault)
 {
 	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
@@ -1075,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
 			     VM_FAULT_HWPOISON_LARGE))
-			do_sigbus(regs, error_code, address, vma, fault);
+			do_sigbus(regs, error_code, address, pkey, fault);
 		else if (fault & VM_FAULT_SIGSEGV)
-			bad_area_nosemaphore(regs, error_code, address, vma);
+			bad_area_nosemaphore(regs, error_code, address, pkey);
 		else
 			BUG();
 	}
@@ -1267,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	struct mm_struct *mm;
 	int fault, major = 0;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	u32 pkey;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1467,9 +1467,10 @@ good_area:
 		return;
 	}
 
+	pkey = vma_pkey(vma);
 	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mm_fault_error(regs, error_code, address, vma, fault);
+		mm_fault_error(regs, error_code, address, &pkey, fault);
 		return;
 	}
 

+ 2 - 0
arch/x86/mm/mem_encrypt.c

@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#define DISABLE_BRANCH_PROFILING
+
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/mm.h>

+ 1 - 2
arch/x86/mm/pkeys.c

@@ -18,7 +18,6 @@
 
 #include <asm/cpufeature.h>             /* boot_cpu_has, ...            */
 #include <asm/mmu_context.h>            /* vma_pkey()                   */
-#include <asm/fpu/internal.h>           /* fpregs_active()              */
 
 int __execute_only_pkey(struct mm_struct *mm)
 {
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    fpregs_active() &&
+	    current->thread.fpu.initialized &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;

+ 1 - 1
arch/x86/mm/tlb.c

@@ -191,7 +191,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			 * mapped in the new pgd, we'll double-fault.  Forcibly
 			 * map it.
 			 */
-			unsigned int index = pgd_index(current_stack_pointer());
+			unsigned int index = pgd_index(current_stack_pointer);
 			pgd_t *pgd = next->pgd + index;
 
 			if (unlikely(pgd_none(*pgd)))

+ 4 - 9
arch/x86/xen/mmu_pv.c

@@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void)
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
 	 * the ramdisk). We continue on, erasing PMD entries that point to page
 	 * tables - do note that they are accessible at this stage via __va.
-	 * For good measure we also round up to the PMD - which means that if
+	 * As Xen is aligning the memory end to a 4MB boundary, for good
+	 * measure we also round up to PMD_SIZE * 2 - which means that if
 	 * anybody is using __ka address to the initial boot-stack - and try
 	 * to use it - they are going to crash. The xen_start_info has been
 	 * taken care of already in xen_setup_kernel_pagetable. */
 	addr = xen_start_info->pt_base;
-	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+	size = xen_start_info->nr_pt_frames * PAGE_SIZE;
 
-	xen_cleanhighmap(addr, addr + size);
+	xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
 	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
-	/* This is superfluous and is not necessary, but you know what
-	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
-	 * anything at this stage. */
-	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
 }
 #endif
 

+ 3 - 0
block/blk-core.c

@@ -854,6 +854,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+	mutex_init(&q->blk_trace_mutex);
+#endif
 	mutex_init(&q->sysfs_lock);
 	spin_lock_init(&q->__queue_lock);
 

+ 0 - 1
block/bsg-lib.c

@@ -154,7 +154,6 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
 failjob_rls_rqst_payload:
 	kfree(job->request_payload.sg_list);
 failjob_rls_job:
-	kfree(job);
 	return -ENOMEM;
 }
 

+ 1 - 1
block/partition-generic.c

@@ -112,7 +112,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	struct request_queue *q = dev_to_disk(dev)->queue;
+	struct request_queue *q = part_to_disk(p)->queue;
 	unsigned int inflight[2];
 	int cpu;
 

+ 9 - 7
drivers/acpi/apei/ghes.c

@@ -743,17 +743,19 @@ static int ghes_proc(struct ghes *ghes)
 	}
 	ghes_do_proc(ghes, ghes->estatus);
 
+out:
+	ghes_clear_estatus(ghes);
+
+	if (rc == -ENOENT)
+		return rc;
+
 	/*
 	 * GHESv2 type HEST entries introduce support for error acknowledgment,
 	 * so only acknowledge the error if this support is present.
 	 */
-	if (is_hest_type_generic_v2(ghes)) {
-		rc = ghes_ack_error(ghes->generic_v2);
-		if (rc)
-			return rc;
-	}
-out:
-	ghes_clear_estatus(ghes);
+	if (is_hest_type_generic_v2(ghes))
+		return ghes_ack_error(ghes->generic_v2);
+
 	return rc;
 }
 

+ 7 - 0
drivers/base/power/opp/core.c

@@ -1581,6 +1581,9 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 
 	opp->available = availability_req;
 
+	dev_pm_opp_get(opp);
+	mutex_unlock(&opp_table->lock);
+
 	/* Notify the change of the OPP availability */
 	if (availability_req)
 		blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE,
@@ -1589,8 +1592,12 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 		blocking_notifier_call_chain(&opp_table->head,
 					     OPP_EVENT_DISABLE, opp);
 
+	dev_pm_opp_put(opp);
+	goto put_table;
+
 unlock:
 unlock:
 	mutex_unlock(&opp_table->lock);
+put_table:
 	dev_pm_opp_put_opp_table(opp_table);
 	return r;
 }

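The ordering in this hunk is the usual reference-across-unlock pattern: pin the OPP while opp_table->lock is held, drop the lock, run the notifier chain, then drop the reference. A minimal sketch of the same ordering with hypothetical types and a plain kref (dev_pm_opp_get/put are effectively reference counting of this kind), not the driver's real API:

#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/slab.h>

struct demo_opp { struct kref ref; bool available; };
struct demo_table {
	struct mutex lock;
	struct blocking_notifier_head head;
};

static void demo_opp_release(struct kref *ref)
{
	kfree(container_of(ref, struct demo_opp, ref));
}

static void demo_enable_and_notify(struct demo_table *t, struct demo_opp *opp)
{
	mutex_lock(&t->lock);
	opp->available = true;
	kref_get(&opp->ref);		/* keep opp alive once the lock is gone */
	mutex_unlock(&t->lock);

	/* notifiers run without the table lock held */
	blocking_notifier_call_chain(&t->head, 0, opp);

	kref_put(&opp->ref, demo_opp_release);	/* may be the final put */
}
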
+ 1 - 1
drivers/block/brd.c

@@ -342,7 +342,7 @@ static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
 
 
 	if (!brd)
 		return -ENODEV;
-	page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
+	page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
 	if (!page)
 		return -ENOSPC;
 	*kaddr = page_address(page);

+ 2 - 4
drivers/block/loop.h

@@ -67,10 +67,8 @@ struct loop_device {
 struct loop_cmd {
 	struct kthread_work work;
 	struct request *rq;
-	union {
-		bool use_aio; /* use AIO interface to handle I/O */
-		atomic_t ref; /* only for aio */
-	};
+	bool use_aio; /* use AIO interface to handle I/O */
+	atomic_t ref; /* only for aio */
 	long ret;
 	struct kiocb iocb;
 	struct bio_vec *bvec;

+ 6 - 0
drivers/block/nbd.c

@@ -1194,6 +1194,12 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 
+	/* The block layer will pass back some non-nbd ioctls in case we have
+	 * special handling for them, but we don't so just return an error.
+	 */
+	if (_IOC_TYPE(cmd) != 0xab)
+		return -EINVAL;
+
 	mutex_lock(&nbd->config_lock);
 
 
 	/* Don't allow ioctl operations on a nbd device that was created with

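_IOC_TYPE() extracts the 8-bit magic field of an ioctl number, and the nbd ioctls are all built with magic 0xab (NBD_SET_SOCK, for instance, is _IO(0xab, 0)), so the new check rejects foreign ioctls before the config mutex is ever taken. The check in isolation:

#include <linux/ioctl.h>
#include <linux/types.h>

/* true only for ioctl numbers carrying the nbd magic byte */
static bool is_nbd_ioctl(unsigned int cmd)
{
	return _IOC_TYPE(cmd) == 0xab;
}
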
+ 1 - 1
drivers/clocksource/numachip.c

@@ -43,7 +43,7 @@ static int numachip2_set_next_event(unsigned long delta, struct clock_event_devi
 	return 0;
 }
 
 
-static struct clock_event_device numachip2_clockevent = {
+static const struct clock_event_device numachip2_clockevent __initconst = {
 	.name            = "numachip2",
 	.rating          = 400,
 	.set_next_event  = numachip2_set_next_event,

+ 4 - 0
drivers/cpufreq/cpufreq-dt-platdev.c

@@ -118,6 +118,10 @@ static const struct of_device_id blacklist[] __initconst = {
 
 
 	{ .compatible = "sigma,tango4", },
 
 
+	{ .compatible = "ti,am33xx", },
+	{ .compatible = "ti,am43", },
+	{ .compatible = "ti,dra7", },
+
 	{ }
 };
 
 

+ 1 - 1
drivers/dma-buf/dma-buf.c

@@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(dma_buf_detach);
 struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 					enum dma_data_direction direction)
 {
-	struct sg_table *sg_table = ERR_PTR(-EINVAL);
+	struct sg_table *sg_table;
 
 
 	might_sleep();
 
 

+ 42 - 14
drivers/dma-buf/reservation.c

@@ -266,8 +266,7 @@ EXPORT_SYMBOL(reservation_object_add_excl_fence);
 * @dst: the destination reservation object
 * @src: the source reservation object
 *
-* Copy all fences from src to dst. Both src->lock as well as dst-lock must be
-* held.
+* Copy all fences from src to dst. dst-lock must be held.
 */
 int reservation_object_copy_fences(struct reservation_object *dst,
 				   struct reservation_object *src)
@@ -277,33 +276,62 @@ int reservation_object_copy_fences(struct reservation_object *dst,
 	size_t size;
 	unsigned i;
 
 
-	src_list = reservation_object_get_list(src);
+	rcu_read_lock();
+	src_list = rcu_dereference(src->fence);
 
 
+retry:
 	if (src_list) {
-		size = offsetof(typeof(*src_list),
-				shared[src_list->shared_count]);
+		unsigned shared_count = src_list->shared_count;
+
+		size = offsetof(typeof(*src_list), shared[shared_count]);
+		rcu_read_unlock();
+
 		dst_list = kmalloc(size, GFP_KERNEL);
 		if (!dst_list)
 			return -ENOMEM;
 
 
-		dst_list->shared_count = src_list->shared_count;
-		dst_list->shared_max = src_list->shared_count;
-		for (i = 0; i < src_list->shared_count; ++i)
-			dst_list->shared[i] =
-				dma_fence_get(src_list->shared[i]);
+		rcu_read_lock();
+		src_list = rcu_dereference(src->fence);
+		if (!src_list || src_list->shared_count > shared_count) {
+			kfree(dst_list);
+			goto retry;
+		}
+
+		dst_list->shared_count = 0;
+		dst_list->shared_max = shared_count;
+		for (i = 0; i < src_list->shared_count; ++i) {
+			struct dma_fence *fence;
+
+			fence = rcu_dereference(src_list->shared[i]);
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &fence->flags))
+				continue;
+
+			if (!dma_fence_get_rcu(fence)) {
+				kfree(dst_list);
+				src_list = rcu_dereference(src->fence);
+				goto retry;
+			}
+
+			if (dma_fence_is_signaled(fence)) {
+				dma_fence_put(fence);
+				continue;
+			}
+
+			dst_list->shared[dst_list->shared_count++] = fence;
+		}
 	} else {
 		dst_list = NULL;
 	}
 
 
+	new = dma_fence_get_rcu_safe(&src->fence_excl);
+	rcu_read_unlock();
+
 	kfree(dst->staged);
 	dst->staged = NULL;
 
 
 	src_list = reservation_object_get_list(dst);
-
 	old = reservation_object_get_excl(dst);
-	new = reservation_object_get_excl(src);
-
-	dma_fence_get(new);
 
 
 	preempt_disable();
 	write_seqcount_begin(&dst->seq);

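The rewritten loop is an instance of the RCU snapshot-and-retry idiom: sample the shared list under rcu_read_lock(), allocate outside the critical section, then re-check and take references with dma_fence_get_rcu(), restarting whenever a fence is already on its way out or the list grew in the meantime. A stripped-down model of the reference-grab step, using a hypothetical kref-backed object rather than struct dma_fence:

#include <linux/kref.h>
#include <linux/rcupdate.h>

struct demo_obj {
	struct kref ref;
	struct rcu_head rcu;
};

/* Returns a referenced object, or NULL when the caller must retry. */
static struct demo_obj *demo_obj_get_rcu(struct demo_obj __rcu **slot)
{
	struct demo_obj *obj;

	rcu_read_lock();
	obj = rcu_dereference(*slot);
	if (obj && !kref_get_unless_zero(&obj->ref))
		obj = NULL;	/* refcount already hit zero: stale snapshot */
	rcu_read_unlock();

	return obj;
}
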
+ 3 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -121,6 +121,7 @@ extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
 extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
+extern int amdgpu_compute_multipipe;
 
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -1310,6 +1311,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *filp);
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *filp);
 int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
@@ -1524,7 +1527,6 @@ struct amdgpu_device {
 
 
 	/* powerplay */
 	struct amd_powerplay		powerplay;
-	bool				pp_enabled;
 	bool				pp_force_state_enabled;
 
 
 	/* dpm */

+ 9 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -338,6 +338,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	struct cik_mqd *m;
 	uint32_t *mqd_hqd;
 	uint32_t reg, wptr_val, data;
+	bool valid_wptr = false;
 
 
 	m = get_mqd(mqd);
 
 
@@ -356,7 +357,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
 
 
-	if (read_user_wptr(mm, wptr, wptr_val))
+	/* read_user_ptr may take the mm->mmap_sem.
+	 * release srbm_mutex to avoid circular dependency between
+	 * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
+	 */
+	release_queue(kgd);
+	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+	acquire_queue(kgd, pipe_id, queue_id);
+	if (valid_wptr)
 		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
 
 
 	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);

+ 9 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -292,6 +292,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	struct vi_mqd *m;
 	uint32_t *mqd_hqd;
 	uint32_t reg, wptr_val, data;
+	bool valid_wptr = false;
 
 
 	m = get_mqd(mqd);
 
 
@@ -339,7 +340,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
 
 
-	if (read_user_wptr(mm, wptr, wptr_val))
+	/* read_user_ptr may take the mm->mmap_sem.
+	 * release srbm_mutex to avoid circular dependency between
+	 * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
+	 */
+	release_queue(kgd);
+	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+	acquire_queue(kgd, pipe_id, queue_id);
+	if (valid_wptr)
 		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
 
 
 	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);

+ 23 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

@@ -42,6 +42,28 @@ struct amdgpu_cgs_device {
 	struct amdgpu_device *adev =					\
 		((struct amdgpu_cgs_device *)cgs_device)->adev
 
 
+static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device,
+			int (*call_back_func)(struct amd_pp_init *, void **))
+{
+	CGS_FUNC_ADEV;
+	struct amd_pp_init pp_init;
+	struct amd_powerplay *amd_pp;
+
+	if (call_back_func == NULL)
+		return NULL;
+
+	amd_pp = &(adev->powerplay);
+	pp_init.chip_family = adev->family;
+	pp_init.chip_id = adev->asic_type;
+	pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
+	pp_init.feature_mask = amdgpu_pp_feature_mask;
+	pp_init.device = cgs_device;
+	if (call_back_func(&pp_init, &(amd_pp->pp_handle)))
+		return NULL;
+
+	return adev->powerplay.pp_handle;
+}
+
 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 				    enum cgs_gpu_mem_type type,
 				    uint64_t size, uint64_t align,
@@ -1179,6 +1201,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
 	.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
 	.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
+	.register_pp_handle = amdgpu_cgs_register_pp_handle,
 };
 
 
 static const struct cgs_os_ops amdgpu_cgs_os_ops = {

+ 8 - 8
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

@@ -231,7 +231,7 @@ amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
 		if (connector->encoder_ids[i] == 0)
 		if (connector->encoder_ids[i] == 0)
 			break;
 			break;
 
 
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 					connector->encoder_ids[i]);
 		if (!encoder)
 		if (!encoder)
 			continue;
 			continue;
@@ -256,7 +256,7 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
 		if (connector->encoder_ids[i] == 0)
 		if (connector->encoder_ids[i] == 0)
 			break;
 			break;
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 					connector->encoder_ids[i]);
 		if (!encoder)
 		if (!encoder)
 			continue;
 			continue;
@@ -372,7 +372,7 @@ amdgpu_connector_best_single_encoder(struct drm_connector *connector)
 
 
 	/* pick the encoder ids */
 	/* pick the encoder ids */
 	if (enc_id)
 	if (enc_id)
-		return drm_encoder_find(connector->dev, enc_id);
+		return drm_encoder_find(connector->dev, NULL, enc_id);
 	return NULL;
 	return NULL;
 }
 }
 
 
@@ -1077,7 +1077,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 			if (connector->encoder_ids[i] == 0)
 			if (connector->encoder_ids[i] == 0)
 				break;
 				break;
 
 
-			encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
+			encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
 			if (!encoder)
 			if (!encoder)
 				continue;
 				continue;
 
 
@@ -1134,7 +1134,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 		if (connector->encoder_ids[i] == 0)
 		if (connector->encoder_ids[i] == 0)
 			break;
 			break;
 
 
-		encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
+		encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
 		if (!encoder)
 		if (!encoder)
 			continue;
 			continue;
 
 
@@ -1153,7 +1153,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 	/* then check use digitial */
 	/* then check use digitial */
 	/* pick the first one */
 	/* pick the first one */
 	if (enc_id)
 	if (enc_id)
-		return drm_encoder_find(connector->dev, enc_id);
+		return drm_encoder_find(connector->dev, NULL, enc_id);
 	return NULL;
 	return NULL;
 }
 }
 
 
@@ -1294,7 +1294,7 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn
 		if (connector->encoder_ids[i] == 0)
 		if (connector->encoder_ids[i] == 0)
 			break;
 			break;
 
 
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 					connector->encoder_ids[i]);
 		if (!encoder)
 		if (!encoder)
 			continue;
 			continue;
@@ -1323,7 +1323,7 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
 		if (connector->encoder_ids[i] == 0)
 		if (connector->encoder_ids[i] == 0)
 			break;
 			break;
-		encoder = drm_encoder_find(connector->dev,
+		encoder = drm_encoder_find(connector->dev, NULL,
 					connector->encoder_ids[i]);
 					connector->encoder_ids[i]);
 		if (!encoder)
 		if (!encoder)
 			continue;
 			continue;

+ 61 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -25,6 +25,7 @@
  *    Jerome Glisse <glisse@freedesktop.org>
  *    Jerome Glisse <glisse@freedesktop.org>
  */
  */
 #include <linux/pagemap.h>
 #include <linux/pagemap.h>
+#include <linux/sync_file.h>
 #include <drm/drmP.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
 #include <drm/drm_syncobj.h>
@@ -1330,6 +1331,66 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	return fence;
 	return fence;
 }
 }
 
 
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *filp)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	union drm_amdgpu_fence_to_handle *info = data;
+	struct dma_fence *fence;
+	struct drm_syncobj *syncobj;
+	struct sync_file *sync_file;
+	int fd, r;
+
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
+
+	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	switch (info->in.what) {
+	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
+		r = drm_syncobj_create(&syncobj, 0, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
+		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
+		drm_syncobj_put(syncobj);
+		return r;
+
+	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
+		r = drm_syncobj_create(&syncobj, 0, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
+		r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
+		drm_syncobj_put(syncobj);
+		return r;
+
+	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
+		fd = get_unused_fd_flags(O_CLOEXEC);
+		if (fd < 0) {
+			dma_fence_put(fence);
+			return fd;
+		}
+
+		sync_file = sync_file_create(fence);
+		dma_fence_put(fence);
+		if (!sync_file) {
+			put_unused_fd(fd);
+			return -ENOMEM;
+		}
+
+		fd_install(fd, sync_file->file);
+		info->out.handle = fd;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
 /**
 /**
  * amdgpu_cs_wait_all_fence - wait on all fences to signal
  * amdgpu_cs_wait_all_fence - wait on all fences to signal
  *
  *

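Seen from userspace, the new ioctl takes the same fence tuple as the wait ioctls plus a selector and hands back a handle or file descriptor. A rough libdrm-based sketch for the sync_file case; the union layout mirrors the in.fence/in.what/out.handle accesses above, but the drm_amdgpu_fence sub-field names (ctx_id, ip_type, ring, seq_no) and the DRM_AMDGPU_FENCE_TO_HANDLE command index are quoted from memory and should be checked against the final amdgpu_drm.h:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

/* Sketch: export an amdgpu CS fence as a pollable sync_file fd. */
static int fence_to_sync_file(int drm_fd, uint32_t ctx_id, uint32_t ip_type,
			      uint32_t ring, uint64_t seq_no)
{
	union drm_amdgpu_fence_to_handle args;
	int ret;

	memset(&args, 0, sizeof(args));
	args.in.fence.ctx_id = ctx_id;
	args.in.fence.ip_type = ip_type;
	args.in.fence.ring = ring;
	args.in.fence.seq_no = seq_no;
	args.in.what = AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD;

	ret = drmCommandWriteRead(drm_fd, DRM_AMDGPU_FENCE_TO_HANDLE,
				  &args, sizeof(args));
	if (ret)
		return ret;

	return (int)args.out.handle;	/* the sync_file fd */
}
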
+ 26 - 11
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -56,6 +56,7 @@
 #include "amdgpu_vf_error.h"
 #include "amdgpu_vf_error.h"
 
 
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_pm.h"
 
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -1603,6 +1604,7 @@ static int amdgpu_init(struct amdgpu_device *adev)
 			return r;
 			return r;
 		}
 		}
 		adev->ip_blocks[i].status.sw = true;
 		adev->ip_blocks[i].status.sw = true;
+
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_vram_scratch_init(adev);
 			r = amdgpu_vram_scratch_init(adev);
@@ -1633,6 +1635,11 @@ static int amdgpu_init(struct amdgpu_device *adev)
 		}
 		}
 	}
 	}
 
 
+	mutex_lock(&adev->firmware.mutex);
+	if (amdgpu_ucode_init_bo(adev))
+		adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
+	mutex_unlock(&adev->firmware.mutex);
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.sw)
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
 			continue;
@@ -1768,6 +1775,8 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 
 
 		adev->ip_blocks[i].status.hw = false;
 		adev->ip_blocks[i].status.hw = false;
 	}
 	}
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
+		amdgpu_ucode_fini_bo(adev);
 
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.sw)
 		if (!adev->ip_blocks[i].status.sw)
@@ -2040,6 +2049,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->srbm_mutex);
 	mutex_init(&adev->srbm_mutex);
 	mutex_init(&adev->grbm_idx_mutex);
 	mutex_init(&adev->grbm_idx_mutex);
 	mutex_init(&adev->mn_lock);
 	mutex_init(&adev->mn_lock);
+	mutex_init(&adev->virt.vf_errors.lock);
 	hash_init(adev->mn_hash);
 	hash_init(adev->mn_hash);
 
 
 	amdgpu_check_arguments(adev);
 	amdgpu_check_arguments(adev);
@@ -2125,7 +2135,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_atombios_init(adev);
 	r = amdgpu_atombios_init(adev);
 	if (r) {
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
 		goto failed;
 		goto failed;
 	}
 	}
 
 
@@ -2136,7 +2146,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (amdgpu_vpost_needed(adev)) {
 	if (amdgpu_vpost_needed(adev)) {
 		if (!adev->bios) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "no vBIOS found\n");
 			dev_err(adev->dev, "no vBIOS found\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
 			r = -EINVAL;
 			r = -EINVAL;
 			goto failed;
 			goto failed;
 		}
 		}
@@ -2144,7 +2154,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 		if (r) {
 		if (r) {
 			dev_err(adev->dev, "gpu post error!\n");
 			dev_err(adev->dev, "gpu post error!\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
 			goto failed;
 			goto failed;
 		}
 		}
 	} else {
 	} else {
@@ -2156,7 +2166,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		r = amdgpu_atomfirmware_get_clock_info(adev);
 		r = amdgpu_atomfirmware_get_clock_info(adev);
 		if (r) {
 		if (r) {
 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
 			goto failed;
 			goto failed;
 		}
 		}
 	} else {
 	} else {
@@ -2164,7 +2174,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		r = amdgpu_atombios_get_clock_info(adev);
 		r = amdgpu_atombios_get_clock_info(adev);
 		if (r) {
 		if (r) {
 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-			amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
 			goto failed;
 			goto failed;
 		}
 		}
 		/* init i2c buses */
 		/* init i2c buses */
@@ -2175,7 +2185,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_fence_driver_init(adev);
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 	if (r) {
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
 		goto failed;
 		goto failed;
 	}
 	}
 
 
@@ -2185,7 +2195,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_init(adev);
 	r = amdgpu_init(adev);
 	if (r) {
 	if (r) {
 		dev_err(adev->dev, "amdgpu_init failed\n");
 		dev_err(adev->dev, "amdgpu_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
 		amdgpu_fini(adev);
 		amdgpu_fini(adev);
 		goto failed;
 		goto failed;
 	}
 	}
@@ -2205,7 +2215,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_ib_pool_init(adev);
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
 		goto failed;
 		goto failed;
 	}
 	}
 
 
@@ -2215,6 +2225,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 
 	amdgpu_fbdev_init(adev);
 	amdgpu_fbdev_init(adev);
 
 
+	r = amdgpu_pm_sysfs_init(adev);
+	if (r)
+		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+
 	r = amdgpu_gem_debugfs_init(adev);
 	r = amdgpu_gem_debugfs_init(adev);
 	if (r)
 	if (r)
 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2254,7 +2268,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_late_init(adev);
 	r = amdgpu_late_init(adev);
 	if (r) {
 	if (r) {
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
 		goto failed;
 		goto failed;
 	}
 	}
 
 
@@ -2311,6 +2325,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	iounmap(adev->rmmio);
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
 	adev->rmmio = NULL;
 	amdgpu_doorbell_fini(adev);
 	amdgpu_doorbell_fini(adev);
+	amdgpu_pm_sysfs_fini(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 }
 }
 
 
@@ -2936,7 +2951,7 @@ out:
 		}
 		}
 	} else {
 	} else {
 		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		dev_err(adev->dev, "asic resume failed (%d).\n", r);
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			if (adev->rings[i] && adev->rings[i]->sched.thread) {
 			if (adev->rings[i] && adev->rings[i]->sched.thread) {
 				kthread_unpark(adev->rings[i]->sched.thread);
 				kthread_unpark(adev->rings[i]->sched.thread);
@@ -2950,7 +2965,7 @@ out:
 	if (r) {
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
 		dev_info(adev->dev, "GPU reset failed\n");
-		amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
 	}
 	}
 	else {
 	else {
 		dev_info(adev->dev, "GPU reset successed!\n");
 		dev_info(adev->dev, "GPU reset successed!\n");

+ 4 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h

@@ -356,6 +356,10 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->switch_power_profile(\
 		((adev)->powerplay.pp_funcs->switch_power_profile(\
 			(adev)->powerplay.pp_handle, type))
 			(adev)->powerplay.pp_handle, type))
 
 
+#define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
+		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
+			(adev)->powerplay.pp_handle, msg_id))
+
 struct amdgpu_dpm {
 struct amdgpu_dpm {
 	struct amdgpu_ps        *ps;
 	struct amdgpu_ps        *ps;
 	/* number of valid power states */
 	/* number of valid power states */

+ 6 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -70,9 +70,10 @@
  * - 3.18.0 - Export gpu always on cu bitmap
  * - 3.18.0 - Export gpu always on cu bitmap
  * - 3.19.0 - Add support for UVD MJPEG decode
  * - 3.19.0 - Add support for UVD MJPEG decode
  * - 3.20.0 - Add support for local BOs
  * - 3.20.0 - Add support for local BOs
+ * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
  */
  */
 #define KMS_DRIVER_MAJOR	3
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	20
+#define KMS_DRIVER_MINOR	21
 #define KMS_DRIVER_PATCHLEVEL	0
 #define KMS_DRIVER_PATCHLEVEL	0
 
 
 int amdgpu_vram_limit = 0;
 int amdgpu_vram_limit = 0;
@@ -122,6 +123,7 @@ int amdgpu_cntl_sb_buf_per_se = 0;
 int amdgpu_param_buf_per_se = 0;
 int amdgpu_param_buf_per_se = 0;
 int amdgpu_job_hang_limit = 0;
 int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
 int amdgpu_lbpw = -1;
+int amdgpu_compute_multipipe = -1;
 
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -265,6 +267,9 @@ module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
 MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
 MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 
 
+MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
 #ifdef CONFIG_DRM_AMDGPU_SI
 
 
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)

+ 18 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

@@ -109,9 +109,26 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
 	}
 	}
 }
 }
 
 
+static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
+{
+	if (amdgpu_compute_multipipe != -1) {
+		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
+			 amdgpu_compute_multipipe);
+		return amdgpu_compute_multipipe == 1;
+	}
+
+	/* FIXME: spreading the queues across pipes causes perf regressions
+	 * on POLARIS11 compute workloads */
+	if (adev->asic_type == CHIP_POLARIS11)
+		return false;
+
+	return adev->gfx.mec.num_mec > 1;
+}
+
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
 {
 	int i, queue, pipe, mec;
 	int i, queue, pipe, mec;
+	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
 
 	/* policy for amdgpu compute queue ownership */
 	/* policy for amdgpu compute queue ownership */
 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
@@ -125,8 +142,7 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 		if (mec >= adev->gfx.mec.num_mec)
 		if (mec >= adev->gfx.mec.num_mec)
 			break;
 			break;
 
 
-		/* FIXME: spreading the queues across pipes causes perf regressions */
-		if (0) {
+		if (multipipe_policy) {
 			/* policy: amdgpu owns the first two queues of the first MEC */
 			/* policy: amdgpu owns the first two queues of the first MEC */
 			if (mec == 0 && queue < 2)
 			if (mec == 0 && queue < 2)
 				set_bit(i, adev->gfx.mec.queue_bitmap);
 				set_bit(i, adev->gfx.mec.queue_bitmap);

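The flat bit index walked above encodes (mec, pipe, queue); amdgpu unpacks it with plain div/mod arithmetic along the lines of the sketch below, where the per-pipe and per-mec counts stand in for the values read from adev->gfx.mec at runtime (function name and parameters are illustrative). The new amdgpu.compute_multipipe parameter simply forces the spread-across-pipes branch on or off instead of inferring it from the MEC count.

/* Illustrative decomposition of a flat compute-queue bit. */
static void decode_compute_queue_bit(unsigned int bit,
				     unsigned int queues_per_pipe,
				     unsigned int pipes_per_mec,
				     unsigned int *mec, unsigned int *pipe,
				     unsigned int *queue)
{
	*queue = bit % queues_per_pipe;
	*pipe  = (bit / queues_per_pipe) % pipes_per_mec;
	*mec   = bit / (queues_per_pipe * pipes_per_mec);
}
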
+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -1024,6 +1024,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	/* KMS */
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),

+ 41 - 42
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

@@ -64,10 +64,6 @@ static const struct cg_flag_name clocks[] = {
 
 
 void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
 void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
 {
 {
-	if (adev->pp_enabled)
-		/* TODO */
-		return;
-
 	if (adev->pm.dpm_enabled) {
 	if (adev->pm.dpm_enabled) {
 		mutex_lock(&adev->pm.mutex);
 		mutex_lock(&adev->pm.mutex);
 		if (power_supply_is_system_supplied() > 0)
 		if (power_supply_is_system_supplied() > 0)
@@ -118,7 +114,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 		goto fail;
 		goto fail;
 	}
 	}
 
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
 	} else {
 	} else {
 		mutex_lock(&adev->pm.mutex);
 		mutex_lock(&adev->pm.mutex);
@@ -303,7 +299,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 
 
 	if (strlen(buf) == 1)
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
 		adev->pp_force_state_enabled = false;
-	else if (adev->pp_enabled) {
+	else if (adev->powerplay.pp_funcs->dispatch_tasks &&
+			adev->powerplay.pp_funcs->get_pp_num_states) {
 		struct pp_states_info data;
 		struct pp_states_info data;
 
 
 		ret = kstrtoul(buf, 0, &idx);
 		ret = kstrtoul(buf, 0, &idx);
@@ -531,7 +528,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 	if (adev->powerplay.pp_funcs->set_sclk_od)
 	if (adev->powerplay.pp_funcs->set_sclk_od)
 		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
 		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
 
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
 	} else {
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
@@ -575,7 +572,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 	if (adev->powerplay.pp_funcs->set_mclk_od)
 	if (adev->powerplay.pp_funcs->set_mclk_od)
 		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
 		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
 
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
 	} else {
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
@@ -959,9 +956,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;
 		return 0;
 
 
-	if (adev->pp_enabled)
-		return effective_mode;
-
 	/* Skip fan attributes if fan is not present */
 	/* Skip fan attributes if fan is not present */
 	if (adev->pm.no_fan &&
 	if (adev->pm.no_fan &&
 	    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
 	    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
@@ -1317,6 +1311,9 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 	if (adev->pm.sysfs_initialized)
 	if (adev->pm.sysfs_initialized)
 		return 0;
 		return 0;
 
 
+	if (adev->pm.dpm_enabled == 0)
+		return 0;
+
 	if (adev->powerplay.pp_funcs->get_temperature == NULL)
 	if (adev->powerplay.pp_funcs->get_temperature == NULL)
 		return 0;
 		return 0;
 
 
@@ -1341,27 +1338,26 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 		return ret;
 		return ret;
 	}
 	}
 
 
-	if (adev->pp_enabled) {
-		ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_num_states\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_cur_state\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_force_state\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_table);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_table\n");
-			return ret;
-		}
+
+	ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_num_states\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_cur_state\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_force_state\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_table);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_table\n");
+		return ret;
 	}
 	}
 
 
 	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
 	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
@@ -1417,16 +1413,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 
 
 void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 {
 {
+	if (adev->pm.dpm_enabled == 0)
+		return;
+
 	if (adev->pm.int_hwmon_dev)
 	if (adev->pm.int_hwmon_dev)
 		hwmon_device_unregister(adev->pm.int_hwmon_dev);
 		hwmon_device_unregister(adev->pm.int_hwmon_dev);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_state);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_state);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
-	if (adev->pp_enabled) {
-		device_remove_file(adev->dev, &dev_attr_pp_num_states);
-		device_remove_file(adev->dev, &dev_attr_pp_cur_state);
-		device_remove_file(adev->dev, &dev_attr_pp_force_state);
-		device_remove_file(adev->dev, &dev_attr_pp_table);
-	}
+
+	device_remove_file(adev->dev, &dev_attr_pp_num_states);
+	device_remove_file(adev->dev, &dev_attr_pp_cur_state);
+	device_remove_file(adev->dev, &dev_attr_pp_force_state);
+	device_remove_file(adev->dev, &dev_attr_pp_table);
+
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
 	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
@@ -1457,7 +1456,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 			amdgpu_fence_wait_empty(ring);
 			amdgpu_fence_wait_empty(ring);
 	}
 	}
 
 
-	if (adev->pp_enabled) {
+	if (adev->powerplay.pp_funcs->dispatch_tasks) {
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL);
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL);
 	} else {
 	} else {
 		mutex_lock(&adev->pm.mutex);
 		mutex_lock(&adev->pm.mutex);
@@ -1592,15 +1591,15 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 	if  ((adev->flags & AMD_IS_PX) &&
 	if  ((adev->flags & AMD_IS_PX) &&
 	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
 	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
 		seq_printf(m, "PX asic powered off\n");
 		seq_printf(m, "PX asic powered off\n");
-	} else if (adev->pp_enabled) {
-		return amdgpu_debugfs_pm_info_pp(m, adev);
-	} else {
+	} else if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
 		mutex_lock(&adev->pm.mutex);
 		mutex_lock(&adev->pm.mutex);
 		if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
 		if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
 			adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
 			adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
 		else
 		else
 			seq_printf(m, "Debugfs support not implemented for this asic\n");
 			seq_printf(m, "Debugfs support not implemented for this asic\n");
 		mutex_unlock(&adev->pm.mutex);
 		mutex_unlock(&adev->pm.mutex);
+	} else {
+		return amdgpu_debugfs_pm_info_pp(m, adev);
 	}
 	}
 
 
 	return 0;
 	return 0;

+ 6 - 54
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -34,24 +34,6 @@
 #include "cik_dpm.h"
 #include "cik_dpm.h"
 #include "vi_dpm.h"
 #include "vi_dpm.h"
 
 
-static int amdgpu_create_pp_handle(struct amdgpu_device *adev)
-{
-	struct amd_pp_init pp_init;
-	struct amd_powerplay *amd_pp;
-	int ret;
-
-	amd_pp = &(adev->powerplay);
-	pp_init.chip_family = adev->family;
-	pp_init.chip_id = adev->asic_type;
-	pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
-	pp_init.feature_mask = amdgpu_pp_feature_mask;
-	pp_init.device = amdgpu_cgs_create_device(adev);
-	ret = amd_powerplay_create(&pp_init, &(amd_pp->pp_handle));
-	if (ret)
-		return -EINVAL;
-	return 0;
-}
-
 static int amdgpu_pp_early_init(void *handle)
 static int amdgpu_pp_early_init(void *handle)
 {
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -59,7 +41,6 @@ static int amdgpu_pp_early_init(void *handle)
 	int ret = 0;
 	int ret = 0;
 
 
 	amd_pp = &(adev->powerplay);
 	amd_pp = &(adev->powerplay);
-	adev->pp_enabled = false;
 	amd_pp->pp_handle = (void *)adev;
 	amd_pp->pp_handle = (void *)adev;
 
 
 	switch (adev->asic_type) {
 	switch (adev->asic_type) {
@@ -73,9 +54,7 @@ static int amdgpu_pp_early_init(void *handle)
 	case CHIP_STONEY:
 	case CHIP_STONEY:
 	case CHIP_VEGA10:
 	case CHIP_VEGA10:
 	case CHIP_RAVEN:
 	case CHIP_RAVEN:
-		adev->pp_enabled = true;
-		if (amdgpu_create_pp_handle(adev))
-			return -EINVAL;
+		amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
 		amd_pp->ip_funcs = &pp_ip_funcs;
 		amd_pp->ip_funcs = &pp_ip_funcs;
 		amd_pp->pp_funcs = &pp_dpm_funcs;
 		amd_pp->pp_funcs = &pp_dpm_funcs;
 		break;
 		break;
@@ -97,9 +76,7 @@ static int amdgpu_pp_early_init(void *handle)
 			amd_pp->ip_funcs = &ci_dpm_ip_funcs;
 			amd_pp->ip_funcs = &ci_dpm_ip_funcs;
 			amd_pp->pp_funcs = &ci_dpm_funcs;
 			amd_pp->pp_funcs = &ci_dpm_funcs;
 		} else {
 		} else {
-			adev->pp_enabled = true;
-			if (amdgpu_create_pp_handle(adev))
-				return -EINVAL;
+			amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
 			amd_pp->ip_funcs = &pp_ip_funcs;
 			amd_pp->ip_funcs = &pp_ip_funcs;
 			amd_pp->pp_funcs = &pp_dpm_funcs;
 			amd_pp->pp_funcs = &pp_dpm_funcs;
 		}
 		}
@@ -118,12 +95,9 @@ static int amdgpu_pp_early_init(void *handle)
 
 
 	if (adev->powerplay.ip_funcs->early_init)
 	if (adev->powerplay.ip_funcs->early_init)
 		ret = adev->powerplay.ip_funcs->early_init(
 		ret = adev->powerplay.ip_funcs->early_init(
-					adev->powerplay.pp_handle);
+					amd_pp->cgs_device ? amd_pp->cgs_device :
+					amd_pp->pp_handle);
 
 
-	if (ret == PP_DPM_DISABLED) {
-		adev->pm.dpm_enabled = false;
-		return 0;
-	}
 	return ret;
 	return ret;
 }
 }
 
 
@@ -137,11 +111,6 @@ static int amdgpu_pp_late_init(void *handle)
 		ret = adev->powerplay.ip_funcs->late_init(
 		ret = adev->powerplay.ip_funcs->late_init(
 					adev->powerplay.pp_handle);
 					adev->powerplay.pp_handle);
 
 
-	if (adev->pp_enabled && adev->pm.dpm_enabled) {
-		amdgpu_pm_sysfs_init(adev);
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_COMPLETE_INIT, NULL, NULL);
-	}
-
 	return ret;
 	return ret;
 }
 }
 
 
@@ -176,21 +145,11 @@ static int amdgpu_pp_hw_init(void *handle)
 	int ret = 0;
 	int ret = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
-	if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_init_bo(adev);
 
 
 	if (adev->powerplay.ip_funcs->hw_init)
 	if (adev->powerplay.ip_funcs->hw_init)
 		ret = adev->powerplay.ip_funcs->hw_init(
 		ret = adev->powerplay.ip_funcs->hw_init(
 					adev->powerplay.pp_handle);
 					adev->powerplay.pp_handle);
 
 
-	if (ret == PP_DPM_DISABLED) {
-		adev->pm.dpm_enabled = false;
-		return 0;
-	}
-
-	if ((amdgpu_dpm != 0) && !amdgpu_sriov_vf(adev))
-		adev->pm.dpm_enabled = true;
-
 	return ret;
 	return ret;
 }
 }
 
 
@@ -199,16 +158,10 @@ static int amdgpu_pp_hw_fini(void *handle)
 	int ret = 0;
 	int ret = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
-	if (adev->pp_enabled && adev->pm.dpm_enabled)
-		amdgpu_pm_sysfs_fini(adev);
-
 	if (adev->powerplay.ip_funcs->hw_fini)
 	if (adev->powerplay.ip_funcs->hw_fini)
 		ret = adev->powerplay.ip_funcs->hw_fini(
 		ret = adev->powerplay.ip_funcs->hw_fini(
 					adev->powerplay.pp_handle);
 					adev->powerplay.pp_handle);
 
 
-	if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_fini_bo(adev);
-
 	return ret;
 	return ret;
 }
 }
 
 
@@ -220,9 +173,8 @@ static void amdgpu_pp_late_fini(void *handle)
 		adev->powerplay.ip_funcs->late_fini(
 		adev->powerplay.ip_funcs->late_fini(
 			  adev->powerplay.pp_handle);
 			  adev->powerplay.pp_handle);
 
 
-
-	if (adev->pp_enabled)
-		amd_powerplay_destroy(adev->powerplay.pp_handle);
+	if (adev->powerplay.cgs_device)
+		amdgpu_cgs_destroy_device(adev->powerplay.cgs_device);
 }
 }
 
 
 static int amdgpu_pp_suspend(void *handle)
 static int amdgpu_pp_suspend(void *handle)

+ 0 - 9
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -411,13 +411,6 @@ static int psp_hw_init(void *handle)
 		return 0;
 		return 0;
 
 
 	mutex_lock(&adev->firmware.mutex);
 	mutex_lock(&adev->firmware.mutex);
-	/*
-	 * This sequence is just used on hw_init only once, no need on
-	 * resume.
-	 */
-	ret = amdgpu_ucode_init_bo(adev);
-	if (ret)
-		goto failed;
 
 
 	ret = psp_load_fw(adev);
 	ret = psp_load_fw(adev);
 	if (ret) {
 	if (ret) {
@@ -442,8 +435,6 @@ static int psp_hw_fini(void *handle)
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		return 0;
 		return 0;
 
 
-	amdgpu_ucode_fini_bo(adev);
-
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
 
 	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
 	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);

+ 5 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

@@ -121,7 +121,7 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
 
 
 static int amdgpu_lru_map(struct amdgpu_device *adev,
 static int amdgpu_lru_map(struct amdgpu_device *adev,
 			  struct amdgpu_queue_mapper *mapper,
 			  struct amdgpu_queue_mapper *mapper,
-			  int user_ring,
+			  int user_ring, bool lru_pipe_order,
 			  struct amdgpu_ring **out_ring)
 			  struct amdgpu_ring **out_ring)
 {
 {
 	int r, i, j;
 	int r, i, j;
@@ -139,7 +139,7 @@ static int amdgpu_lru_map(struct amdgpu_device *adev,
 	}
 	}
 
 
 	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
 	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
-				j, out_ring);
+				j, lru_pipe_order, out_ring);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
@@ -284,8 +284,10 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
 		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
 		break;
 		break;
 	case AMDGPU_HW_IP_DMA:
 	case AMDGPU_HW_IP_DMA:
+		r = amdgpu_lru_map(adev, mapper, ring, false, out_ring);
+		break;
 	case AMDGPU_HW_IP_COMPUTE:
 	case AMDGPU_HW_IP_COMPUTE:
-		r = amdgpu_lru_map(adev, mapper, ring, out_ring);
+		r = amdgpu_lru_map(adev, mapper, ring, true, out_ring);
 		break;
 		break;
 	default:
 	default:
 		*out_ring = NULL;
 		*out_ring = NULL;

+ 20 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -315,14 +315,16 @@ static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
  * @type: amdgpu_ring_type enum
  * @type: amdgpu_ring_type enum
  * @blacklist: blacklisted ring ids array
  * @blacklist: blacklisted ring ids array
  * @num_blacklist: number of entries in @blacklist
  * @num_blacklist: number of entries in @blacklist
+ * @lru_pipe_order: find a ring from the least recently used pipe
  * @ring: output ring
  * @ring: output ring
  *
  *
  * Retrieve the amdgpu_ring structure for the least recently used ring of
  * Retrieve the amdgpu_ring structure for the least recently used ring of
  * a specific IP block (all asics).
  * a specific IP block (all asics).
  * Returns 0 on success, error on failure.
  * Returns 0 on success, error on failure.
  */
  */
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
-			int num_blacklist, struct amdgpu_ring **ring)
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
+			int *blacklist,	int num_blacklist,
+			bool lru_pipe_order, struct amdgpu_ring **ring)
 {
 {
 	struct amdgpu_ring *entry;
 	struct amdgpu_ring *entry;
 
 
@@ -337,10 +339,23 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
 		if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
 		if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
 			continue;
 			continue;
 
 
-		*ring = entry;
-		amdgpu_ring_lru_touch_locked(adev, *ring);
-		break;
+		if (!*ring) {
+			*ring = entry;
+
+			/* We are done for ring LRU */
+			if (!lru_pipe_order)
+				break;
+		}
+
+		/* Move all rings on the same pipe to the end of the list */
+		if (entry->pipe == (*ring)->pipe)
+			amdgpu_ring_lru_touch_locked(adev, entry);
 	}
 	}
+
+	/* Move the ring we found to the end of the list */
+	if (*ring)
+		amdgpu_ring_lru_touch_locked(adev, *ring);
+
 	spin_unlock(&adev->ring_lru_list_lock);
 	spin_unlock(&adev->ring_lru_list_lock);
 
 
 	if (!*ring) {
 	if (!*ring) {

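With lru_pipe_order set, the walk keeps the first usable ring but finishes the pass, demoting every ring that shares that ring's pipe to the tail of the LRU list so the next request lands on a different pipe. A simplified model of the list handling with hypothetical types and locking omitted (the real code walks adev->ring_lru_list under ring_lru_list_lock and also filters by ring type and blacklist):

#include <linux/list.h>

struct demo_ring {
	struct list_head lru;
	int pipe;
};

/* Pick the least recently used ring, then age out its whole pipe. */
static struct demo_ring *demo_pick_lru(struct list_head *lru_list)
{
	struct demo_ring *entry, *tmp, *found = NULL;
	LIST_HEAD(same_pipe);

	list_for_each_entry_safe(entry, tmp, lru_list, lru) {
		if (!found)
			found = entry;
		if (entry->pipe == found->pipe)
			list_move_tail(&entry->lru, &same_pipe);
	}

	/* Same-pipe rings go to the back, the ring we hand out last of all. */
	list_splice_tail_init(&same_pipe, lru_list);
	if (found)
		list_move_tail(&found->lru, lru_list);

	return found;
}
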
+ 3 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -201,8 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
-			int num_blacklist, struct amdgpu_ring **ring);
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
+			int *blacklist, int num_blacklist,
+			bool lru_pipe_order, struct amdgpu_ring **ring);
 void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 {

+ 25 - 29
drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c

@@ -25,30 +25,21 @@
 #include "amdgpu_vf_error.h"
 #include "amdgpu_vf_error.h"
 #include "mxgpu_ai.h"
 #include "mxgpu_ai.h"
 
 
-#define AMDGPU_VF_ERROR_ENTRY_SIZE    16 
-
-/* struct error_entry - amdgpu VF error information. */
-struct amdgpu_vf_error_buffer {
-	int read_count;
-	int write_count;
-	uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
-	uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
-	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
-};
-
-struct amdgpu_vf_error_buffer admgpu_vf_errors;
-
-
-void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data)
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+			 uint16_t sub_error_code,
+			 uint16_t error_flags,
+			 uint64_t error_data)
 {
 {
 	int index;
 	int index;
 	uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
 	uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
 
 
-	index = admgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
-	admgpu_vf_errors.code [index] = error_code;
-	admgpu_vf_errors.flags [index] = error_flags;
-	admgpu_vf_errors.data [index] = error_data;
-	admgpu_vf_errors.write_count ++;
+	mutex_lock(&adev->virt.vf_errors.lock);
+	index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+	adev->virt.vf_errors.code [index] = error_code;
+	adev->virt.vf_errors.flags [index] = error_flags;
+	adev->virt.vf_errors.data [index] = error_data;
+	adev->virt.vf_errors.write_count ++;
+	mutex_unlock(&adev->virt.vf_errors.lock);
 }
 }
 
 
 
 
@@ -58,7 +49,8 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
 	u32 data1, data2, data3;
 	u32 data1, data2, data3;
 	int index;
 	int index;
 
 
-	if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
+	if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) ||
+	    (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
 		return;
 		return;
 	}
 	}
 /*
 /*
@@ -68,18 +60,22 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
 		return;
 		return;
 	}
 	}
 */
 */
+
+	mutex_lock(&adev->virt.vf_errors.lock);
 	/* The errors are overlay of array, correct read_count as full. */
 	/* The errors are overlay of array, correct read_count as full. */
-	if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
-		admgpu_vf_errors.read_count = admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
+	if (adev->virt.vf_errors.write_count - adev->virt.vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
+		adev->virt.vf_errors.read_count = adev->virt.vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
 	}
 	}
 
 
-	while (admgpu_vf_errors.read_count < admgpu_vf_errors.write_count) {
-		index =admgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
-		data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]);
-		data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF;
-		data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF;
+	while (adev->virt.vf_errors.read_count < adev->virt.vf_errors.write_count) {
+		index =adev->virt.vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+		data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(adev->virt.vf_errors.code[index],
+							   adev->virt.vf_errors.flags[index]);
+		data2 = adev->virt.vf_errors.data[index] & 0xFFFFFFFF;
+		data3 = (adev->virt.vf_errors.data[index] >> 32) & 0xFFFFFFFF;
 
 
 		adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
 		adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
-		admgpu_vf_errors.read_count ++;
+		adev->virt.vf_errors.read_count ++;
 	}
 	}
+	mutex_unlock(&adev->virt.vf_errors.lock);
 }
 }

+ 4 - 1
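The error log is a small lossy ring: write_count grows monotonically, entries live at count % AMDGPU_VF_ERROR_ENTRY_SIZE, and when the writer laps the reader the reader is snapped forward so only the most recent sixteen records get transmitted. The clamp in isolation (a sketch mirroring the arithmetic above, not the driver code):

#define DEMO_ENTRY_SIZE 16	/* stands in for AMDGPU_VF_ERROR_ENTRY_SIZE */

/* Never let the reader trail the writer by more than one ring's worth. */
static int demo_oldest_readable(int read_count, int write_count)
{
	if (write_count - read_count > DEMO_ENTRY_SIZE)
		read_count = write_count - DEMO_ENTRY_SIZE;
	return read_count;
}
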
drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h

@@ -56,7 +56,10 @@ enum AMDGIM_ERROR_CATEGORY {
 	AMDGIM_ERROR_CATEGORY_MAX
 };
 
 
-void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data);
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+			 uint16_t sub_error_code,
+			 uint16_t error_flags,
+			 uint64_t error_data);
 void amdgpu_vf_error_trans_all (struct amdgpu_device *adev);
 
 
 #endif /* __VF_ERROR_H__ */

+ 13 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

@@ -36,6 +36,18 @@ struct amdgpu_mm_table {
 	uint64_t		gpu_addr;
 	uint64_t		gpu_addr;
 };
 };
 
 
+#define AMDGPU_VF_ERROR_ENTRY_SIZE    16
+
+/* struct error_entry - amdgpu VF error information. */
+struct amdgpu_vf_error_buffer {
+	struct mutex lock;
+	int read_count;
+	int write_count;
+	uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
+	uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
+	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
+};
+
 /**
 /**
  * struct amdgpu_virt_ops - amdgpu device virt operations
  * struct amdgpu_virt_ops - amdgpu device virt operations
  */
  */
@@ -59,6 +71,7 @@ struct amdgpu_virt {
 	struct work_struct		flr_work;
 	struct work_struct		flr_work;
 	struct amdgpu_mm_table		mm_table;
 	struct amdgpu_mm_table		mm_table;
 	const struct amdgpu_virt_ops	*ops;
 	const struct amdgpu_virt_ops	*ops;
+	struct amdgpu_vf_error_buffer   vf_errors;
 };
 };
 
 
 #define AMDGPU_CSA_SIZE    (8 * 1024)
 #define AMDGPU_CSA_SIZE    (8 * 1024)

+ 35 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -2541,7 +2541,8 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
  * @adev: amdgpu_device pointer
  * @adev: amdgpu_device pointer
  * @fragment_size_default: the default fragment size if it's set auto
  * @fragment_size_default: the default fragment size if it's set auto
  */
  */
-void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
+void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
+				 uint32_t fragment_size_default)
 {
 {
 	if (amdgpu_vm_fragment_size == -1)
 	if (amdgpu_vm_fragment_size == -1)
 		adev->vm_manager.fragment_size = fragment_size_default;
 		adev->vm_manager.fragment_size = fragment_size_default;
@@ -2555,7 +2556,8 @@ void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_s
  * @adev: amdgpu_device pointer
  * @adev: amdgpu_device pointer
  * @vm_size: the default vm size if it's set auto
  * @vm_size: the default vm size if it's set auto
  */
  */
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
+			   uint32_t fragment_size_default)
 {
 	/* adjust vm size firstly */
 	if (amdgpu_vm_size == -1)
@@ -2682,6 +2684,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	INIT_KFIFO(vm->faults);
+	vm->fault_credit = 16;
 
 	return 0;
 
@@ -2776,6 +2779,36 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
 }
 
+/**
+ * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: PASID do identify the VM
+ *
+ * This function is expected to be called in interrupt context. Returns
+ * true if there was fault credit, false otherwise
+ */
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+				  unsigned int pasid)
+{
+	struct amdgpu_vm *vm;
+
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	if (!vm)
+		/* VM not found, can't track fault credit */
+		return true;
+
+	/* No lock needed. only accessed by IRQ handler */
+	if (!vm->fault_credit)
+		/* Too many faults in this VM */
+		return false;
+
+	vm->fault_credit--;
+	return true;
+}
+
 /**
  * amdgpu_vm_manager_init - init the VM manager
  *

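The hunk above gives each VM a budget of 16 interrupt-time fault reports (vm->fault_credit = 16); amdgpu_vm_pasid_fault_credit() spends one credit per fault and starts returning false once the budget is gone, which is what lets the IH prescreen hooks further down drop non-retry fault storms from a misbehaving VM. A minimal userspace model of that accounting follows; the lookup table stands in for the pasid_idr and all names here are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define MAX_PASID 8

struct vm { unsigned int fault_credit; };

static struct vm *vms[MAX_PASID];	/* NULL means no VM registered for this PASID */

static bool pasid_fault_credit(unsigned int pasid)
{
	struct vm *vm = (pasid < MAX_PASID) ? vms[pasid] : NULL;

	if (!vm)
		return true;		/* unknown VM: cannot track, let it through */
	if (!vm->fault_credit)
		return false;		/* credit exhausted: drop further faults */
	vm->fault_credit--;
	return true;
}

int main(void)
{
	struct vm v = { .fault_credit = 16 };	/* same budget as the hunk above */
	int dropped = 0;

	vms[3] = &v;
	for (int i = 0; i < 20; i++)
		if (!pasid_fault_credit(3))
			dropped++;
	printf("dropped %d of 20 faults\n", dropped);	/* prints: dropped 4 of 20 */
	return 0;
}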
+ 6 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -165,8 +165,11 @@ struct amdgpu_vm {
 	/* Flag to indicate ATS support from PTE for GFX9 */
 	bool			pte_support_ats;
 
-	/* Up to 128 pending page faults */
+	/* Up to 128 pending retry page faults */
 	DECLARE_KFIFO(faults, u64, 128);
+
+	/* Limit non-retry fault storms */
+	unsigned int		fault_credit;
 };
 
 struct amdgpu_vm_id {
@@ -244,6 +247,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+				  unsigned int pasid);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);

+ 0 - 1
drivers/gpu/drm/amd/amdgpu/ci_dpm.c

@@ -6365,7 +6365,6 @@ static int ci_dpm_sw_fini(void *handle)
 	flush_work(&adev->pm.dpm.thermal.work);
 
 	mutex_lock(&adev->pm.mutex);
-	amdgpu_pm_sysfs_fini(adev);
 	ci_dpm_fini(adev);
 	mutex_unlock(&adev->pm.mutex);
 

+ 17 - 2
drivers/gpu/drm/amd/amdgpu/cik_ih.c

@@ -237,8 +237,23 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
  /**

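The prescreen added above (and repeated for cz_ih and iceland_ih below) parses the IV entry at the current read pointer: the IH ring is an array of 32-bit dwords, so rptr >> 2 turns the byte offset into a dword index; source IDs 146 and 147 are the VM fault sources, the PASID lives in the upper 16 bits of the third dword, and a rejected entry is skipped by advancing rptr by the 16-byte entry size. A standalone sketch of that decoding follows (native-endian, so the kernel's le32_to_cpu() is left out); has_fault_credit() is a stub standing in for amdgpu_vm_pasid_fault_credit().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool has_fault_credit(uint16_t pasid)
{
	(void)pasid;
	return false;			/* pretend this VM's credit is exhausted */
}

/* ring: IH ring as 32-bit dwords; rptr: read pointer in bytes.
 * Returns true if the entry should be processed, false if it was skipped. */
static bool prescreen_iv(const uint32_t *ring, uint32_t *rptr)
{
	uint32_t ring_index = *rptr >> 2;	/* byte offset -> dword index */
	uint32_t src_id = ring[ring_index] & 0xff;
	uint16_t pasid;

	switch (src_id) {
	case 146:				/* VM fault source IDs */
	case 147:
		pasid = ring[ring_index + 2] >> 16;
		if (!pasid || has_fault_credit(pasid))
			return true;
		break;
	default:
		return true;			/* not a VM fault */
	}

	*rptr += 16;				/* skip this 4-dword entry */
	return false;
}

int main(void)
{
	uint32_t ring[4] = { 146, 0, 7u << 16, 0 };	/* fake VM-fault entry, PASID 7 */
	uint32_t rptr = 0;

	printf("keep=%d rptr=%u\n", prescreen_iv(ring, &rptr), rptr);	/* keep=0 rptr=16 */
	return 0;
}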
+ 17 - 2
drivers/gpu/drm/amd/amdgpu/cz_ih.c

@@ -216,8 +216,23 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
 /**

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/dce_virtual.c

@@ -288,7 +288,7 @@ dce_virtual_encoder(struct drm_connector *connector)
 		if (connector->encoder_ids[i] == 0)
 			break;
 
-		encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
+		encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
 		if (!encoder)
 			continue;
 
@@ -298,7 +298,7 @@ dce_virtual_encoder(struct drm_connector *connector)
 
 	/* pick the first one */
 	if (enc_id)
-		return drm_encoder_find(connector->dev, enc_id);
+		return drm_encoder_find(connector->dev, NULL, enc_id);
 	return NULL;
 }
 

+ 188 - 1
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

@@ -636,7 +636,194 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 				NUM_BANKS(ADDR_SURF_2_BANK);
 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
 			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
-	} else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) {
+	} else if (adev->asic_type == CHIP_OLAND) {
+		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[1] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[2] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[3] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[4] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[5] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[6] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[7] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[8] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[9] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[10] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[11] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[12] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[13] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[14] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[15] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[16] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[17] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(split_equal_to_row_size) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[21] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[22] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+		tilemode[23] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[24] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+		tilemode[25] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				NUM_BANKS(ADDR_SURF_8_BANK) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1);
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+	} else if (adev->asic_type == CHIP_HAINAN) {
 		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 				PIPE_CONFIG(ADDR_SURF_P2) |

+ 29 - 58
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -4132,18 +4132,12 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
 	gfx_v8_0_rlc_reset(adev);
 	gfx_v8_0_init_pg(adev);
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
-			/* legacy rlc firmware loading */
-			r = gfx_v8_0_rlc_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_RLC_G);
-			if (r)
-				return -EINVAL;
-		}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		/* legacy rlc firmware loading */
+		r = gfx_v8_0_rlc_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	gfx_v8_0_rlc_start(adev);
@@ -4959,43 +4953,15 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
 	if (!(adev->flags & AMD_IS_APU))
 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 			/* legacy firmware loading */
-			r = gfx_v8_0_cp_gfx_load_microcode(adev);
-			if (r)
-				return r;
+		r = gfx_v8_0_cp_gfx_load_microcode(adev);
+		if (r)
+			return r;
 
-			r = gfx_v8_0_cp_compute_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_CP_CE);
-			if (r)
-				return -EINVAL;
-
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_CP_PFP);
-			if (r)
-				return -EINVAL;
-
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_CP_ME);
-			if (r)
-				return -EINVAL;
-
-			if (adev->asic_type == CHIP_TOPAZ) {
-				r = gfx_v8_0_cp_compute_load_microcode(adev);
-				if (r)
-					return r;
-			} else {
-				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-										 AMDGPU_UCODE_ID_CP_MEC1);
-				if (r)
-					return -EINVAL;
-			}
-		}
+		r = gfx_v8_0_cp_compute_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	r = gfx_v8_0_cp_gfx_resume(adev);
@@ -6018,7 +5984,6 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
 {
 	uint32_t msg_id, pp_state = 0;
 	uint32_t pp_support_state = 0;
-	void *pp_handle = adev->powerplay.pp_handle;
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
@@ -6036,7 +6001,8 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_CG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
@@ -6057,7 +6023,8 @@ static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_MG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	return 0;
@@ -6069,7 +6036,6 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 
 	uint32_t msg_id, pp_state = 0;
 	uint32_t pp_support_state = 0;
-	void *pp_handle = adev->powerplay.pp_handle;
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
@@ -6087,7 +6053,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_CG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
@@ -6106,7 +6073,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_3D,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
@@ -6127,7 +6095,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_MG,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
@@ -6142,7 +6111,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 				PP_BLOCK_GFX_RLC,
 				pp_support_state,
 				pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
@@ -6156,7 +6126,8 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
 			PP_BLOCK_GFX_CP,
 			pp_support_state,
 			pp_state);
-		amd_set_clockgating_by_smu(pp_handle, msg_id);
+		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
+			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 	}
 
 	return 0;
@@ -7076,7 +7047,7 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 {
 	uint64_t ce_payload_addr;
 	int cnt_ce;
-	static union {
+	union {
 		struct vi_ce_ib_state regular;
 		struct vi_ce_ib_state_chained_ib chained;
 	} ce_payload = {};
@@ -7105,7 +7076,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 {
 	uint64_t de_payload_addr, gds_addr, csa_addr;
 	int cnt_de;
-	static union {
+	union {
 		struct vi_de_ib_state regular;
 		struct vi_de_ib_state_chained_ib chained;
 	} de_payload = {};

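Dropping `static` from the ce/de payload unions above matters because the emit functions can run for more than one ring at a time: a function-local static is a single object shared by every caller, while an automatic variable is private to each call's stack frame. A tiny illustration of that sharing in ordinary C (not driver code):

#include <stdio.h>

static int *shared_payload(int v)
{
	static int payload;	/* one object, shared by all calls */

	payload = v;
	return &payload;
}

int main(void)
{
	int *a = shared_payload(1);
	int *b = shared_payload(2);	/* overwrites the value 'a' points at */

	printf("%d %d same_object=%d\n", *a, *b, a == b);	/* 2 2 same_object=1 */
	return 0;
}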
+ 3 - 3
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

@@ -3583,7 +3583,7 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	u32 ref_and_mask, reg_mem_engine;
-	struct nbio_hdp_flush_reg *nbio_hf_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg;
 
 	if (ring->adev->flags & AMD_IS_APU)
 		nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;
@@ -3806,7 +3806,7 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
 
 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 {
-	static struct v9_ce_ib_state ce_payload = {0};
+	struct v9_ce_ib_state ce_payload = {0};
 	uint64_t csa_addr;
 	int cnt;
 
@@ -3825,7 +3825,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 
 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 {
-	static struct v9_de_ib_state de_payload = {0};
+	struct v9_de_ib_state de_payload = {0};
 	uint64_t csa_addr, gds_addr;
 	int cnt;
 

+ 17 - 2
drivers/gpu/drm/amd/amdgpu/iceland_ih.c

@@ -216,8 +216,23 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
 /**

+ 0 - 7
drivers/gpu/drm/amd/amdgpu/kv_dpm.c

@@ -2969,16 +2969,10 @@ static int kv_dpm_late_init(void *handle)
 {
 	/* powerdown unused blocks for now */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int ret;
 
 	if (!amdgpu_dpm)
 		return 0;
 
-	/* init the sysfs and debugfs files late */
-	ret = amdgpu_pm_sysfs_init(adev);
-	if (ret)
-		return ret;
-
 	kv_dpm_powergate_acp(adev, true);
 	kv_dpm_powergate_samu(adev, true);
 
@@ -3040,7 +3034,6 @@ static int kv_dpm_sw_fini(void *handle)
 	flush_work(&adev->pm.dpm.thermal.work);
 
 	mutex_lock(&adev->pm.mutex);
-	amdgpu_pm_sysfs_fini(adev);
 	kv_dpm_fini(adev);
 	mutex_unlock(&adev->pm.mutex);
 

+ 20 - 24
drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c

@@ -215,31 +215,27 @@ void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags)
 		*flags |= AMD_CG_SUPPORT_BIF_LS;
 }
 
-struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
-struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
+const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
+	.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ),
+	.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE),
+	.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+	.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+	.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+	.ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+	.ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+	.ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+	.ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+	.ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+	.ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+	.ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+	.ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+	.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK
+};
 
-int nbio_v6_1_init(struct amdgpu_device *adev)
-{
-	nbio_v6_1_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
-	nbio_v6_1_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK;
-	nbio_v6_1_hdp_flush_reg.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK;
-
-	nbio_v6_1_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX);
-	nbio_v6_1_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA);
-
-	return 0;
-}
+const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data = {
+	.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX),
+	.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA),
+};
 
 void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev)
 {

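The nbio change above swaps a runtime init routine that filled writable globals for const tables built with designated initializers, so the register offsets and masks become read-only data set up once and there is nothing left for nbio_v6_1_init()/nbio_v7_0_init() to do at load time. A small sketch of the pattern, with made-up fields and values:

#include <stdio.h>

struct flush_reg {
	unsigned int req_offset;
	unsigned int done_offset;
};

/* Before: a writable "struct flush_reg regs;" plus an init() that filled it.
 * After: the table is const data and needs no init call. */
static const struct flush_reg regs = {
	.req_offset  = 0x54,
	.done_offset = 0x55,
};

int main(void)
{
	printf("req=0x%x done=0x%x\n", regs.req_offset, regs.done_offset);
	return 0;
}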
+ 2 - 2
drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h

@@ -26,8 +26,8 @@
 
 #include "soc15_common.h"
 
-extern struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
-extern struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
+extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
+extern const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data;
 int nbio_v6_1_init(struct amdgpu_device *adev);
 u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev,
                                         uint32_t idx);

+ 20 - 24
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c

@@ -185,28 +185,24 @@ void nbio_v7_0_ih_control(struct amdgpu_device *adev)
 	WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
 }
 
-struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
-struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
+const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
+	.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ),
+	.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE),
+	.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
+	.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
+	.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
+	.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
+	.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
+	.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
+	.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
+	.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
+	.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
+	.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
+	.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+	.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
 
-int nbio_v7_0_init(struct amdgpu_device *adev)
-{
-	nbio_v7_0_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
-	nbio_v7_0_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
-	nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
-
-	nbio_v7_0_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
-	nbio_v7_0_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
-
-	return 0;
-}
+const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data = {
+	.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2),
+	.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)
+};

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h

@@ -26,8 +26,8 @@
 
 #include "soc15_common.h"
 
-extern struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
-extern struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
+extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
+extern const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
 int nbio_v7_0_init(struct amdgpu_device *adev);
 u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
                                         uint32_t idx);

+ 5 - 15
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c

@@ -561,21 +561,11 @@ static int sdma_v2_4_start(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
-			r = sdma_v2_4_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_SDMA0);
-			if (r)
-				return -EINVAL;
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_SDMA1);
-			if (r)
-				return -EINVAL;
-		}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		r = sdma_v2_4_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	/* halt the engine before programing */

+ 5 - 16
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

@@ -819,23 +819,12 @@ static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
  */
 static int sdma_v3_0_start(struct amdgpu_device *adev)
 {
-	int r, i;
+	int r;
 
-	if (!adev->pp_enabled) {
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
-			r = sdma_v3_0_load_microcode(adev);
-			if (r)
-				return r;
-		} else {
-			for (i = 0; i < adev->sdma.num_instances; i++) {
-				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-										 (i == 0) ?
-										 AMDGPU_UCODE_ID_SDMA0 :
-										 AMDGPU_UCODE_ID_SDMA1);
-				if (r)
-					return -EINVAL;
-			}
-		}
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		r = sdma_v3_0_load_microcode(adev);
+		if (r)
+			return r;
 	}
 
 	/* disable sdma engine before programing it */

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

@@ -371,7 +371,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
 static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	u32 ref_and_mask = 0;
-	struct nbio_hdp_flush_reg *nbio_hf_reg;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg;
 
 	if (ring->adev->flags & AMD_IS_APU)
 		nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;

Some files were not shown because too many files changed in this diff