
Merge tag 'drm-next-2018-06-06-1' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "This starts to support NVIDIA volta hardware with nouveau, and adds
  amdgpu support for the GPU in the Kabylake-G (the intel + radeon
  single package chip), along with some initial Intel icelake enabling.

  Summary:

  New Drivers:
   - v3d - driver for broadcom V3D V3.x+ hardware
   - xen-front - XEN PV display frontend

  core:
   - handle zpos normalization in the core
   - stop looking at legacy pointers in atomic paths
   - improved scheduler documentation
   - improved aspect ratio validation
   - aspect ratio support for 64:27 and 256:135
   - drop unused control node code.

  i915:
   - Icelake (ICL) enabling
   - GuC/HuC refactoring
   - PSR/PSR2 enabling and fixes
   - DPLL management refactoring
   - DP MST fixes
   - NV12 enabling
   - HDCP improvements
   - GEM/Execlist/reset improvements
   - GVT improvements
   - stolen memory first 4k fix

  amdgpu:
   - Vega 20 support
   - VEGAM support (Kabylake-G)
   - preOS scanout buffer reservation
   - power management gfxoff support for raven
   - SR-IOV fixes
   - Vega10 power profiles and clock voltage control
   - scatter/gather display support on CZ/ST

  amdkfd:
   - GFX9 dGPU support
   - userptr memory mapping

  nouveau:
   - major refactoring for Volta GV100 support

  tda998x:
   - HDMI i2c CEC support

  etnaviv:
   - removed unused logging code
   - license text cleanups
   - MMU handling improvements
   - timeout fence fix for 50 days uptime

  tegra:
   - IOMMU support in gr2d/gr3d drivers
   - zpos support

  vc4:
   - syncobj support
   - CTM, plane alpha and async cursor support

  analogix_dp:
   - HPD and aux chan fixes

  sun4i:
   - MIPI DSI support

  tilcdc:
   - clock divider fixes for OMAP-l138 LCDK board

  rcar-du:
   - R8A77965 support
   - dma-buf fences fixes
   - hardware indexed crtc/du group handling
   - generic zplane property support

  atmel-hlcdc:
   - generic zplane property support

  mediatek:
   - use generic video mode function

  exynos:
   - S5PV210 FIMD variant support
   - IPP v2 framework
   - more HW overlays support"

* tag 'drm-next-2018-06-06-1' of git://anongit.freedesktop.org/drm/drm: (1286 commits)
  drm/amdgpu: fix 32-bit build warning
  drm/exynos: fimc: signedness bug in fimc_setup_clocks()
  drm/exynos: scaler: fix static checker warning
  drm/amdgpu: Use dev_info() to report amdkfd is not supported for this ASIC
  drm/amd/display: Remove use of division operator for long longs
  drm/amdgpu: Update GFX info structure to match what vega20 used
  drm/amdgpu/pp: remove duplicate assignment
  drm/sched: add rcu_barrier after entity fini
  drm/amdgpu: move VM BOs on LRU again
  drm/amdgpu: consistenly use VM moved flag
  drm/amdgpu: kmap PDs/PTs in amdgpu_vm_update_directories
  drm/amdgpu: further optimize amdgpu_vm_handle_moved
  drm/amdgpu: cleanup amdgpu_vm_validate_pt_bos v2
  drm/amdgpu: rework VM state machine lock handling v2
  drm/amdgpu: Add runtime VCN PG support
  drm/amdgpu: Enable VCN static PG by default on RV
  drm/amdgpu: Add VCN static PG support on RV
  drm/amdgpu: Enable VCN CG by default on RV
  drm/amdgpu: Add static CG control for VCN on RV
  drm/exynos: Fix default value for zpos plane property
  ...
Linus Torvalds, 7 years ago
commit 135c5504a6
100 files changed, 5375 insertions(+), 1370 deletions(-)
  1. Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt (+16 -2)
  2. Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt (+133 -0)
  3. Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt (+1 -0)
  4. Documentation/devicetree/bindings/display/bridge/tda998x.txt (+3 -0)
  5. Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt (+60 -0)
  6. Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt (+7 -2)
  7. Documentation/devicetree/bindings/display/renesas,du.txt (+15 -13)
  8. Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt (+93 -0)
  9. Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt (+28 -0)
  10. Documentation/devicetree/bindings/gpu/samsung-scaler.txt (+27 -0)
  11. Documentation/gpu/drivers.rst (+2 -0)
  12. Documentation/gpu/i915.rst (+121 -20)
  13. Documentation/gpu/kms-properties.csv (+0 -1)
  14. Documentation/gpu/todo.rst (+18 -0)
  15. Documentation/gpu/xen-front.rst (+31 -0)
  16. MAINTAINERS (+20 -1)
  17. drivers/dma-buf/sync_debug.h (+0 -10)
  18. drivers/gpu/drm/Kconfig (+9 -4)
  19. drivers/gpu/drm/Makefile (+3 -1)
  20. drivers/gpu/drm/amd/amdgpu/Makefile (+19 -5)
  21. drivers/gpu/drm/amd/amdgpu/amdgpu.h (+42 -3)
  22. drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c (+5 -6)
  23. drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c (+83 -4)
  24. drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h (+19 -6)
  25. drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c (+0 -10)
  26. drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c (+0 -10)
  27. drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c (+1043 -0)
  28. drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c (+559 -25)
  29. drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c (+44 -0)
  30. drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h (+1 -0)
  31. drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c (+1 -1)
  32. drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c (+11 -4)
  33. drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c (+6 -241)
  34. drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c (+4 -4)
  35. drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c (+6 -12)
  36. drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c (+72 -9)
  37. drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c (+190 -5)
  38. drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (+87 -34)
  39. drivers/gpu/drm/amd/amdgpu/amdgpu_display.c (+9 -31)
  40. drivers/gpu/drm/amd/amdgpu/amdgpu_display.h (+1 -1)
  41. drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c (+20 -0)
  42. drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h (+3 -8)
  43. drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c (+19 -2)
  44. drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c (+6 -6)
  45. drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c (+6 -5)
  46. drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c (+11 -6)
  47. drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c (+37 -13)
  48. drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c (+6 -2)
  49. drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c (+58 -12)
  50. drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c (+88 -23)
  51. drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h (+9 -2)
  52. drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h (+0 -1)
  53. drivers/gpu/drm/amd/amdgpu/amdgpu_object.c (+72 -48)
  54. drivers/gpu/drm/amd/amdgpu/amdgpu_object.h (+34 -4)
  55. drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c (+282 -83)
  56. drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c (+10 -4)
  57. drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c (+1 -0)
  58. drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c (+10 -4)
  59. drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c (+21 -0)
  60. drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h (+11 -2)
  61. drivers/gpu/drm/amd/amdgpu/amdgpu_test.c (+12 -6)
  62. drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h (+1 -1)
  63. drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c (+441 -55)
  64. drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h (+1 -0)
  65. drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c (+51 -3)
  66. drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h (+22 -0)
  67. drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c (+162 -130)
  68. drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h (+13 -6)
  69. drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c (+37 -3)
  70. drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c (+16 -9)
  71. drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h (+11 -0)
  72. drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c (+146 -127)
  73. drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h (+7 -8)
  74. drivers/gpu/drm/amd/amdgpu/ci_dpm.c (+2 -2)
  75. drivers/gpu/drm/amd/amdgpu/cik.c (+7 -0)
  76. drivers/gpu/drm/amd/amdgpu/dce_v10_0.c (+5 -12)
  77. drivers/gpu/drm/amd/amdgpu/dce_v11_0.c (+15 -15)
  78. drivers/gpu/drm/amd/amdgpu/dce_v6_0.c (+5 -12)
  79. drivers/gpu/drm/amd/amdgpu/dce_v8_0.c (+5 -12)
  80. drivers/gpu/drm/amd/amdgpu/dce_virtual.c (+5 -5)
  81. drivers/gpu/drm/amd/amdgpu/df_v1_7.c (+120 -0)
  82. drivers/gpu/drm/amd/amdgpu/df_v1_7.h (+16 -14)
  83. drivers/gpu/drm/amd/amdgpu/df_v3_6.c (+116 -0)
  84. drivers/gpu/drm/amd/amdgpu/df_v3_6.h (+16 -14)
  85. drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (+80 -22)
  86. drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (+248 -96)
  87. drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c (+23 -2)
  88. drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c (+23 -2)
  89. drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c (+29 -4)
  90. drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c (+82 -64)
  91. drivers/gpu/drm/amd/amdgpu/kv_dpm.c (+2 -2)
  92. drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c (+1 -0)
  93. drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c (+3 -1)
  94. drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c (+17 -1)
  95. drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h (+46 -21)
  96. drivers/gpu/drm/amd/amdgpu/psp_v10_0.c (+9 -0)
  97. drivers/gpu/drm/amd/amdgpu/psp_v3_1.c (+3 -0)
  98. drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c (+9 -3)
  99. drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c (+68 -28)
  100. drivers/gpu/drm/amd/amdgpu/si.c (+7 -0)

+ 16 - 2
Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt

@@ -14,7 +14,13 @@ Required properties:
 		"adi,adv7513"
 		"adi,adv7533"
 
-- reg: I2C slave address
+- reg: I2C slave addresses
+  The ADV7511 internal registers are split into four pages exposed through
+  different I2C addresses, creating four register maps. Each map has it own
+  I2C address and acts as a standard slave device on the I2C bus. The main
+  address is mandatory, others are optional and revert to defaults if not
+  specified.
+
 
 The ADV7511 supports a large number of input data formats that differ by their
 color depth, color format, clock mode, bit justification and random
@@ -70,6 +76,9 @@ Optional properties:
   rather than generate its own timings for HDMI output.
 - clocks: from common clock binding: reference to the CEC clock.
 - clock-names: from common clock binding: must be "cec".
+- reg-names : Names of maps with programmable addresses.
+	It can contain any map needing a non-default address.
+	Possible maps names are : "main", "edid", "cec", "packet"
 
 Required nodes:
 
@@ -88,7 +97,12 @@ Example
 
 	adv7511w: hdmi@39 {
 		compatible = "adi,adv7511w";
-		reg = <39>;
+		/*
+		 * The EDID page will be accessible on address 0x66 on the I2C
+		 * bus. All other maps continue to use their default addresses.
+		 */
+		reg = <0x39>, <0x66>;
+		reg-names = "main", "edid";
 		interrupt-parent = <&gpio3>;
 		interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
 		clocks = <&cec_clock>;

+ 133 - 0
Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt

@@ -0,0 +1,133 @@
+Cadence DSI bridge
+==================
+
+The Cadence DSI bridge is a DPI to DSI bridge supporting up to 4 DSI lanes.
+
+Required properties:
+- compatible: should be set to "cdns,dsi".
+- reg: physical base address and length of the controller's registers.
+- interrupts: interrupt line connected to the DSI bridge.
+- clocks: DSI bridge clocks.
+- clock-names: must contain "dsi_p_clk" and "dsi_sys_clk".
+- phys: phandle link to the MIPI D-PHY controller.
+- phy-names: must contain "dphy".
+- #address-cells: must be set to 1.
+- #size-cells: must be set to 0.
+
+Optional properties:
+- resets: DSI reset lines.
+- reset-names: can contain "dsi_p_rst".
+
+Required subnodes:
+- ports: Ports as described in Documentation/devicetree/bindings/graph.txt.
+  2 ports are available:
+  * port 0: this port is only needed if some of your DSI devices are
+	    controlled through  an external bus like I2C or SPI. Can have at
+	    most 4 endpoints. The endpoint number is directly encoding the
+	    DSI virtual channel used by this device.
+  * port 1: represents the DPI input.
+  Other ports will be added later to support the new kind of inputs.
+
+- one subnode per DSI device connected on the DSI bus. Each DSI device should
+  contain a reg property encoding its virtual channel.
+
+Cadence DPHY
+============
+
+Cadence DPHY block.
+
+Required properties:
+- compatible: should be set to "cdns,dphy".
+- reg: physical base address and length of the DPHY registers.
+- clocks: DPHY reference clocks.
+- clock-names: must contain "psm" and "pll_ref".
+- #phy-cells: must be set to 0.
+
+
+Example:
+	dphy0: dphy@fd0e0000{
+		compatible = "cdns,dphy";
+		reg = <0x0 0xfd0e0000 0x0 0x1000>;
+		clocks = <&psm_clk>, <&pll_ref_clk>;
+		clock-names = "psm", "pll_ref";
+		#phy-cells = <0>;
+	};
+
+	dsi0: dsi@fd0c0000 {
+		compatible = "cdns,dsi";
+		reg = <0x0 0xfd0c0000 0x0 0x1000>;
+		clocks = <&pclk>, <&sysclk>;
+		clock-names = "dsi_p_clk", "dsi_sys_clk";
+		interrupts = <1>;
+		phys = <&dphy0>;
+		phy-names = "dphy";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@1 {
+				reg = <1>;
+				dsi0_dpi_input: endpoint {
+					remote-endpoint = <&xxx_dpi_output>;
+				};
+			};
+		};
+
+		panel: dsi-dev@0 {
+			compatible = "<vendor,panel>";
+			reg = <0>;
+		};
+	};
+
+or
+
+	dsi0: dsi@fd0c0000 {
+		compatible = "cdns,dsi";
+		reg = <0x0 0xfd0c0000 0x0 0x1000>;
+		clocks = <&pclk>, <&sysclk>;
+		clock-names = "dsi_p_clk", "dsi_sys_clk";
+		interrupts = <1>;
+		phys = <&dphy1>;
+		phy-names = "dphy";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				dsi0_output: endpoint@0 {
+					reg = <0>;
+					remote-endpoint = <&dsi_panel_input>;
+				};
+			};
+
+			port@1 {
+				reg = <1>;
+				dsi0_dpi_input: endpoint {
+					remote-endpoint = <&xxx_dpi_output>;
+				};
+			};
+		};
+	};
+
+	i2c@xxx {
+		panel: panel@59 {
+			compatible = "<vendor,panel>";
+			reg = <0x59>;
+
+			port {
+				dsi_panel_input: endpoint {
+					remote-endpoint = <&dsi0_output>;
+				};
+			};
+		};
+	};

+ 1 - 0
Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt

@@ -14,6 +14,7 @@ Required properties:
 - compatible : Shall contain one or more of
   - "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX
   - "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX
+  - "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX
   - "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 compatible HDMI TX
 
     When compatible with generic versions, nodes must list the SoC-specific

+ 3 - 0
Documentation/devicetree/bindings/display/bridge/tda998x.txt

@@ -27,6 +27,9 @@ Optional properties:
 	in question is used. The implementation allows one or two DAIs. If two
 	DAIs are defined, they must be of different type.
 
+  - nxp,calib-gpios: calibration GPIO, which must correspond with the
+	gpio used for the TDA998x interrupt pin.
+
 [1] Documentation/sound/alsa/soc/DAI.txt
 [2] include/dt-bindings/display/tda998x.h
 

+ 60 - 0
Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt

@@ -0,0 +1,60 @@
+Thine Electronics THC63LVD1024 LVDS decoder
+-------------------------------------------
+
+The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS streams
+to parallel data outputs. The chip supports single/dual input/output modes,
+handling up to two LVDS input streams and up to two digital CMOS/TTL outputs.
+
+Single or dual operation mode, output data mapping and DDR output modes are
+configured through input signals and the chip does not expose any control bus.
+
+Required properties:
+- compatible: Shall be "thine,thc63lvd1024"
+- vcc-supply: Power supply for TTL output, TTL CLOCKOUT signal, LVDS input,
+  PPL and digital circuitry
+
+Optional properties:
+- powerdown-gpios: Power down GPIO signal, pin name "/PDWN". Active low
+- oe-gpios: Output enable GPIO signal, pin name "OE". Active high
+
+The THC63LVD1024 video port connections are modeled according
+to OF graph bindings specified by Documentation/devicetree/bindings/graph.txt
+
+Required video port nodes:
+- port@0: First LVDS input port
+- port@2: First digital CMOS/TTL parallel output
+
+Optional video port nodes:
+- port@1: Second LVDS input port
+- port@3: Second digital CMOS/TTL parallel output
+
+Example:
+--------
+
+	thc63lvd1024: lvds-decoder {
+		compatible = "thine,thc63lvd1024";
+
+		vcc-supply = <&reg_lvds_vcc>;
+		powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+
+				lvds_dec_in_0: endpoint {
+					remote-endpoint = <&lvds_out>;
+				};
+			};
+
+			port@2{
+				reg = <2>;
+
+				lvds_dec_out_2: endpoint {
+					remote-endpoint = <&adv7511_in>;
+				};
+			};
+		};
+	};

+ 7 - 2
Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt

@@ -19,7 +19,8 @@ Required properties:
 	  clock-names property.
 - clock-names: list of clock names sorted in the same order as the clocks
 	       property. Must contain "pclk", "aclk_decon", "aclk_smmu_decon0x",
-	       "aclk_xiu_decon0x", "pclk_smmu_decon0x", clk_decon_vclk",
+	       "aclk_xiu_decon0x", "pclk_smmu_decon0x", "aclk_smmu_decon1x",
+	       "aclk_xiu_decon1x", "pclk_smmu_decon1x", clk_decon_vclk",
 	       "sclk_decon_eclk"
 - ports: contains a port which is connected to mic node. address-cells and
 	 size-cells must 1 and 0, respectively.
@@ -34,10 +35,14 @@ decon: decon@13800000 {
 	clocks = <&cmu_disp CLK_ACLK_DECON>, <&cmu_disp CLK_ACLK_SMMU_DECON0X>,
 		<&cmu_disp CLK_ACLK_XIU_DECON0X>,
 		<&cmu_disp CLK_PCLK_SMMU_DECON0X>,
+		<&cmu_disp CLK_ACLK_SMMU_DECON1X>,
+		<&cmu_disp CLK_ACLK_XIU_DECON1X>,
+		<&cmu_disp CLK_PCLK_SMMU_DECON1X>,
 		<&cmu_disp CLK_SCLK_DECON_VCLK>,
 		<&cmu_disp CLK_SCLK_DECON_ECLK>;
 	clock-names = "aclk_decon", "aclk_smmu_decon0x", "aclk_xiu_decon0x",
-		"pclk_smmu_decon0x", "sclk_decon_vclk", "sclk_decon_eclk";
+		"pclk_smmu_decon0x", "aclk_smmu_decon1x", "aclk_xiu_decon1x",
+		"pclk_smmu_decon1x", "sclk_decon_vclk", "sclk_decon_eclk";
 	interrupt-names = "vsync", "lcd_sys";
 	interrupts = <0 202 0>, <0 203 0>;
 

+ 15 - 13
Documentation/devicetree/bindings/display/renesas,du.txt

@@ -13,6 +13,7 @@ Required Properties:
     - "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU
     - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU
     - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU
+    - "renesas,du-r8a77965" for R8A77965 (R-Car M3-N) compatible DU
     - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU
     - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU
 
@@ -47,20 +48,21 @@ bindings specified in Documentation/devicetree/bindings/graph.txt.
 The following table lists for each supported model the port number
 corresponding to each DU output.
 
-                      Port0          Port1          Port2          Port3
+                        Port0          Port1          Port2          Port3
 -----------------------------------------------------------------------------
- R8A7743 (RZ/G1M)     DPAD 0         LVDS 0         -              -
- R8A7745 (RZ/G1E)     DPAD 0         DPAD 1         -              -
- R8A7779 (R-Car H1)   DPAD 0         DPAD 1         -              -
- R8A7790 (R-Car H2)   DPAD 0         LVDS 0         LVDS 1         -
- R8A7791 (R-Car M2-W) DPAD 0         LVDS 0         -              -
- R8A7792 (R-Car V2H)  DPAD 0         DPAD 1         -              -
- R8A7793 (R-Car M2-N) DPAD 0         LVDS 0         -              -
- R8A7794 (R-Car E2)   DPAD 0         DPAD 1         -              -
- R8A7795 (R-Car H3)   DPAD 0         HDMI 0         HDMI 1         LVDS 0
- R8A7796 (R-Car M3-W) DPAD 0         HDMI 0         LVDS 0         -
- R8A77970 (R-Car V3M) DPAD 0         LVDS 0         -              -
- R8A77995 (R-Car D3)  DPAD 0         LVDS 0         LVDS 1         -
+ R8A7743 (RZ/G1M)       DPAD 0         LVDS 0         -              -
+ R8A7745 (RZ/G1E)       DPAD 0         DPAD 1         -              -
+ R8A7779 (R-Car H1)     DPAD 0         DPAD 1         -              -
+ R8A7790 (R-Car H2)     DPAD 0         LVDS 0         LVDS 1         -
+ R8A7791 (R-Car M2-W)   DPAD 0         LVDS 0         -              -
+ R8A7792 (R-Car V2H)    DPAD 0         DPAD 1         -              -
+ R8A7793 (R-Car M2-N)   DPAD 0         LVDS 0         -              -
+ R8A7794 (R-Car E2)     DPAD 0         DPAD 1         -              -
+ R8A7795 (R-Car H3)     DPAD 0         HDMI 0         HDMI 1         LVDS 0
+ R8A7796 (R-Car M3-W)   DPAD 0         HDMI 0         LVDS 0         -
+ R8A77965 (R-Car M3-N)  DPAD 0         HDMI 0         LVDS 0         -
+ R8A77970 (R-Car V3M)   DPAD 0         LVDS 0         -              -
+ R8A77995 (R-Car D3)    DPAD 0         LVDS 0         LVDS 1         -
 
 
 Example: R8A7795 (R-Car H3) ES2.0 DU

+ 93 - 0
Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt

@@ -0,0 +1,93 @@
+Allwinner A31 DSI Encoder
+=========================
+
+The DSI pipeline consists of two separate blocks: the DSI controller
+itself, and its associated D-PHY.
+
+DSI Encoder
+-----------
+
+The DSI Encoder generates the DSI signal from the TCON's.
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun6i-a31-mipi-dsi
+  - reg: base address and size of memory-mapped region
+  - interrupts: interrupt associated to this IP
+  - clocks: phandles to the clocks feeding the DSI encoder
+    * bus: the DSI interface clock
+    * mod: the DSI module clock
+  - clock-names: the clock names mentioned above
+  - phys: phandle to the D-PHY
+  - phy-names: must be "dphy"
+  - resets: phandle to the reset controller driving the encoder
+
+  - ports: A ports node with endpoint definitions as defined in
+    Documentation/devicetree/bindings/media/video-interfaces.txt. The
+    first port should be the input endpoint, usually coming from the
+    associated TCON.
+
+Any MIPI-DSI device attached to this should be described according to
+the bindings defined in ../mipi-dsi-bus.txt
+
+D-PHY
+-----
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun6i-a31-mipi-dphy
+  - reg: base address and size of memory-mapped region
+  - clocks: phandles to the clocks feeding the DSI encoder
+    * bus: the DSI interface clock
+    * mod: the DSI module clock
+  - clock-names: the clock names mentioned above
+  - resets: phandle to the reset controller driving the encoder
+
+Example:
+
+dsi0: dsi@1ca0000 {
+	compatible = "allwinner,sun6i-a31-mipi-dsi";
+	reg = <0x01ca0000 0x1000>;
+	interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&ccu CLK_BUS_MIPI_DSI>,
+		 <&ccu CLK_DSI_SCLK>;
+	clock-names = "bus", "mod";
+	resets = <&ccu RST_BUS_MIPI_DSI>;
+	phys = <&dphy0>;
+	phy-names = "dphy";
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	panel@0 {
+		compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
+		reg = <0>;
+		power-gpios = <&pio 1 7 GPIO_ACTIVE_HIGH>; /* PB07 */
+		reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
+		backlight = <&pwm_bl>;
+	};
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+
+			dsi0_in_tcon0: endpoint {
+				remote-endpoint = <&tcon0_out_dsi0>;
+			};
+		};
+	};
+};
+
+dphy0: d-phy@1ca1000 {
+	compatible = "allwinner,sun6i-a31-mipi-dphy";
+	reg = <0x01ca1000 0x1000>;
+	clocks = <&ccu CLK_BUS_MIPI_DSI>,
+		 <&ccu CLK_DSI_DPHY>;
+	clock-names = "bus", "mod";
+	resets = <&ccu RST_BUS_MIPI_DSI>;
+	#phy-cells = <0>;
+};

+ 28 - 0
Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt

@@ -0,0 +1,28 @@
+Broadcom V3D GPU
+
+Only the Broadcom V3D 3.x and newer GPUs are covered by this binding.
+For V3D 2.x, see brcm,bcm-vc4.txt.
+
+Required properties:
+- compatible:	Should be "brcm,7268-v3d" or "brcm,7278-v3d"
+- reg:		Physical base addresses and lengths of the register areas
+- reg-names:	Names for the register areas.  The "hub", "bridge", and "core0"
+		  register areas are always required.  The "gca" register area
+		  is required if the GCA cache controller is present.
+- interrupts:	The interrupt numbers.  The first interrupt is for the hub,
+		  while the following interrupts are for the cores.
+		  See bindings/interrupt-controller/interrupts.txt
+
+Optional properties:
+- clocks:	The core clock the unit runs on
+
+v3d {
+	compatible = "brcm,7268-v3d";
+	reg = <0xf1204000 0x100>,
+	      <0xf1200000 0x4000>,
+	      <0xf1208000 0x4000>,
+	      <0xf1204100 0x100>;
+	reg-names = "bridge", "hub", "core0", "gca";
+	interrupts = <0 78 4>,
+		     <0 77 4>;
+};

+ 27 - 0
Documentation/devicetree/bindings/gpu/samsung-scaler.txt

@@ -0,0 +1,27 @@
+* Samsung Exynos Image Scaler
+
+Required properties:
+  - compatible : value should be one of the following:
+	(a) "samsung,exynos5420-scaler" for Scaler IP in Exynos5420
+	(b) "samsung,exynos5433-scaler" for Scaler IP in Exynos5433
+
+  - reg : Physical base address of the IP registers and length of memory
+	  mapped region.
+
+  - interrupts : Interrupt specifier for scaler interrupt, according to format
+		 specific to interrupt parent.
+
+  - clocks : Clock specifier for scaler clock, according to generic clock
+	     bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt)
+
+  - clock-names : Names of clocks. For exynos scaler, it should be "mscl"
+		  on 5420 and "pclk", "aclk" and "aclk_xiu" on 5433.
+
+Example:
+	scaler@12800000 {
+		compatible = "samsung,exynos5420-scaler";
+		reg = <0x12800000 0x1294>;
+		interrupts = <0 220 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&clock CLK_MSCL0>;
+		clock-names = "mscl";
+	};

+ 2 - 0
Documentation/gpu/drivers.rst

@@ -10,8 +10,10 @@ GPU Driver Documentation
    tegra
    tinydrm
    tve200
+   v3d
    vc4
    bridge/dw-hdmi
+   xen-front
 
 .. only::  subproject and html
 

+ 121 - 20
Documentation/gpu/i915.rst

@@ -58,6 +58,12 @@ Intel GVT-g Host Support(vGPU device model)
 .. kernel-doc:: drivers/gpu/drm/i915/intel_gvt.c
    :internal:
 
+Workarounds
+-----------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
+   :doc: Hardware workarounds
+
 Display Hardware Handling
 =========================
 
@@ -249,6 +255,103 @@ Memory Management and Command Submission
 This sections covers all things related to the GEM implementation in the
 i915 driver.
 
+Intel GPU Basics
+----------------
+
+An Intel GPU has multiple engines. There are several engine types.
+
+- RCS engine is for rendering 3D and performing compute, this is named
+  `I915_EXEC_RENDER` in user space.
+- BCS is a blitting (copy) engine, this is named `I915_EXEC_BLT` in user
+  space.
+- VCS is a video encode and decode engine, this is named `I915_EXEC_BSD`
+  in user space
+- VECS is video enhancement engine, this is named `I915_EXEC_VEBOX` in user
+  space.
+- The enumeration `I915_EXEC_DEFAULT` does not refer to specific engine;
+  instead it is to be used by user space to specify a default rendering
+  engine (for 3D) that may or may not be the same as RCS.
+
+The Intel GPU family is a family of integrated GPU's using Unified
+Memory Access. For having the GPU "do work", user space will feed the
+GPU batch buffers via one of the ioctls `DRM_IOCTL_I915_GEM_EXECBUFFER2`
+or `DRM_IOCTL_I915_GEM_EXECBUFFER2_WR`. Most such batchbuffers will
+instruct the GPU to perform work (for example rendering) and that work
+needs memory from which to read and memory to which to write. All memory
+is encapsulated within GEM buffer objects (usually created with the ioctl
+`DRM_IOCTL_I915_GEM_CREATE`). An ioctl providing a batchbuffer for the GPU
+to create will also list all GEM buffer objects that the batchbuffer reads
+and/or writes. For implementation details of memory management see
+`GEM BO Management Implementation Details`_.
+
+The i915 driver allows user space to create a context via the ioctl
+`DRM_IOCTL_I915_GEM_CONTEXT_CREATE` which is identified by a 32-bit
+integer. Such a context should be viewed by user-space as -loosely-
+analogous to the idea of a CPU process of an operating system. The i915
+driver guarantees that commands issued to a fixed context are to be
+executed so that writes of a previously issued command are seen by
+reads of following commands. Actions issued between different contexts
+(even if from the same file descriptor) are NOT given that guarantee
+and the only way to synchronize across contexts (even from the same
+file descriptor) is through the use of fences. At least as far back as
+Gen4, also have that a context carries with it a GPU HW context;
+the HW context is essentially (most of atleast) the state of a GPU.
+In addition to the ordering guarantees, the kernel will restore GPU
+state via HW context when commands are issued to a context, this saves
+user space the need to restore (most of atleast) the GPU state at the
+start of each batchbuffer. The non-deprecated ioctls to submit batchbuffer
+work can pass that ID (in the lower bits of drm_i915_gem_execbuffer2::rsvd1)
+to identify what context to use with the command.
+
+The GPU has its own memory management and address space. The kernel
+driver maintains the memory translation table for the GPU. For older
+GPUs (i.e. those before Gen8), there is a single global such translation
+table, a global Graphics Translation Table (GTT). For newer generation
+GPUs each context has its own translation table, called Per-Process
+Graphics Translation Table (PPGTT). Of important note, is that although
+PPGTT is named per-process it is actually per context. When user space
+submits a batchbuffer, the kernel walks the list of GEM buffer objects
+used by the batchbuffer and guarantees that not only is the memory of
+each such GEM buffer object resident but it is also present in the
+(PP)GTT. If the GEM buffer object is not yet placed in the (PP)GTT,
+then it is given an address. Two consequences of this are: the kernel
+needs to edit the batchbuffer submitted to write the correct value of
+the GPU address when a GEM BO is assigned a GPU address and the kernel
+might evict a different GEM BO from the (PP)GTT to make address room
+for another GEM BO. Consequently, the ioctls submitting a batchbuffer
+for execution also include a list of all locations within buffers that
+refer to GPU-addresses so that the kernel can edit the buffer correctly.
+This process is dubbed relocation.
+
+GEM BO Management Implementation Details
+----------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_vma.h
+   :doc: Virtual Memory Address
+
+Buffer Object Eviction
+----------------------
+
+This section documents the interface functions for evicting buffer
+objects to make space available in the virtual gpu address spaces. Note
+that this is mostly orthogonal to shrinking buffer objects caches, which
+has the goal to make main memory (shared with the gpu through the
+unified memory architecture) available.
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c
+   :internal:
+
+Buffer Object Memory Shrinking
+------------------------------
+
+This section documents the interface function for shrinking memory usage
+of buffer object caches. Shrinking is used to make main memory
+available. Note that this is mostly orthogonal to evicting buffer
+objects, which has the goal to make space in gpu virtual address spaces.
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
+   :internal:
+
 Batchbuffer Parsing
 -------------------
 
@@ -267,6 +370,12 @@ Batchbuffer Pools
 .. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c
    :internal:
 
+User Batchbuffer Execution
+--------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_execbuffer.c
+   :doc: User command execution
+
 Logical Rings, Logical Ring Contexts and Execlists
 --------------------------------------------------
 
@@ -312,28 +421,14 @@ Object Tiling IOCTLs
 .. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
    :doc: buffer object tiling
 
-Buffer Object Eviction
-----------------------
-
-This section documents the interface functions for evicting buffer
-objects to make space available in the virtual gpu address spaces. Note
-that this is mostly orthogonal to shrinking buffer objects caches, which
-has the goal to make main memory (shared with the gpu through the
-unified memory architecture) available.
-
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c
-   :internal:
-
-Buffer Object Memory Shrinking
-------------------------------
+WOPCM
+=====
 
-This section documents the interface function for shrinking memory usage
-of buffer object caches. Shrinking is used to make main memory
-available. Note that this is mostly orthogonal to evicting buffer
-objects, which has the goal to make space in gpu virtual address spaces.
+WOPCM Layout
+------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
-   :internal:
+.. kernel-doc:: drivers/gpu/drm/i915/intel_wopcm.c
+   :doc: WOPCM Layout
 
 GuC
 ===
@@ -359,6 +454,12 @@ GuC Firmware Layout
 .. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fwif.h
    :doc: GuC Firmware Layout
 
+GuC Address Space
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc.c
+   :doc: GuC Address Space
+
 Tracing
 =======
 

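The "Intel GPU Basics" text added to i915.rst above is dense enough that a short userspace sketch helps; the following is illustrative only and not part of this commit. It assumes an open DRM fd, a populated array of drm_i915_gem_exec_object2 entries whose last element is the batch buffer, and libdrm's i915_drm.h on the include path; error handling is elided.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <i915_drm.h>

	/* Create a context; the kernel returns its 32-bit id in ctx_id. */
	static uint32_t create_context(int drm_fd)
	{
		struct drm_i915_gem_context_create create;

		memset(&create, 0, sizeof(create));
		ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
		return create.ctx_id;
	}

	/* Submit a batchbuffer on the render engine under a given context. */
	static void submit_batch(int drm_fd, uint32_t ctx_id,
				 struct drm_i915_gem_exec_object2 *objects,
				 unsigned int count)
	{
		struct drm_i915_gem_execbuffer2 execbuf;

		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = (uintptr_t)objects; /* every BO the batch touches */
		execbuf.buffer_count = count;             /* last object is the batch */
		execbuf.flags = I915_EXEC_RENDER;         /* RCS, per the engine list above */
		/* The context id travels in the low bits of rsvd1. */
		i915_execbuffer2_set_context_id(execbuf, ctx_id);
		ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
	}

As the new documentation notes, ordering is only guaranteed within a single context; two submit_batch() calls with different ctx_id values must be synchronized with fences.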
+ 0 - 1
Documentation/gpu/kms-properties.csv

@@ -98,5 +98,4 @@ radeon,DVI-I,“coherent”,RANGE,"Min=0, Max=1",Connector,TBD
 ,,"""underscan vborder""",RANGE,"Min=0, Max=128",Connector,TBD
 ,Audio,“audio”,ENUM,"{ ""off"", ""on"", ""auto"" }",Connector,TBD
 ,FMT Dithering,“dither”,ENUM,"{ ""off"", ""on"" }",Connector,TBD
-rcar-du,Generic,"""alpha""",RANGE,"Min=0, Max=255",Plane,TBD
 ,,"""colorkey""",RANGE,"Min=0, Max=0x01ffffff",Plane,TBD

+ 18 - 0
Documentation/gpu/todo.rst

@@ -212,6 +212,24 @@ probably use drm_fb_helper_fbdev_teardown().
 
 Contact: Maintainer of the driver you plan to convert
 
+Clean up mmap forwarding
+------------------------
+
+A lot of drivers forward gem mmap calls to dma-buf mmap for imported buffers.
+And also a lot of them forward dma-buf mmap to the gem mmap implementations.
+Would be great to refactor this all into a set of small common helpers.
+
+Contact: Daniel Vetter
+
+Put a reservation_object into drm_gem_object
+--------------------------------------------
+
+This would remove the need for the ->gem_prime_res_obj callback. It would also
+allow us to implement generic helpers for waiting for a bo, allowing for quite a
+bit of refactoring in the various wait ioctl implementations.
+
+Contact: Daniel Vetter
+
 idr_init_base()
 ---------------
 

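The reservation_object TODO above is concrete enough to sketch. Purely illustrative and not part of this commit: with a reservation object reachable from every GEM BO (the hypothetical obj->resv field below is exactly what the TODO proposes adding), the per-driver wait ioctls could collapse into one generic helper.

	#include <drm/drm_gem.h>
	#include <linux/reservation.h>

	/* Hypothetical generic wait helper; the resv member on drm_gem_object
	 * is the field the TODO proposes, not something this commit adds.
	 */
	static long drm_gem_wait_bo(struct drm_gem_object *obj, bool wait_all,
				    unsigned long timeout)
	{
		/* reservation_object_wait_timeout_rcu() returns <0 on error,
		 * 0 on timeout, and the remaining jiffies otherwise.
		 */
		return reservation_object_wait_timeout_rcu(obj->resv, wait_all,
							   true /* interruptible */,
							   timeout);
	}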
+ 31 - 0
Documentation/gpu/xen-front.rst

@@ -0,0 +1,31 @@
+====================================================
+ drm/xen-front Xen para-virtualized frontend driver
+====================================================
+
+This frontend driver implements Xen para-virtualized display
+according to the display protocol described at
+include/xen/interface/io/displif.h
+
+Driver modes of operation in terms of display buffers used
+==========================================================
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Driver modes of operation in terms of display buffers used
+
+Buffers allocated by the frontend driver
+----------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Buffers allocated by the frontend driver
+
+Buffers allocated by the backend
+--------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Buffers allocated by the backend
+
+Driver limitations
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Driver limitations

+ 20 - 1
MAINTAINERS

@@ -767,12 +767,14 @@ F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
 F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
 F:	drivers/gpu/drm/amd/include/vi_structs.h
+F:	drivers/gpu/drm/amd/include/v9_structs.h
 F:	include/uapi/linux/kfd_ioctl.h
 
 AMD SEATTLE DEVICE TREE SUPPORT
@@ -4685,7 +4687,7 @@ F:	Documentation/devicetree/bindings/display/exynos/
 
 DRM DRIVERS FOR FREESCALE DCU
 M:	Stefan Agner <stefan@agner.ch>
-M:	Alison Wang <alison.wang@freescale.com>
+M:	Alison Wang <alison.wang@nxp.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 F:	drivers/gpu/drm/fsl-dcu/
@@ -4796,6 +4798,14 @@ S:	Maintained
 F:	drivers/gpu/drm/omapdrm/
 F:	Documentation/devicetree/bindings/display/ti/
 
+DRM DRIVERS FOR V3D
+M:	Eric Anholt <eric@anholt.net>
+S:	Supported
+F:	drivers/gpu/drm/v3d/
+F:	include/uapi/drm/v3d_drm.h
+F:	Documentation/devicetree/bindings/display/brcm,bcm-v3d.txt
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
 DRM DRIVERS FOR VC4
 M:	Eric Anholt <eric@anholt.net>
 T:	git git://github.com/anholt/linux
@@ -4842,6 +4852,15 @@ S:	Maintained
 F:	drivers/gpu/drm/tinydrm/
 F:	include/drm/tinydrm/
 
+DRM DRIVERS FOR XEN
+M:	Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+L:	dri-devel@lists.freedesktop.org
+L:	xen-devel@lists.xen.org
+S:	Supported
+F:	drivers/gpu/drm/xen/
+F:	Documentation/gpu/xen-front.rst
+
 DRM TTM SUBSYSTEM
 M:	Christian Koenig <christian.koenig@amd.com>
 M:	Roger He <Hongbo.He@amd.com>

+ 0 - 10
drivers/dma-buf/sync_debug.h

@@ -62,8 +62,6 @@ struct sync_pt {
 	struct rb_node node;
 };
 
-#ifdef CONFIG_SW_SYNC
-
 extern const struct file_operations sw_sync_debugfs_fops;
 
 void sync_timeline_debug_add(struct sync_timeline *obj);
@@ -72,12 +70,4 @@ void sync_file_debug_add(struct sync_file *fence);
 void sync_file_debug_remove(struct sync_file *fence);
 void sync_dump(void);
 
-#else
-# define sync_timeline_debug_add(obj)
-# define sync_timeline_debug_remove(obj)
-# define sync_file_debug_add(fence)
-# define sync_file_debug_remove(fence)
-# define sync_dump()
-#endif
-
 #endif /* _LINUX_SYNC_H */

+ 9 - 4
drivers/gpu/drm/Kconfig

@@ -49,16 +49,17 @@ config DRM_DEBUG_MM
 
 	  If in doubt, say "N".
 
-config DRM_DEBUG_MM_SELFTEST
-	tristate "kselftests for DRM range manager (struct drm_mm)"
+config DRM_DEBUG_SELFTEST
+	tristate "kselftests for DRM"
 	depends on DRM
 	depends on DEBUG_KERNEL
 	select PRIME_NUMBERS
 	select DRM_LIB_RANDOM
+	select DRM_KMS_HELPER
 	default n
 	help
-	  This option provides a kernel module that can be used to test
-	  the DRM range manager (drm_mm) and its API. This option is not
+	  This option provides kernel modules that can be used to run
+	  various selftests on parts of the DRM api. This option is not
 	  useful for distributions or general kernels, but only for kernel
 	  developers working on DRM and associated drivers.
 
@@ -267,6 +268,8 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig"
 
 source "drivers/gpu/drm/imx/Kconfig"
 
+source "drivers/gpu/drm/v3d/Kconfig"
+
 source "drivers/gpu/drm/vc4/Kconfig"
 
 source "drivers/gpu/drm/etnaviv/Kconfig"
@@ -289,6 +292,8 @@ source "drivers/gpu/drm/pl111/Kconfig"
 
 source "drivers/gpu/drm/tve200/Kconfig"
 
+source "drivers/gpu/drm/xen/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY

+ 3 - 1
drivers/gpu/drm/Makefile

@@ -43,7 +43,7 @@ drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
 drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
 
 obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
-obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/
+obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
 
 obj-$(CONFIG_DRM)	+= drm.o
 obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
@@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_MGA)	+= mga/
 obj-$(CONFIG_DRM_I810)	+= i810/
 obj-$(CONFIG_DRM_I915)	+= i915/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
+obj-$(CONFIG_DRM_V3D)  += v3d/
 obj-$(CONFIG_DRM_VC4)  += vc4/
 obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
 obj-$(CONFIG_DRM_SIS)   += sis/
@@ -103,3 +104,4 @@ obj-$(CONFIG_DRM_MXSFB)	+= mxsfb/
 obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
 obj-$(CONFIG_DRM_PL111) += pl111/
 obj-$(CONFIG_DRM_TVE200) += tve200/
+obj-$(CONFIG_DRM_XEN) += xen/

+ 19 - 5
drivers/gpu/drm/amd/amdgpu/Makefile

@@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
-	ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
-	amdgpu_amdkfd_gfx_v7.o
+	ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
 
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
+	vega20_reg_init.o
+
+# add DF block
+amdgpu-y += \
+	df_v1_7.o \
+	df_v3_6.o
 
 # add GMC block
 amdgpu-y += \
@@ -126,11 +131,20 @@ amdgpu-y += \
 	vcn_v1_0.o
 
 # add amdkfd interfaces
+amdgpu-y += amdgpu_amdkfd.o
+
+ifneq ($(CONFIG_HSA_AMD),)
 amdgpu-y += \
-	 amdgpu_amdkfd.o \
 	 amdgpu_amdkfd_fence.o \
 	 amdgpu_amdkfd_gpuvm.o \
-	 amdgpu_amdkfd_gfx_v8.o
+	 amdgpu_amdkfd_gfx_v8.o \
+	 amdgpu_amdkfd_gfx_v9.o
+
+ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
+amdgpu-y += amdgpu_amdkfd_gfx_v7.o
+endif
+
+endif
 
 # add cgs
 amdgpu-y += amdgpu_cgs.o

+ 42 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -129,6 +129,7 @@ extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
 extern int amdgpu_emu_mode;
+extern uint amdgpu_smu_memory_pool_size;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -137,6 +138,7 @@ extern int amdgpu_si_support;
 extern int amdgpu_cik_support;
 #endif
 
+#define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
@@ -222,10 +224,10 @@ enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
 
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state);
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_powergating_state state);
 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
@@ -681,6 +683,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
 
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 
@@ -771,9 +775,18 @@ struct amdgpu_rlc {
 	u32 starting_offsets_start;
 	u32 reg_list_format_size_bytes;
 	u32 reg_list_size_bytes;
+	u32 reg_list_format_direct_reg_list_length;
+	u32 save_restore_list_cntl_size_bytes;
+	u32 save_restore_list_gpm_size_bytes;
+	u32 save_restore_list_srm_size_bytes;
 
 	u32 *register_list_format;
 	u32 *register_restore;
+	u8 *save_restore_list_cntl;
+	u8 *save_restore_list_gpm;
+	u8 *save_restore_list_srm;
+
+	bool is_rlc_v2_1;
 };
 
 #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -867,6 +880,8 @@ struct amdgpu_gfx_config {
 
 	/* gfx configure feature */
 	uint32_t double_offchip_lds_buf;
+	/* cached value of DB_DEBUG2 */
+	uint32_t db_debug2;
 };
 
 struct amdgpu_cu_info {
@@ -938,6 +953,12 @@ struct amdgpu_gfx {
 	uint32_t			ce_feature_version;
 	uint32_t			pfp_feature_version;
 	uint32_t			rlc_feature_version;
+	uint32_t			rlc_srlc_fw_version;
+	uint32_t			rlc_srlc_feature_version;
+	uint32_t			rlc_srlg_fw_version;
+	uint32_t			rlc_srlg_feature_version;
+	uint32_t			rlc_srls_fw_version;
+	uint32_t			rlc_srls_feature_version;
 	uint32_t			mec_feature_version;
 	uint32_t			mec2_feature_version;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
@@ -1204,6 +1225,8 @@ struct amdgpu_asic_funcs {
 	/* invalidate hdp read cache */
 	void (*invalidate_hdp)(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring);
+	/* check if the asic needs a full reset of if soft reset will work */
+	bool (*need_full_reset)(struct amdgpu_device *adev);
 };
 
 /*
@@ -1368,7 +1391,19 @@ struct amdgpu_nbio_funcs {
 	void (*detect_hw_virt)(struct amdgpu_device *adev);
 };
 
-
+struct amdgpu_df_funcs {
+	void (*init)(struct amdgpu_device *adev);
+	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+				      bool enable);
+	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+	u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+						 bool enable);
+	void (*get_clockgating_state)(struct amdgpu_device *adev,
+				      u32 *flags);
+	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+					    bool enable);
+};
 /* Define the HW IP blocks will be used in driver , add more if necessary */
 enum amd_hw_ip_block_type {
 	GC_HWIP = 1,
@@ -1398,6 +1433,7 @@ enum amd_hw_ip_block_type {
 struct amd_powerplay {
 	void *pp_handle;
 	const struct amd_pm_funcs *pp_funcs;
+	uint32_t pp_feature;
 };
 
 #define AMDGPU_RESET_MAGIC_NUM 64
@@ -1590,6 +1626,7 @@ struct amdgpu_device {
 	uint32_t 		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
 
 	const struct amdgpu_nbio_funcs	*nbio_funcs;
+	const struct amdgpu_df_funcs	*df_funcs;
 
 	/* delayed work_func for deferring clockgating during resume */
 	struct delayed_work     late_init_work;
@@ -1764,6 +1801,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
 #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
+#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
@@ -1790,6 +1828,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
 #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))

+ 5 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c

@@ -290,12 +290,11 @@ static int acp_hw_init(void *handle)
 	else if (r)
 		return r;
 
-	r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
-			0x5289, 0, &acp_base);
-	if (r == -ENODEV)
-		return 0;
-	else if (r)
-		return r;
+	if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+		return -EINVAL;
+
+	acp_base = adev->rmmio_base;
+
 	if (adev->asic_type != CHIP_STONEY) {
 		adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
 		if (adev->acp.acp_genpd == NULL)
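
Note: the hunk above drops the indirect CGS MMIO resource query and instead validates the device's own register aperture before using it. A hedged, standalone sketch of that guard pattern (ACP_LAST_REG_OFFSET and the function name are illustrative only):

    #include <errno.h>
    #include <stdint.h>

    #define ACP_LAST_REG_OFFSET 0x5289 /* illustrative: highest ACP offset used */

    /* Fail early if the MMIO aperture cannot cover the ACP register range. */
    static int acp_check_mmio(uint64_t mmio_base, uint64_t mmio_size,
    			  uint64_t *acp_base)
    {
    	if (mmio_size == 0 || mmio_size < ACP_LAST_REG_OFFSET)
    		return -EINVAL; /* aperture missing or too small */

    	*acp_base = mmio_base;
    	return 0;
    }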

+ 83 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void)
 		kgd2kfd = NULL;
 	}
 
+
 #elif defined(CONFIG_HSA_AMD)
+
 	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
 	if (ret)
 		kgd2kfd = NULL;
 
 #else
+	kgd2kfd = NULL;
 	ret = -ENOENT;
 #endif
+
+#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
 	amdgpu_amdkfd_gpuvm_init_mem_limits();
+#endif
 
 	return ret;
 }
@@ -92,8 +98,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 	case CHIP_POLARIS11:
 		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 		break;
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
+		break;
 	default:
-		dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
+		dev_info(adev->dev, "kfd not supported on this ASIC\n");
 		return;
 	}
 
@@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
+		if (adev->asic_type >= CHIP_VEGA10) {
+			/* On SOC15 the BIF is involved in routing
+			 * doorbells using the low 12 bits of the
+			 * address. Communicate the assignments to
+			 * KFD. KFD uses two doorbell pages per
+			 * process in case of 64-bit doorbells so we
+			 * can use each doorbell assignment twice.
+			 */
+			gpu_resources.sdma_doorbell[0][0] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE0;
+			gpu_resources.sdma_doorbell[0][1] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
+			gpu_resources.sdma_doorbell[1][0] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE1;
+			gpu_resources.sdma_doorbell[1][1] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
+			/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
+			 * SDMA, IH and VCN. So don't use them for the CP.
+			 */
+			gpu_resources.reserved_doorbell_mask = 0x1f0;
+			gpu_resources.reserved_doorbell_val  = 0x0f0;
+		}
 
 		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
@@ -217,13 +249,19 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo_param bp;
 	int r;
 	uint64_t gpu_addr_tmp = 0;
 	void *cpu_ptr_tmp = NULL;
 
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
-			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
-			     NULL, &bo);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+	r = amdgpu_bo_create(adev, &bp, &bo);
 	if (r) {
 		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
@@ -432,3 +470,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 
 	return false;
 }
+
+#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+	return false;
+}
+
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+{
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+					struct amdgpu_vm *vm)
+{
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+	return NULL;
+}
+
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+{
+	return 0;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
+{
+	return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
+{
+	return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+	return NULL;
+}
+#endif
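
Note: the reserved_doorbell_mask/val pair set in amdgpu_amdkfd_device_init() encodes the ranges named in the comment: a doorbell index db is reserved exactly when (db & mask) == val. A small self-contained check of that arithmetic (illustrative, not kernel code):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define RESERVED_DOORBELL_MASK 0x1f0
    #define RESERVED_DOORBELL_VAL  0x0f0

    static bool doorbell_is_reserved(uint32_t db)
    {
    	return (db & RESERVED_DOORBELL_MASK) == RESERVED_DOORBELL_VAL;
    }

    int main(void)
    {
    	/* within one 0x400-entry doorbell page this selects exactly
    	 * 0x0f0-0x0ff and 0x2f0-0x2ff, matching the comment above */
    	assert(doorbell_is_reserved(0x0f0) && doorbell_is_reserved(0x0ff));
    	assert(doorbell_is_reserved(0x2f0) && doorbell_is_reserved(0x2ff));
    	assert(!doorbell_is_reserved(0x1f0) && !doorbell_is_reserved(0x3f0));
    	return 0;
    }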

+ 19 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/mmu_context.h>
+#include <linux/workqueue.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 #include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {
 
 	uint32_t mapping_flags;
 
+	atomic_t invalid;
 	struct amdkfd_process_info *process_info;
+	struct page **user_pages;
 
 	struct amdgpu_sync sync;
 
@@ -84,6 +87,9 @@ struct amdkfd_process_info {
 	struct list_head vm_list_head;
 	/* List head for all KFD BOs that belong to a KFD process. */
 	struct list_head kfd_bo_list;
+	/* List of userptr BOs that are valid or invalid */
+	struct list_head userptr_valid_list;
+	struct list_head userptr_inval_list;
 	/* Lock to protect kfd_bo_list */
 	struct mutex lock;
 
@@ -91,6 +97,11 @@ struct amdkfd_process_info {
 	unsigned int n_vms;
 	/* Eviction Fence */
 	struct amdgpu_amdkfd_fence *eviction_fence;
+
+	/* MMU-notifier related fields */
+	atomic_t evicted_bos;
+	struct delayed_work restore_userptr_work;
+	struct pid *pid;
 };
 
 int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len);
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
@@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 
 /* GPUVM API */
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					  void **process_info,
-					  struct dma_fence **ef);
+					void **process_info,
+					struct dma_fence **ef);
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					   struct file *filp,
-					   void **vm, void **process_info,
-					   struct dma_fence **ef);
+					struct file *filp,
+					void **vm, void **process_info,
+					struct dma_fence **ef);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm);
+				struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
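
Note: the new evicted_bos counter plus restore_userptr_work implement a debounced restore: each eviction bumps the counter and (re)arms the delayed worker, so bursts of MMU-notifier invalidations coalesce into one restore pass. A hedged kernel-idiom sketch of that pattern (struct restore_ctx and the 1 ms delay are illustrative, not the actual amdgpu implementation):

    #include <linux/workqueue.h>
    #include <linux/atomic.h>
    #include <linux/jiffies.h>

    struct restore_ctx {
    	atomic_t evicted_bos;
    	struct delayed_work restore_work;
    };

    static void on_eviction(struct restore_ctx *ctx)
    {
    	atomic_inc(&ctx->evicted_bos);
    	/* re-arming coalesces consecutive evictions into one pass */
    	schedule_delayed_work(&ctx->restore_work, msecs_to_jiffies(1));
    }

    static void restore_worker(struct work_struct *work)
    {
    	struct restore_ctx *ctx =
    		container_of(work, struct restore_ctx, restore_work.work);
    	int seen = atomic_read(&ctx->evicted_bos);

    	/* ... revalidate and remap the invalidated userptr BOs ... */

    	/* more evictions raced in while we ran: try again later */
    	if (atomic_read(&ctx->evicted_bos) != seen)
    		schedule_delayed_work(&ctx->restore_work,
    				      msecs_to_jiffies(1));
    }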

+ 0 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 					unsigned int vmid);
 
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-				uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 	return 0;
 }
 
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-				uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
-	/* amdgpu owns the per-pipe state */
-	return 0;
-}
-
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);

+ 0 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 		uint32_t sh_mem_bases);
 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 		unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-		uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 	return 0;
 }
 
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-				uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
-	/* amdgpu owns the per-pipe state */
-	return 0;
-}
-
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);

+ 1043 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

@@ -0,0 +1,1043 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "soc15_hw_ip.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+/* HACK: MMHUB and GC both have VM-related register with the same
+ * names but different offsets. Define the MMHUB register we need here
+ * with a prefix. A proper solution would be to move the functions
+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
+ * respectively.
+ */
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ				0x06f3
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX		0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK				0x0705
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX		0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32		0x072b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX	0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32		0x072c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX	0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32		0x074b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX	0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32		0x074c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX	0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32		0x076b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX	0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32		0x076c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX	0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32		0x0727
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX	0
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32		0x0728
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX	0
+
+#define V9_PIPE_PER_MEC		(4)
+#define V9_QUEUES_PER_PIPE_MEC	(8)
+
+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t sh_mem_config,
+		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+		uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+		unsigned int vmid);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+		uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
+				unsigned int utimeout, uint32_t pipe_id,
+				uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int utimeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+		uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+		uint8_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+	.init_gtt_mem_allocation = alloc_gtt_mem,
+	.free_gtt_mem = free_gtt_mem,
+	.get_local_mem_info = get_local_mem_info,
+	.get_gpu_clock_counter = get_gpu_clock_counter,
+	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_pasid_alloc,
+	.free_pasid = amdgpu_pasid_free,
+	.program_sh_mem_settings = kgd_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+	.init_interrupts = kgd_init_interrupts,
+	.hqd_load = kgd_hqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
+	.hqd_is_occupied = kgd_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_address_watch_disable,
+	.address_watch_execute = kgd_address_watch_execute,
+	.wave_control_execute = kgd_wave_control_execute,
+	.address_watch_get_offset = kgd_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_pasid =
+			get_atc_vmid_pasid_mapping_pasid,
+	.get_atc_vmid_pasid_mapping_valid =
+			get_atc_vmid_pasid_mapping_valid,
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = amdgpu_amdkfd_get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+	.invalidate_tlbs = invalidate_tlbs,
+	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+	.submit_ib = amdgpu_amdkfd_submit_ib,
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+	return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+	return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+			uint32_t queue, uint32_t vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	soc15_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint32_t get_queue_mask(struct amdgpu_device *adev,
+			       uint32_t pipe_id, uint32_t queue_id)
+{
+	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
+			    queue_id) & 31;
+
+	return ((uint32_t)1) << bit;
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+	unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+					uint32_t sh_mem_config,
+					uint32_t sh_mem_ape1_base,
+					uint32_t sh_mem_ape1_limit,
+					uint32_t sh_mem_bases)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+	/* APE1 no longer exists on GFX9 */
+
+	unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+					unsigned int vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	/*
+	 * We have to assume that there is no outstanding mapping.
+	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+	 * a mapping is in progress or because a mapping finished
+	 * and the SW cleared it.
+	 * So the protocol is to always wait & clear.
+	 */
+	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+			ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+	/*
+	 * need to do this twice, once for gfx and once for mmhub
+	 * for ATC add 16 to VMID for mmhub, for IH different registers.
+	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+	 */
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+	       pasid_mapping);
+
+	while (!(RREG32(SOC15_REG_OFFSET(
+				ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+		 (1U << vmid)))
+		cpu_relax();
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+	       1U << vmid);
+
+	/* Mapping vmid to pasid also for IH block */
+	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+	       pasid_mapping);
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+	       pasid_mapping);
+
+	while (!(RREG32(SOC15_REG_OFFSET(
+				ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+		 (1U << (vmid + 16))))
+		cpu_relax();
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+	       1U << (vmid + 16));
+
+	/* Mapping vmid to pasid also for IH block */
+	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+	       pasid_mapping);
+	return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t mec;
+	uint32_t pipe;
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	lock_srbm(kgd, mec, pipe, 0, 0);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+	unlock_srbm(kgd);
+
+	return 0;
+}
+
+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+				unsigned int engine_id,
+				unsigned int queue_id)
+{
+	uint32_t base[2] = {
+		SOC15_REG_OFFSET(SDMA0, 0,
+				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+		SOC15_REG_OFFSET(SDMA1, 0,
+				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
+	};
+	uint32_t retval;
+
+	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
+					       mmSDMA0_RLC0_RB_CNTL);
+
+	pr_debug("sdma base address: 0x%x\n", retval);
+
+	return retval;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+	return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct v9_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, hqd_base, data;
+
+	m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	/* HIQ is set during driver init period with vmid set to 0*/
+	if (m->cp_hqd_vmid == 0) {
+		uint32_t value, mec, pipe;
+
+		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+			mec, pipe, queue_id);
+		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+			((mec << 5) | (pipe << 3) | queue_id | 0x80));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+	}
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+	for (reg = hqd_base;
+	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+		WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+	/* Activate doorbell logic before triggering WPTR poll. */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+	if (wptr) {
+		/* Don't read wptr with get_user because the user
+		 * context may not be accessible (if this function
+		 * runs in a work queue). Instead trigger a one-shot
+		 * polling read from memory in the CP. This assumes
+		 * that wptr is GPU-accessible in the queue's VMID via
+		 * ATC or SVM. WPTR==RPTR before starting the poll so
+		 * the CP starts fetching new commands from the right
+		 * place.
+		 *
+		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+		 * tricky. Assume that the queue didn't overflow. The
+		 * number of valid bits in the 32-bit RPTR depends on
+		 * the queue size. The remaining bits are taken from
+		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
+		 * queue size.
+		 */
+		uint32_t queue_size =
+			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+			guessed_wptr += queue_size;
+		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+		       lower_32_bits(guessed_wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+		       upper_32_bits(guessed_wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+		       lower_32_bits((uintptr_t)wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+		       upper_32_bits((uintptr_t)wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+		       get_queue_mask(adev, pipe_id, queue_id));
+	}
+
+	/* Start the EOP fetcher */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
+			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+	release_queue(kgd);
+
+	return 0;
+}
+
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+	unsigned long end_jiffies;
+	uint32_t data;
+	uint64_t data64;
+	uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+	sdmax_gfx_context_cntl = m->sdma_engine_id ?
+		SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
+		SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+	end_jiffies = msecs_to_jiffies(2000) + jiffies;
+	while (true) {
+		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+	data = RREG32(sdmax_gfx_context_cntl);
+	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+			     RESUME_CTX, 0);
+	WREG32(sdmax_gfx_context_cntl, data);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+	       m->sdmax_rlcx_doorbell_offset);
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+				m->sdmax_rlcx_rb_rptr_hi);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+	if (read_user_wptr(mm, wptr64, data64)) {
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       lower_32_bits(data64));
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		       upper_32_bits(data64));
+	} else {
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdmax_rlcx_rb_rptr);
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		       m->sdmax_rlcx_rb_rptr_hi);
+	}
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+			m->sdmax_rlcx_rb_base_hi);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+			m->sdmax_rlcx_rb_rptr_addr_lo);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+			m->sdmax_rlcx_rb_rptr_addr_hi);
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+				uint32_t pipe_id, uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t act;
+	bool retval = false;
+	uint32_t low, high;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+	if (act) {
+		low = lower_32_bits(queue_address >> 8);
+		high = upper_32_bits(queue_address >> 8);
+
+		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+		   high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+			retval = true;
+	}
+	release_queue(kgd);
+	return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_rb_cntl;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+
+	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+		return true;
+
+	return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
+				unsigned int utimeout, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	enum hqd_dequeue_request_type type;
+	unsigned long end_jiffies;
+	uint32_t temp;
+	struct v9_mqd *m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	if (m->cp_hqd_vmid == 0)
+		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
+	while (true) {
+		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+			break;
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out.\n");
+			release_queue(kgd);
+			return -ETIME;
+		}
+		usleep_range(500, 1000);
+	}
+
+	release_queue(kgd);
+	return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int utimeout)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr;
+	uint32_t temp;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+
+	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+	while (true) {
+		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr_hi =
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+	return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+							uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+								uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	uint32_t req = (1 << vmid) |
+		(0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
+
+	mutex_lock(&adev->srbm_mutex);
+
+	/* Use legacy mode tlb invalidation.
+	 *
+	 * Currently on Raven the code below is broken for anything but
+	 * legacy mode due to a MMHUB power gating problem. A workaround
+	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+	 * bit.
+	 *
+	 * TODO 1: agree on the right set of invalidation registers for
+	 * KFD use. Use the last one for now. Invalidate both GC and
+	 * MMHUB.
+	 *
+	 * TODO 2: support range-based invalidation, requires kfg2kgd
+	 * interface change
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+				0xffffffff);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+				0x0000001f);
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+				mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+				0xffffffff);
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+				mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+				0x0000001f);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
+				req);
+
+	while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
+					(1 << vmid)))
+		cpu_relax();
+
+	while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
+					mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
+					(1 << vmid)))
+		cpu_relax();
+
+	mutex_unlock(&adev->srbm_mutex);
+
+}
+
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+	amdgpu_fence_emit_polling(ring, &seq);
+	amdgpu_ring_commit(ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int vmid;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	if (ring->ready)
+		return invalidate_tlbs_with_kiq(adev, pasid);
+
+	for (vmid = 0; vmid < 16; vmid++) {
+		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+			continue;
+		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
+			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
+				== pasid) {
+				write_vmid_invalidate_request(kgd, vmid);
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("non kfd vmid %d\n", vmid);
+		return 0;
+	}
+
+	write_vmid_invalidate_request(kgd, vmid);
+	return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+	return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo)
+{
+	return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t data = 0;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		INSTANCE_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SH_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SE_BROADCAST_WRITES, 1);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset)
+{
+	return 0;
+}
+
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	/* No longer needed on GFXv9. The scratch base address is
+	 * passed to the shader by the CP. It's the user mode driver's
+	 * responsibility.
+	 */
+}
+
+/* FIXME: Does this need to be ASIC-specific code? */
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	const union amdgpu_firmware_header *hdr;
+
+	switch (type) {
+	case KGD_ENGINE_PFP:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
+		break;
+
+	case KGD_ENGINE_ME:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
+		break;
+
+	case KGD_ENGINE_CE:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC1:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC2:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
+		break;
+
+	case KGD_ENGINE_RLC:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA1:
+		hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA2:
+		hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
+		break;
+
+	default:
+		return 0;
+	}
+
+	if (hdr == NULL)
+		return 0;
+
+	/* Only 12 bit in use*/
+	return hdr->common.ucode_version;
+}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
+		AMDGPU_PTE_VALID;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("trying to set page table base for wrong VMID %u\n",
+		       vmid);
+		return;
+	}
+
+	/* TODO: take advantage of per-process address space size. For
+	 * now, all processes share the same address space size, like
+	 * on GFX8 and older.
+	 */
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+			lower_32_bits(adev->vm_manager.max_pfn - 1));
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+			upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+			lower_32_bits(adev->vm_manager.max_pfn - 1));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+			upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+}
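
Note: acquire_queue() and kgd_init_interrupts() above both fold a flat pipe_id into a (MEC, pipe) pair, adding 1 because ME 0 is the graphics micro-engine and the compute MECs start at 1. A worked example of that mapping (standalone, illustrative):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
    	const uint32_t num_pipe_per_mec = 4; /* V9_PIPE_PER_MEC */
    	uint32_t pipe_id = 5;

    	uint32_t mec  = (pipe_id / num_pipe_per_mec) + 1;
    	uint32_t pipe = (pipe_id % num_pipe_per_mec);

    	assert(mec == 2 && pipe == 1); /* flat pipe 5 -> MEC 2, pipe 1 */
    	return 0;
    }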

+ 559 - 25
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -23,6 +23,8 @@
 #define pr_fmt(fmt) "kfd2kgd: " fmt
 
 #include <linux/list.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
 #include <drm/drmP.h>
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -33,10 +35,20 @@
  */
 #define VI_BO_SIZE_ALIGN (0x8000)
 
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
+	uint64_t max_userptr_mem_limit;
 	int64_t system_mem_used;
+	int64_t userptr_mem_used;
 	spinlock_t mem_limit_lock;
 } kfd_mem_limit;
 
@@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = {
 
 #define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
 
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
 
 
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
 
 /* Set memory usage limits. Current, limits are
  *  System (kernel) memory - 3/8th System RAM
+ *  Userptr memory - 3/4th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
 	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	pr_debug("Kernel memory limit %lluM\n",
-		(kfd_mem_limit.max_system_mem_limit >> 20));
+	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+		(kfd_mem_limit.max_system_mem_limit >> 20),
+		(kfd_mem_limit.max_userptr_mem_limit >> 20));
 }
 
 static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
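
Note: the shift expressions above are fixed fractions of total RAM: (mem >> 1) - (mem >> 3) = mem/2 - mem/8 = 3/8 of RAM for kernel memory, and mem - (mem >> 2) = 3/4 of RAM for userptr memory. A quick standalone check of the arithmetic:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
    	uint64_t mem = 16ULL << 30; /* e.g. 16 GiB of system RAM */

    	uint64_t system_limit  = (mem >> 1) - (mem >> 3); /* 3/8 of RAM */
    	uint64_t userptr_limit = mem - (mem >> 2);        /* 3/4 of RAM */

    	assert(system_limit  ==  6ULL << 30); /*  6 GiB */
    	assert(userptr_limit == 12ULL << 30); /* 12 GiB */
    	return 0;
    }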
@@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
 			goto err_no_mem;
 		}
 		kfd_mem_limit.system_mem_used += (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		if ((kfd_mem_limit.system_mem_used + acc_size >
+			kfd_mem_limit.max_system_mem_limit) ||
+			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+			kfd_mem_limit.max_userptr_mem_limit)) {
+			ret = -ENOMEM;
+			goto err_no_mem;
+		}
+		kfd_mem_limit.system_mem_used += acc_size;
+		kfd_mem_limit.userptr_mem_used += size;
 	}
 err_no_mem:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 				       sizeof(struct amdgpu_bo));
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	if (domain == AMDGPU_GEM_DOMAIN_GTT)
+	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.system_mem_used -= acc_size;
+		kfd_mem_limit.userptr_mem_used -= size;
+	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
 {
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
-	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -=
 			(bo->tbo.acc_size + amdgpu_bo_size(bo));
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
 }
 
 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
-				struct amdkfd_process_info *process_info)
+				struct amdkfd_process_info *process_info,
+				bool userptr)
 {
 	struct ttm_validate_buffer *entry = &mem->validate_list;
 	struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 	entry->shared = true;
 	entry->bo = &bo->tbo;
 	mutex_lock(&process_info->lock);
-	list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	if (userptr)
+		list_add_tail(&entry->head, &process_info->userptr_valid_list);
+	else
+		list_add_tail(&entry->head, &process_info->kfd_bo_list);
 	mutex_unlock(&process_info->lock);
 }
 
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+			   uint64_t user_addr)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	struct amdgpu_bo *bo = mem->bo;
+	struct ttm_operation_ctx ctx = { true, false };
+	int ret = 0;
+
+	mutex_lock(&process_info->lock);
+
+	ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+	if (ret) {
+		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+		goto out;
+	}
+
+	ret = amdgpu_mn_register(bo, user_addr);
+	if (ret) {
+		pr_err("%s: Failed to register MMU notifier: %d\n",
+		       __func__, ret);
+		goto out;
+	}
+
+	/* If no restore worker is running concurrently, user_pages
+	 * should not be allocated
+	 */
+	WARN(mem->user_pages, "Leaking user_pages array");
+
+	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					   sizeof(struct page *),
+					   GFP_KERNEL | __GFP_ZERO);
+	if (!mem->user_pages) {
+		pr_err("%s: Failed to allocate pages array\n", __func__);
+		ret = -ENOMEM;
+		goto unregister_out;
+	}
+
+	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+	if (ret) {
+		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+		goto free_out;
+	}
+
+	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("%s: Failed to reserve BO\n", __func__);
+		goto release_out;
+	}
+	amdgpu_ttm_placement_from_domain(bo, mem->domain);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		pr_err("%s: failed to validate BO\n", __func__);
+	amdgpu_bo_unreserve(bo);
+
+release_out:
+	if (ret)
+		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+free_out:
+	kvfree(mem->user_pages);
+	mem->user_pages = NULL;
+unregister_out:
+	if (ret)
+		amdgpu_mn_unregister(bo);
+out:
+	mutex_unlock(&process_info->lock);
+	return ret;
+}
+
 /* Reserving a BO and its page table BOs must happen atomically to
  * avoid deadlocks. Some operations update multiple VMs at once. Track
  * all the reservation info in a context structure. Optionally a sync
@@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 }

 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
-		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+		bool no_update_pte)
 {
 	int ret;

@@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 		return ret;
 	}

+	if (no_update_pte)
+		return 0;
+
 	ret = update_gpuvm_pte(adev, entry, sync);
 	if (ret) {
 		pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 		mutex_init(&info->lock);
 		INIT_LIST_HEAD(&info->vm_list_head);
 		INIT_LIST_HEAD(&info->kfd_bo_list);
+		INIT_LIST_HEAD(&info->userptr_valid_list);
+		INIT_LIST_HEAD(&info->userptr_inval_list);

 		info->eviction_fence =
 			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 			goto create_evict_fence_fail;
 		}

+		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+		atomic_set(&info->evicted_bos, 0);
+		INIT_DELAYED_WORK(&info->restore_userptr_work,
+				  amdgpu_amdkfd_restore_userptr_worker);
+
 		*process_info = info;
 		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
@@ -872,6 +1006,7 @@ reserve_pd_fail:
 		dma_fence_put(*ef);
 		*ef = NULL;
 		*process_info = NULL;
+		put_pid(info->pid);
 create_evict_fence_fail:
 		mutex_destroy(&info->lock);
 		kfree(info);
@@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 	/* Release per-process resources when last compute VM is destroyed */
 	if (!process_info->n_vms) {
 		WARN_ON(!list_empty(&process_info->kfd_bo_list));
+		WARN_ON(!list_empty(&process_info->userptr_valid_list));
+		WARN_ON(!list_empty(&process_info->userptr_inval_list));

 		dma_fence_put(&process_info->eviction_fence->base);
+		cancel_delayed_work_sync(&process_info->restore_userptr_work);
+		put_pid(process_info->pid);
 		mutex_destroy(&process_info->lock);
 		kfree(process_info);
 	}
@@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
+	struct amdgpu_bo_param bp;
 	int byte_align;
-	u32 alloc_domain;
+	u32 domain, alloc_domain;
 	u64 alloc_flags;
 	uint32_t mapping_flags;
 	int ret;
@@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	 * Check on which domain to allocate BO
 	 */
 	if (flags & ALLOC_MEM_FLAGS_VRAM) {
-		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
 		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
 			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
 	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
-		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_flags = 0;
+	} else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+		domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
 		alloc_flags = 0;
+		if (!offset || !*offset)
+			return -EINVAL;
+		user_addr = *offset;
 	} else {
 		return -EINVAL;
 	}
@@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
 			va, size, domain_string(alloc_domain));

-	ret = amdgpu_bo_create(adev, size, byte_align,
-				alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = byte_align;
+	bp.domain = alloc_domain;
+	bp.flags = alloc_flags;
+	bp.type = ttm_bo_type_device;
+	bp.resv = NULL;
+	ret = amdgpu_bo_create(adev, &bp, &bo);
 	if (ret) {
 		pr_debug("Failed to create BO on domain %s. ret %d\n",
 				domain_string(alloc_domain), ret);
@@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	}
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
+	if (user_addr)
+		bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;

 	(*mem)->va = va;
-	(*mem)->domain = alloc_domain;
+	(*mem)->domain = domain;
 	(*mem)->mapped_to_gpu_memory = 0;
 	(*mem)->process_info = avm->process_info;
-	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+	if (user_addr) {
+		ret = init_user_pages(*mem, current->mm, user_addr);
+		if (ret) {
+			mutex_lock(&avm->process_info->lock);
+			list_del(&(*mem)->validate_list.head);
+			mutex_unlock(&avm->process_info->lock);
+			goto allocate_init_user_pages_failed;
+		}
+	}

 	if (offset)
 		*offset = amdgpu_bo_mmap_offset(bo);

 	return 0;

+allocate_init_user_pages_failed:
+	amdgpu_bo_unref(&bo);
+	/* Don't unreserve system mem limit twice */
+	goto err_reserve_system_mem;
 err_bo_create:
 	unreserve_system_mem_limit(adev, size, alloc_domain);
 err_reserve_system_mem:
@@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	 * be freed anyway
 	 */

+	/* No more MMU notifiers */
+	amdgpu_mn_unregister(mem->bo);
+
 	/* Make sure restore workers don't access the BO any more */
 	bo_list_entry = &mem->validate_list;
 	mutex_lock(&process_info->lock);
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);

+	/* Free user pages if necessary */
+	if (mem->user_pages) {
+		pr_debug("%s: Freeing user_pages array\n", __func__);
+		if (mem->user_pages[0])
+			release_pages(mem->user_pages,
+					mem->bo->tbo.ttm->num_pages);
+		kvfree(mem->user_pages);
+	}
+
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
 		return ret;
@@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	struct kfd_bo_va_list *bo_va_entry = NULL;
 	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
 	unsigned long bo_size;
-
-	/* Make sure restore is not running concurrently.
-	 */
-	mutex_lock(&mem->process_info->lock);
-
-	mutex_lock(&mem->lock);
+	bool is_invalid_userptr = false;

 	bo = mem->bo;
-
 	if (!bo) {
 		pr_err("Invalid BO when mapping memory to GPU\n");
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}

+	/* Make sure restore is not running concurrently. Since we
+	 * don't map invalid userptr BOs, we rely on the next restore
+	 * worker to do the mapping
+	 */
+	mutex_lock(&mem->process_info->lock);
+
+	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
+	 * sure that the MMU notifier is no longer running
+	 * concurrently and the queues are actually stopped
+	 */
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		down_write(&current->mm->mmap_sem);
+		is_invalid_userptr = atomic_read(&mem->invalid);
+		up_write(&current->mm->mmap_sem);
+	}
+
+	mutex_lock(&mem->lock);
+
 	domain = mem->domain;
 	bo_size = bo->tbo.mem.size;

@@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	if (unlikely(ret))
 		goto out;

+	/* Userptr can be marked as "not invalid", but not actually be
+	 * validated yet (still in the system domain). In that case
+	 * the queues are still stopped and we can leave mapping for
+	 * the next restore worker
+	 */
+	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+		is_invalid_userptr = true;
+
 	if (check_if_add_bo_to_vm(avm, mem)) {
 		ret = add_bo_to_vm(adev, mem, avm, false,
 				&bo_va_entry);
@@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 			goto add_bo_to_vm_failed;
 	}

-	if (mem->mapped_to_gpu_memory == 0) {
+	if (mem->mapped_to_gpu_memory == 0 &&
+	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 		/* Validate BO only once. The eviction fence gets added to BO
 		 * the first time it is mapped. Validate will wait for all
 		 * background evictions to complete.
@@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 					entry->va, entry->va + bo_size,
 					entry);

-			ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+					      is_invalid_userptr);
 			if (ret) {
 				pr_err("Failed to map radeon bo to gpuvm\n");
 				goto map_bo_to_gpuvm_failed;
@@ -1418,6 +1621,337 @@ bo_reserve_failed:
 	return ret;
 }

+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+				struct mm_struct *mm)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	int invalid, evicted_bos;
+	int r = 0;
+
+	invalid = atomic_inc_return(&mem->invalid);
+	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+	if (evicted_bos == 1) {
+		/* First eviction, stop the queues */
+		r = kgd2kfd->quiesce_mm(mm);
+		if (r)
+			pr_err("Failed to quiesce KFD\n");
+		schedule_delayed_work(&process_info->restore_userptr_work,
+			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+	}
+
+	return r;
+}
+
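Because the function relies on nothing but atomic counters, its behaviour can be modeled in plain C11: only the 0 to 1 transition of evicted_bos quiesces the queues and schedules the restore worker; later evictions just bump the count. A minimal sketch with hypothetical stubs standing in for kgd2kfd->quiesce_mm() and schedule_delayed_work():

#include <stdatomic.h>
#include <stdio.h>

static atomic_int evicted_bos;

/* Hypothetical stand-ins; only the counter logic is modeled. */
static void quiesce_queues(void)   { puts("quiesce user mode queues"); }
static void schedule_restore(void) { puts("schedule restore worker"); }

static void evict_userptr_model(void)
{
	/* equivalent of atomic_inc_return(&process_info->evicted_bos) */
	if (atomic_fetch_add(&evicted_bos, 1) + 1 == 1) {
		quiesce_queues();	/* only the first eviction stops the queues */
		schedule_restore();
	}
}

int main(void)
{
	evict_userptr_model();	/* first eviction: quiesce + schedule */
	evict_userptr_model();	/* later evictions: counter bump only */
	return 0;
}
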
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+				     struct mm_struct *mm)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct ttm_operation_ctx ctx = { false, false };
+	int invalid, ret;
+
+	/* Move all invalidated BOs to the userptr_inval_list and
+	 * release their user pages by migration to the CPU domain
+	 */
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_valid_list,
+				 validate_list.head) {
+		if (!atomic_read(&mem->invalid))
+			continue; /* BO is still valid */
+
+		bo = mem->bo;
+
+		if (amdgpu_bo_reserve(bo, true))
+			return -EAGAIN;
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		amdgpu_bo_unreserve(bo);
+		if (ret) {
+			pr_err("%s: Failed to invalidate userptr BO\n",
+			       __func__);
+			return -EAGAIN;
+		}
+
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_inval_list);
+	}
+
+	if (list_empty(&process_info->userptr_inval_list))
+		return 0; /* All evicted userptr BOs were freed */
+
+	/* Go through userptr_inval_list and update any invalid user_pages */
+	list_for_each_entry(mem, &process_info->userptr_inval_list,
+			    validate_list.head) {
+		invalid = atomic_read(&mem->invalid);
+		if (!invalid)
+			/* BO hasn't been invalidated since the last
+			 * revalidation attempt. Keep its BO list.
+			 */
+			continue;
+
+		bo = mem->bo;
+
+		if (!mem->user_pages) {
+			mem->user_pages =
+				kvmalloc_array(bo->tbo.ttm->num_pages,
+						 sizeof(struct page *),
+						 GFP_KERNEL | __GFP_ZERO);
+			if (!mem->user_pages) {
+				pr_err("%s: Failed to allocate pages array\n",
+				       __func__);
+				return -ENOMEM;
+			}
+		} else if (mem->user_pages[0]) {
+			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+		}
+
+		/* Get updated user pages */
+		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+						   mem->user_pages);
+		if (ret) {
+			mem->user_pages[0] = NULL;
+			pr_info("%s: Failed to get user pages: %d\n",
+				__func__, ret);
+			/* Pretend it succeeded. It will fail later
+			 * with a VM fault if the GPU tries to access
+			 * it. Better than hanging indefinitely with
+			 * stalled user mode queues.
+			 */
+		}
+
+		/* Mark the BO as valid unless it was invalidated
+		 * again concurrently
+		 */
+		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
+			return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list, and moves them back to the
+ * userptr_valid_list. Also updates GPUVM page tables with new page
+ * addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+	struct amdgpu_bo_list_entry *pd_bo_list_entries;
+	struct list_head resv_list, duplicates;
+	struct ww_acquire_ctx ticket;
+	struct amdgpu_sync sync;
+
+	struct amdgpu_vm *peer_vm;
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct ttm_operation_ctx ctx = { false, false };
+	int i, ret;
+
+	pd_bo_list_entries = kcalloc(process_info->n_vms,
+				     sizeof(struct amdgpu_bo_list_entry),
+				     GFP_KERNEL);
+	if (!pd_bo_list_entries) {
+		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&resv_list);
+	INIT_LIST_HEAD(&duplicates);
+
+	/* Get all the page directory BOs that need to be reserved */
+	i = 0;
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
+				    &pd_bo_list_entries[i++]);
+	/* Add the userptr_inval_list entries to resv_list */
+	list_for_each_entry(mem, &process_info->userptr_inval_list,
+			    validate_list.head) {
+		list_add_tail(&mem->resv_list.head, &resv_list);
+		mem->resv_list.bo = mem->validate_list.bo;
+		mem->resv_list.shared = mem->validate_list.shared;
+	}
+
+	/* Reserve all BOs and page tables for validation */
+	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+	WARN(!list_empty(&duplicates), "Duplicates should be empty");
+	if (ret)
+		goto out;
+
+	amdgpu_sync_create(&sync);
+
+	/* Avoid triggering eviction fences when unmapping invalid
+	 * userptr BOs (waits for all fences, doesn't use
+	 * FENCE_OWNER_VM)
+	 */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
+						process_info->eviction_fence,
+						NULL, NULL);
+
+	ret = process_validate_vms(process_info);
+	if (ret)
+		goto unreserve_out;
+
+	/* Validate BOs and update GPUVM page tables */
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+		struct kfd_bo_va_list *bo_va_entry;
+
+		bo = mem->bo;
+
+		/* Copy pages array and validate the BO if we got user pages */
+		if (mem->user_pages[0]) {
+			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+						     mem->user_pages);
+			amdgpu_ttm_placement_from_domain(bo, mem->domain);
+			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+			if (ret) {
+				pr_err("%s: failed to validate BO\n", __func__);
+				goto unreserve_out;
+			}
+		}
+
+		/* Validate succeeded, now the BO owns the pages, free
+		 * our copy of the pointer array. Put this BO back on
+		 * the userptr_valid_list. If we need to revalidate
+		 * it, we need to start from scratch.
+		 */
+		kvfree(mem->user_pages);
+		mem->user_pages = NULL;
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_valid_list);
+
+		/* Update mapping. If the BO was not validated
+		 * (because we couldn't get user pages), this will
+		 * clear the page table entries, which will result in
+		 * VM faults if the GPU tries to access the invalid
+		 * memory.
+		 */
+		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+			if (!bo_va_entry->is_mapped)
+				continue;
+
+			ret = update_gpuvm_pte((struct amdgpu_device *)
+					       bo_va_entry->kgd_dev,
+					       bo_va_entry, &sync);
+			if (ret) {
+				pr_err("%s: update PTE failed\n", __func__);
+				/* make sure this gets validated again */
+				atomic_inc(&mem->invalid);
+				goto unreserve_out;
+			}
+		}
+	}
+
+	/* Update page directories */
+	ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_bo_fence(peer_vm->root.base.bo,
+				&process_info->eviction_fence->base, true);
+	ttm_eu_backoff_reservation(&ticket, &resv_list);
+	amdgpu_sync_wait(&sync, false);
+	amdgpu_sync_free(&sync);
+out:
+	kfree(pd_bo_list_entries);
+
+	return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct amdkfd_process_info *process_info =
+		container_of(dwork, struct amdkfd_process_info,
+			     restore_userptr_work);
+	struct task_struct *usertask;
+	struct mm_struct *mm;
+	int evicted_bos;
+
+	evicted_bos = atomic_read(&process_info->evicted_bos);
+	if (!evicted_bos)
+		return;
+
+	/* Reference task and mm in case of concurrent process termination */
+	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+	if (!usertask)
+		return;
+	mm = get_task_mm(usertask);
+	if (!mm) {
+		put_task_struct(usertask);
+		return;
+	}
+
+	mutex_lock(&process_info->lock);
+
+	if (update_invalid_user_pages(process_info, mm))
+		goto unlock_out;
+	/* userptr_inval_list can be empty if all evicted userptr BOs
+	 * have been freed. In that case there is nothing to validate
+	 * and we can just restart the queues.
+	 */
+	if (!list_empty(&process_info->userptr_inval_list)) {
+		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+			goto unlock_out; /* Concurrent eviction, try again */
+
+		if (validate_invalid_user_pages(process_info))
+			goto unlock_out;
+	}
+	/* Final check for concurrent eviction and atomic update. If
+	 * another eviction happens after successful update, it will
+	 * be a first eviction that calls quiesce_mm. The eviction
+	 * reference counting inside KFD will handle this case.
+	 */
+	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+	    evicted_bos)
+		goto unlock_out;
+	evicted_bos = 0;
+	if (kgd2kfd->resume_mm(mm)) {
+		pr_err("%s: Failed to resume KFD\n", __func__);
+		/* No recovery from this failure. Probably the CP is
+		 * hanging. No point trying again.
+		 */
+	}
+unlock_out:
+	mutex_unlock(&process_info->lock);
+	mmput(mm);
+	put_task_struct(usertask);
+
+	/* If validation failed, reschedule another attempt */
+	if (evicted_bos)
+		schedule_delayed_work(&process_info->restore_userptr_work,
+			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+}
+
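The restart decision hinges on a single compare-and-swap: the worker snapshots evicted_bos, revalidates, and only resumes the queues if the snapshot still matches, so any eviction that races with the restore forces another pass. A minimal model of that check, with hypothetical stubs for the resume and reschedule steps:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int evicted_bos;

static void resume_queues(void) { puts("resume user mode queues"); }
static void reschedule(void)    { puts("reschedule restore attempt"); }

static void restore_worker_model(void)
{
	int seen = atomic_load(&evicted_bos);

	if (!seen)
		return;

	/* ... update and revalidate the userptr BOs here ... */

	/* Resume only if no eviction raced with us; otherwise the
	 * exchange fails and another attempt is scheduled. */
	if (atomic_compare_exchange_strong(&evicted_bos, &seen, 0))
		resume_queues();
	else
		reschedule();
}

int main(void)
{
	atomic_store(&evicted_bos, 2);	/* two BOs were evicted */
	restore_worker_model();
	return 0;
}
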
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  *   KFD process identified by process_info
  *

+ 44 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c

@@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)

 	return ret;
 }
+
+union gfx_info {
+	struct  atom_gfx_info_v2_4 v24;
+};
+
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	int index;
+	uint8_t frev, crev;
+	uint16_t data_offset;
+
+	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+					    gfx_info);
+	if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+				   &frev, &crev, &data_offset)) {
+		union gfx_info *gfx_info = (union gfx_info *)
+			(mode_info->atom_context->bios + data_offset);
+		switch (crev) {
+		case 4:
+			adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se;
+			adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh;
+			adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se;
+			adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se;
+			adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs;
+			adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+			adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+			adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+			adev->gfx.config.gs_prim_buffer_depth =
+				le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+			adev->gfx.config.double_offchip_lds_buf =
+				gfx_info->v24.gc_double_offchip_lds_buffer;
+			adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+			adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+			adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+			adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+			return 0;
+		default:
+			return -EINVAL;
+		}
+
+	}
+	return -EINVAL;
+}

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h

@@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);

 #endif

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c

@@ -550,7 +550,7 @@ static int amdgpu_atpx_init(void)
  * look up whether we are the integrated or discrete GPU (all asics).
  * Returns the client id.
  */
-static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
+static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
 {
 	if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
 		return VGA_SWITCHEROO_IGD;

+ 11 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c

@@ -75,13 +75,20 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 {
 	struct amdgpu_bo *dobj = NULL;
 	struct amdgpu_bo *sobj = NULL;
+	struct amdgpu_bo_param bp;
 	uint64_t saddr, daddr;
 	int r, n;
 	int time;

+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = sdomain;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
 	n = AMDGPU_BENCHMARK_ITERATIONS;
 	n = AMDGPU_BENCHMARK_ITERATIONS;
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0,
-			     ttm_bo_type_kernel, NULL, &sobj);
+	r = amdgpu_bo_create(adev, &bp, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
@@ -93,8 +100,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 	if (r) {
 		goto out_cleanup;
 	}
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
-			     ttm_bo_type_kernel, NULL, &dobj);
+	bp.domain = ddomain;
+	r = amdgpu_bo_create(adev, &bp, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}

+ 6 - 241
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

@@ -23,7 +23,6 @@
  */
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/pci.h>
 #include <drm/drmP.h>
 #include <linux/firmware.h>
 #include <drm/amdgpu_drm.h>
@@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 	WARN(1, "Invalid indirect register space");
 	WARN(1, "Invalid indirect register space");
 }
 }
 
 
-static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
-				       enum cgs_resource_type resource_type,
-				       uint64_t size,
-				       uint64_t offset,
-				       uint64_t *resource_base)
-{
-	CGS_FUNC_ADEV;
-
-	if (resource_base == NULL)
-		return -EINVAL;
-
-	switch (resource_type) {
-	case CGS_RESOURCE_TYPE_MMIO:
-		if (adev->rmmio_size == 0)
-			return -ENOENT;
-		if ((offset + size) > adev->rmmio_size)
-			return -EINVAL;
-		*resource_base = adev->rmmio_base;
-		return 0;
-	case CGS_RESOURCE_TYPE_DOORBELL:
-		if (adev->doorbell.size == 0)
-			return -ENOENT;
-		if ((offset + size) > adev->doorbell.size)
-			return -EINVAL;
-		*resource_base = adev->doorbell.base;
-		return 0;
-	case CGS_RESOURCE_TYPE_FB:
-	case CGS_RESOURCE_TYPE_IO:
-	case CGS_RESOURCE_TYPE_ROM:
-	default:
-		return -EINVAL;
-	}
-}
-
-static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device,
-						  unsigned table, uint16_t *size,
-						  uint8_t *frev, uint8_t *crev)
-{
-	CGS_FUNC_ADEV;
-	uint16_t data_start;
-
-	if (amdgpu_atom_parse_data_header(
-		    adev->mode_info.atom_context, table, size,
-		    frev, crev, &data_start))
-		return (uint8_t*)adev->mode_info.atom_context->bios +
-			data_start;
-
-	return NULL;
-}
-
-static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table,
-					      uint8_t *frev, uint8_t *crev)
-{
-	CGS_FUNC_ADEV;
-
-	if (amdgpu_atom_parse_cmd_header(
-		    adev->mode_info.atom_context, table,
-		    frev, crev))
-		return 0;
-
-	return -EINVAL;
-}
-
-static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table,
-					  void *args)
-{
-	CGS_FUNC_ADEV;
-
-	return amdgpu_atom_execute_table(
-		adev->mode_info.atom_context, table, args);
-}
-
-static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
-				  enum amd_ip_block_type block_type,
-				  enum amd_clockgating_state state)
-{
-	CGS_FUNC_ADEV;
-	int i, r = -1;
-
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
-
-		if (adev->ip_blocks[i].version->type == block_type) {
-			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
-								(void *)adev,
-									state);
-			break;
-		}
-	}
-	return r;
-}
-
-static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
-				  enum amd_ip_block_type block_type,
-				  enum amd_powergating_state state)
-{
-	CGS_FUNC_ADEV;
-	int i, r = -1;
-
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
-
-		if (adev->ip_blocks[i].version->type == block_type) {
-			r = adev->ip_blocks[i].version->funcs->set_powergating_state(
-								(void *)adev,
-									state);
-			break;
-		}
-	}
-	return r;
-}
-
-
 static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
 {
 	CGS_FUNC_ADEV;
@@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
 	return result;
 }

-static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
-{
-	CGS_FUNC_ADEV;
-	if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
-		release_firmware(adev->pm.fw);
-		adev->pm.fw = NULL;
-		return 0;
-	}
-	/* cannot release other firmware because they are not created by cgs */
-	return -EINVAL;
-}
-
 static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type)
 {
@@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
 	return fw_version;
 }

-static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device,
-					bool en)
-{
-	CGS_FUNC_ADEV;
-
-	if (adev->gfx.rlc.funcs->enter_safe_mode == NULL ||
-		adev->gfx.rlc.funcs->exit_safe_mode == NULL)
-		return 0;
-
-	if (en)
-		adev->gfx.rlc.funcs->enter_safe_mode(adev);
-	else
-		adev->gfx.rlc.funcs->exit_safe_mode(adev);
-
-	return 0;
-}
-
-static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device,
-					bool lock)
-{
-	CGS_FUNC_ADEV;
-
-	if (lock)
-		mutex_lock(&adev->grbm_idx_mutex);
-	else
-		mutex_unlock(&adev->grbm_idx_mutex);
-}
-
 static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type,
 					struct cgs_firmware_info *info)
@@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 			case CHIP_POLARIS12:
 				strcpy(fw_name, "amdgpu/polaris12_smc.bin");
 				break;
+			case CHIP_VEGAM:
+				strcpy(fw_name, "amdgpu/vegam_smc.bin");
+				break;
 			case CHIP_VEGA10:
 				if ((adev->pdev->device == 0x687f) &&
 					((adev->pdev->revision == 0xc0) ||
@@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 			case CHIP_VEGA12:
 				strcpy(fw_name, "amdgpu/vega12_smc.bin");
 				break;
+			case CHIP_VEGA20:
+				strcpy(fw_name, "amdgpu/vega20_smc.bin");
+				break;
 			default:
 				DRM_ERROR("SMC firmware not supported\n");
 				return -EINVAL;
@@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 	return 0;
 }

-static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
-{
-	CGS_FUNC_ADEV;
-	return amdgpu_sriov_vf(adev);
-}
-
-static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
-					  struct cgs_display_info *info)
-{
-	CGS_FUNC_ADEV;
-	struct cgs_mode_info *mode_info;
-
-	if (info == NULL)
-		return -EINVAL;
-
-	mode_info = info->mode_info;
-	if (mode_info)
-		/* if the displays are off, vblank time is max */
-		mode_info->vblank_time_us = 0xffffffff;
-
-	if (!amdgpu_device_has_dc_support(adev)) {
-		struct amdgpu_crtc *amdgpu_crtc;
-		struct drm_device *ddev = adev->ddev;
-		struct drm_crtc *crtc;
-		uint32_t line_time_us, vblank_lines;
-
-		if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
-			list_for_each_entry(crtc,
-					&ddev->mode_config.crtc_list, head) {
-				amdgpu_crtc = to_amdgpu_crtc(crtc);
-				if (crtc->enabled) {
-					info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
-					info->display_count++;
-				}
-				if (mode_info != NULL &&
-					crtc->enabled && amdgpu_crtc->enabled &&
-					amdgpu_crtc->hw_mode.clock) {
-					line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
-								amdgpu_crtc->hw_mode.clock;
-					vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
-								amdgpu_crtc->hw_mode.crtc_vdisplay +
-								(amdgpu_crtc->v_border * 2);
-					mode_info->vblank_time_us = vblank_lines * line_time_us;
-					mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
-					/* we have issues with mclk switching with refresh rates
-					 * over 120 hz on the non-DC code.
-					 */
-					if (mode_info->refresh_rate > 120)
-						mode_info->vblank_time_us = 0;
-					mode_info = NULL;
-				}
-			}
-		}
-	} else {
-		info->display_count = adev->pm.pm_display_cfg.num_display;
-		if (mode_info != NULL) {
-			mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time;
-			mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh;
-		}
-	}
-	return 0;
-}
-
-
-static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled)
-{
-	CGS_FUNC_ADEV;
-
-	adev->pm.dpm_enabled = enabled;
-
-	return 0;
-}
-
 static const struct cgs_ops amdgpu_cgs_ops = {
 	.read_register = amdgpu_cgs_read_register,
 	.write_register = amdgpu_cgs_write_register,
 	.read_ind_register = amdgpu_cgs_read_ind_register,
 	.write_ind_register = amdgpu_cgs_write_ind_register,
-	.get_pci_resource = amdgpu_cgs_get_pci_resource,
-	.atom_get_data_table = amdgpu_cgs_atom_get_data_table,
-	.atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
-	.atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
 	.get_firmware_info = amdgpu_cgs_get_firmware_info,
 	.get_firmware_info = amdgpu_cgs_get_firmware_info,
-	.set_powergating_state = amdgpu_cgs_set_powergating_state,
-	.set_clockgating_state = amdgpu_cgs_set_clockgating_state,
-	.get_active_displays_info = amdgpu_cgs_get_active_displays_info,
-	.notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
-	.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
-	.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
-	.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
 };
 };

 struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
+ 4 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

@@ -691,7 +691,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
 	return ret;
 	return ret;
 }

-static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
 					     struct drm_display_mode *mode)
 {
 	struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
 	return ret;
 	return ret;
 }

-static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
 					    struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
 		amdgpu_connector->use_digital = true;
 		amdgpu_connector->use_digital = true;
 }

-static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
 					    struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
 	return ret;
 	return ret;
 }

-static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
 					   struct drm_display_mode *mode)
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ 6 - 12
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -382,8 +382,7 @@ retry:
 
 
 	p->bytes_moved += ctx.bytes_moved;
 	p->bytes_moved += ctx.bytes_moved;
 	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+	    amdgpu_bo_in_cpu_visible_vram(bo))
 		p->bytes_moved_vis += ctx.bytes_moved;
 		p->bytes_moved_vis += ctx.bytes_moved;
 
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -411,7 +410,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		u64 initial_bytes_moved, bytes_moved;
 		bool update_bytes_moved_vis;
 		uint32_t other;

 			continue;
 			continue;

 		/* Good we can try to move this BO somewhere else */
-		amdgpu_ttm_placement_from_domain(bo, other);
 		update_bytes_moved_vis =
 			adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-			bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
-		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+			amdgpu_bo_in_cpu_visible_vram(bo);
+		amdgpu_ttm_placement_from_domain(bo, other);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
-			initial_bytes_moved;
-		p->bytes_moved += bytes_moved;
+		p->bytes_moved += ctx.bytes_moved;
 		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += bytes_moved;
+			p->bytes_moved_vis += ctx.bytes_moved;

 		if (unlikely(r))
 			break;
@@ -536,7 +530,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (p->bo_list) {
 		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 		if (p->bo_list->first_userptr != p->bo_list->num_entries)
-			p->mn = amdgpu_mn_get(p->adev);
+			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 	}

 	INIT_LIST_HEAD(&duplicates);

+ 72 - 9
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

@@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			continue;

 		r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
-					  rq, amdgpu_sched_jobs, &ctx->guilty);
+					  rq, &ctx->guilty);
 		if (r)
 			goto failed;
 	}
@@ -111,8 +111,9 @@ failed:
 	return r;
 }

-static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+static void amdgpu_ctx_fini(struct kref *ref)
 {
+	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;

@@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 	kfree(ctx->fences);
 	ctx->fences = NULL;

-	for (i = 0; i < adev->num_rings; i++)
-		drm_sched_entity_fini(&adev->rings[i]->sched,
-				      &ctx->rings[i].entity);
-
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);

 	mutex_destroy(&ctx->lock);
+
+	kfree(ctx);
 }

 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
+	u32 i;

 	ctx = container_of(ref, struct amdgpu_ctx, refcount);

-	amdgpu_ctx_fini(ctx);
+	for (i = 0; i < ctx->adev->num_rings; i++) {

-	kfree(ctx);
+		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+			continue;
+
+		drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
+			&ctx->rings[i].entity);
+	}
+
+	amdgpu_ctx_fini(ref);
 }

 static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
@@ -437,16 +444,72 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 	idr_init(&mgr->ctx_handles);
 }

+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+{
+	struct amdgpu_ctx *ctx;
+	struct idr *idp;
+	uint32_t id, i;
+
+	idp = &mgr->ctx_handles;
+
+	idr_for_each_entry(idp, ctx, id) {
+
+		if (!ctx->adev)
+			return;
+
+		for (i = 0; i < ctx->adev->num_rings; i++) {
+
+			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+				continue;
+
+			if (kref_read(&ctx->refcount) == 1)
+				drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
+						  &ctx->rings[i].entity);
+			else
+				DRM_ERROR("ctx %p is still alive\n", ctx);
+		}
+	}
+}
+
+void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
+{
+	struct amdgpu_ctx *ctx;
+	struct idr *idp;
+	uint32_t id, i;
+
+	idp = &mgr->ctx_handles;
+
+	idr_for_each_entry(idp, ctx, id) {
+
+		if (!ctx->adev)
+			return;
+
+		for (i = 0; i < ctx->adev->num_rings; i++) {
+
+			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+				continue;
+
+			if (kref_read(&ctx->refcount) == 1)
+				drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
+					&ctx->rings[i].entity);
+			else
+				DRM_ERROR("ctx %p is still alive\n", ctx);
+		}
+	}
+}
+
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 {
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id;

+	amdgpu_ctx_mgr_entity_cleanup(mgr);
+
 	idp = &mgr->ctx_handles;
 	idp = &mgr->ctx_handles;

 	idr_for_each_entry(idp, ctx, id) {
-		if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
+		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
 			DRM_ERROR("ctx %p is still alive\n", ctx);
 	}

+ 190 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

@@ -28,8 +28,13 @@
 #include <linux/debugfs.h>
 #include <linux/debugfs.h>
 #include "amdgpu.h"

- * Debugfs
+/**
+ * amdgpu_debugfs_add_files - Add simple debugfs entries
+ *
+ * @adev:  Device to attach debugfs entries to
+ * @files:  Array of function callbacks that respond to reads
+ * @nfiles: Number of callbacks to register
+ *
  */
  */
 int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
 			     const struct drm_info_list *files,
@@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,

 #if defined(CONFIG_DEBUG_FS)

-
+/**
+ * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
+ *
+ * @read: True if reading
+ * @f: open file handle
+ * @buf: User buffer to write/read to
+ * @size: Number of bytes to write/read
+ * @pos:  Offset to seek to
+ *
+ * This debugfs entry has special meaning on the offset being sought.
+ * Various bits have different meanings:
+ *
+ * Bit 62:  Indicates a GRBM bank switch is needed
+ * Bit 61:  Indicates a SRBM bank switch is needed (implies bit 62 is
+ * 			zero)
+ * Bits 24..33: The SE or ME selector if needed
+ * Bits 34..43: The SH (or SA) or PIPE selector if needed
+ * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
+ *
+ * Bit 23:  Indicates that the PM power gating lock should be held
+ * 			This is necessary to read registers that might be
+ * 			unreliable during a power gating transition.
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 		char __user *buf, size_t size, loff_t *pos)
 {
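From userspace, those selector bits are packed into the file offset before the read. A sketch of a 32-bit register read under these assumptions; the debugfs path, register offset, and selector values here are illustrative, not taken from the patch:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t val;
	uint64_t off = 0x2000;		/* hypothetical register byte offset */

	off |= 1ULL << 62;		/* request a GRBM bank switch */
	off |= (uint64_t)1 << 24;	/* SE selector, bits 24..33 */

	int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs", O_RDONLY);
	if (fd < 0)
		return 1;
	if (pread(fd, &val, sizeof(val), (off_t)off) == sizeof(val))
		printf("register = 0x%08x\n", val);
	close(fd);
	return 0;
}
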
@@ -164,19 +195,37 @@ end:
 	return result;
 }

-
+/**
+ * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
+ */
 static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
 	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
 }

+/**
+ * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
+ */
 static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
 	return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
 }

+
+/**
+ * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	return result;
 }

+/**
+ * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write.  This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
@@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 	return result;
 }

+/**
+ * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	return result;
 }

+/**
+ * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write.  This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
@@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	return result;
 }

+/**
+ * amdgpu_debugfs_regs_smc_read - Read from a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	return result;
 }

+/**
+ * amdgpu_debugfs_regs_smc_write - Write to a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write.  This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
@@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	return result;
 }

+/**
+ * amdgpu_debugfs_gca_config_read - Read from gfx config data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * This file is used to access configuration data in a somewhat
+ * stable fashion.  The format is a series of DWORDs with the first
+ * indicating which revision it is.  New content is appended to the
+ * end so that older software can still read the data.
+ */
+
 static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
 static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
 	return result;
 	return result;
 }

+ * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The offset is treated as the BYTE address of one of the sensors
+ * enumerated in amd/include/kgd_pp_interface.h under the
+ * 'amd_pp_sensors' enumeration.  For instance to read the UVD VCLK
+ * you would use the offset 3 * 4 = 12.
+ */
 static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
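Using the UVD VCLK example from the comment (enum index 3, so byte offset 3 * 4 = 12), a userspace read could look as follows; the debugfs path is an assumption:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t vclk;
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_sensors", O_RDONLY);

	if (fd < 0)
		return 1;
	/* offset = sensor enum index * 4; UVD VCLK is index 3 */
	if (pread(fd, &vclk, sizeof(vclk), 12) == sizeof(vclk))
		printf("UVD VCLK: %u\n", (unsigned)vclk);
	close(fd);
	return 0;
}
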
@@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 	if (size & 3 || *pos & 0x3)
 		return -EINVAL;

-	if (amdgpu_dpm == 0)
+	if (!adev->pm.dpm_enabled)
 		return -EINVAL;
 		return -EINVAL;

 	/* convert offset to sensor number */
 	return !r ? outsize : r;
 	return !r ? outsize : r;
 }

+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The offset being sought changes which wave that the status data
+ * will be returned for.  The bits are used as follows:
+ *
+ * Bits 0..6: 	Byte offset into data
+ * Bits 7..14:	SE selector
+ * Bits 15..22:	SH/SA selector
+ * Bits 23..30: CU/{WGP+SIMD} selector
+ * Bits 31..36: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ *
+ * The returned data begins with one DWORD of version information
+ * Followed by WAVE STATUS registers relevant to the GFX IP version
+ * being used.  See gfx_v8_0_read_wave_data() for an example output.
+ */
 static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
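Wave selection follows the same offset-packing scheme as the register files, just with the field positions listed above. A sketch; the path and selector values are illustrative:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t data[32];
	uint64_t off = 0;

	off |= (uint64_t)1 << 7;	/* SE selector,    bits 7..14  */
	off |= (uint64_t)0 << 15;	/* SH/SA selector, bits 15..22 */
	off |= (uint64_t)2 << 23;	/* CU selector,    bits 23..30 */
	off |= (uint64_t)0 << 31;	/* wave ID,        bits 31..36 */
	off |= (uint64_t)0 << 37;	/* SIMD ID,        bits 37..44 */

	int fd = open("/sys/kernel/debug/dri/0/amdgpu_wave", O_RDONLY);
	if (fd < 0)
		return 1;
	ssize_t n = pread(fd, data, sizeof(data), (off_t)off);
	if (n >= 4)	/* first DWORD is the dump version */
		printf("wave dump version %u, %zd bytes\n",
		       (unsigned)data[0], n);
	close(fd);
	return 0;
}
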
@@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	return result;
 }

+/**
+ * amdgpu_debugfs_gpr_read - Read wave gprs
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The offset being sought selects which wave the GPR data will be
+ * returned for.  The bits are used as follows:
+ *
+ * Bits 0..11:	Byte offset into data
+ * Bits 12..19:	SE selector
+ * Bits 20..27:	SH/SA selector
+ * Bits 28..35: CU/{WGP+SIMD} selector
+ * Bits 36..43: WAVE ID selector
+ * Bits 44..51: SIMD ID selector
+ * Bits 52..59: Thread selector
+ * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
+ *
+ * The returned data comes from the SGPR or VGPR register bank for
+ * the selected operational unit.
+ */
 static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = {
 	"amdgpu_gpr",
 	"amdgpu_gpr",
 };
 };
 
 
+/**
+ * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
+ * register access.
+ *
+ * @adev: The device to attach the debugfs entries to
+ */
 int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
 {
 	struct drm_minor *minor = adev->ddev->primary;

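All three debugfs interfaces documented above share one convention: the file offset encodes what is read. A minimal user-space sketch of that convention follows; it is not part of the patch, and the debugfs path, the exact file names, and the SIMD field position (bits 44..51) are assumptions based on the comments in this series.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Pack a wave-GPR selector using the bit layout documented above. */
static uint64_t gpr_offset(uint64_t byte_off, uint64_t se, uint64_t sh,
			   uint64_t cu, uint64_t wave, uint64_t simd,
			   uint64_t thread, uint64_t bank)
{
	return (byte_off & 0xfff) | (se << 12) | (sh << 20) | (cu << 28) |
	       (wave << 36) | (simd << 44) | (thread << 52) | (bank << 60);
}

int main(void)
{
	/* sensor 3 (UVD VCLK per the comment above) lives at byte offset 12 */
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_sensors", O_RDONLY);
	uint32_t vclk;

	if (fd >= 0 && pread(fd, &vclk, sizeof(vclk), 3 * 4) == sizeof(vclk))
		printf("UVD VCLK: %u\n", vclk);
	if (fd >= 0)
		close(fd);

	/* first SGPR dword of SE0/SH0/CU0, wave 0, SIMD 0, thread 0 */
	printf("gpr offset: 0x%llx\n",
	       (unsigned long long)gpr_offset(0, 0, 0, 0, 0, 0, 0, 1));
	return 0;
}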
+ 87 - 34
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -83,8 +83,10 @@ static const char *amdgpu_asic_name[] = {
 	"POLARIS10",
 	"POLARIS10",
 	"POLARIS11",
 	"POLARIS11",
 	"POLARIS12",
 	"POLARIS12",
+	"VEGAM",
 	"VEGA10",
 	"VEGA10",
 	"VEGA12",
 	"VEGA12",
+	"VEGA20",
 	"RAVEN",
 	"RAVEN",
 	"LAST",
 	"LAST",
 };
 };
@@ -690,6 +692,8 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
 {
 	u64 size_af, size_bf;
 
+	mc->gart_size += adev->pm.smu_prv_buffer_size;
+
 	size_af = adev->gmc.mc_mask - mc->vram_end;
 	size_bf = mc->vram_start;
 	if (size_bf > size_af) {
@@ -907,6 +911,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 	}
 }
 
+static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
+{
+	struct sysinfo si;
+	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
+	uint64_t total_memory;
+	uint64_t dram_size_seven_GB = 0x1B8000000;
+	uint64_t dram_size_three_GB = 0xB8000000;
+
+	if (amdgpu_smu_memory_pool_size == 0)
+		return;
+
+	if (!is_os_64) {
+		DRM_WARN("Not 64-bit OS, feature not supported\n");
+		goto def_value;
+	}
+	si_meminfo(&si);
+	total_memory = (uint64_t)si.totalram * si.mem_unit;
+
+	if ((amdgpu_smu_memory_pool_size == 1) ||
+		(amdgpu_smu_memory_pool_size == 2)) {
+		if (total_memory < dram_size_three_GB)
+			goto def_value1;
+	} else if ((amdgpu_smu_memory_pool_size == 4) ||
+		(amdgpu_smu_memory_pool_size == 8)) {
+		if (total_memory < dram_size_seven_GB)
+			goto def_value1;
+	} else {
+		DRM_WARN("Smu memory pool size not supported\n");
+		goto def_value;
+	}
+	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
+
+	return;
+
+def_value1:
+	DRM_WARN("Not enough system memory\n");
+def_value:
+	adev->pm.smu_prv_buffer_size = 0;
+}
+
 /**
  * amdgpu_device_check_arguments - validate module params
  *
@@ -948,6 +992,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 		amdgpu_vm_fragment_size = -1;
 	}
 
+	amdgpu_device_check_smu_prv_buffer_size(adev);
+
 	amdgpu_device_check_vm_size(adev);
 
 	amdgpu_device_check_block_size(adev);
@@ -1039,10 +1085,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
  * the hardware IP specified.
  * Returns the error code from the last instance.
  */
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state)
 {
+	struct amdgpu_device *adev = dev;
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1072,10 +1119,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
  * the hardware IP specified.
  * Returns the error code from the last instance.
  */
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = dev;
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1320,9 +1368,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	case CHIP_TOPAZ:
 	case CHIP_TONGA:
 	case CHIP_FIJI:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 #ifdef CONFIG_DRM_AMDGPU_SI
@@ -1339,6 +1388,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
 #endif
+	case CHIP_VEGA20:
 	default:
 		return 0;
 	case CHIP_VEGA10:
@@ -1428,9 +1478,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	case CHIP_TOPAZ:
 	case CHIP_TONGA:
 	case CHIP_FIJI:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
@@ -1472,6 +1523,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 #endif
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		if (adev->asic_type == CHIP_RAVEN)
 			adev->family = AMDGPU_FAMILY_RV;
@@ -1499,6 +1551,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 			return -EAGAIN;
 	}
 
+	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 			DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1654,12 +1708,17 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 	if (amdgpu_emu_mode == 1)
 		return 0;
 
+	r = amdgpu_ib_ring_tests(adev);
+	if (r)
+		DRM_ERROR("ib ring test failed (%d).\n", r);
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1704,8 +1763,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 		}
 	}
 
-	mod_delayed_work(system_wq, &adev->late_init_work,
-			msecs_to_jiffies(AMDGPU_RESUME_MS));
+	queue_delayed_work(system_wq, &adev->late_init_work,
+			   msecs_to_jiffies(AMDGPU_RESUME_MS));
 
 	amdgpu_device_fill_reset_magic(adev);
 
@@ -1759,6 +1818,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
 			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1850,6 +1910,12 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_request_full_gpu(adev, false);
 
+	/* ungate SMC block powergating */
+	if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
+		amdgpu_device_ip_set_powergating_state(adev,
+						       AMD_IP_BLOCK_TYPE_SMC,
+						       AMD_PG_STATE_UNGATE);
+
 	/* ungate SMC block first */
 	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
 						   AMD_CG_STATE_UNGATE);
@@ -2086,16 +2152,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_MULLINS:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	case CHIP_TONGA:
 	case CHIP_FIJI:
-#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
-		return amdgpu_dc != 0;
-#endif
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case CHIP_RAVEN:
 #endif
@@ -2375,10 +2440,6 @@ fence_driver_init:
 		goto failed;
 	}
 
-	r = amdgpu_ib_ring_tests(adev);
-	if (r)
-		DRM_ERROR("ib ring test failed (%d).\n", r);
-
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_init_data_exchange(adev);
 
@@ -2539,7 +2600,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 	/* unpin the front buffers and cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-		struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb);
+		struct drm_framebuffer *fb = crtc->primary->fb;
 		struct amdgpu_bo *robj;
 
 		if (amdgpu_crtc->cursor_bo) {
@@ -2551,10 +2612,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 			}
 		}
 
-		if (rfb == NULL || rfb->obj == NULL) {
+		if (fb == NULL || fb->obj[0] == NULL) {
 			continue;
 		}
-		robj = gem_to_amdgpu_bo(rfb->obj);
+		robj = gem_to_amdgpu_bo(fb->obj[0]);
 		/* don't unpin kernel fb objects */
 		if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
 			r = amdgpu_bo_reserve(robj, true);
@@ -2640,11 +2701,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	}
 	amdgpu_fence_driver_resume(adev);
 
-	if (resume) {
-		r = amdgpu_ib_ring_tests(adev);
-		if (r)
-			DRM_ERROR("ib ring test failed (%d).\n", r);
-	}
 
 	r = amdgpu_device_ip_late_init(adev);
 	if (r)
@@ -2736,6 +2792,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return true;
 
+	if (amdgpu_asic_need_full_reset(adev))
+		return true;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
@@ -2792,6 +2851,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
 {
 	int i;
 
+	if (amdgpu_asic_need_full_reset(adev))
+		return true;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
@@ -3087,20 +3149,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 
 	/* now we are okay to resume SMC/CP/SDMA */
 	r = amdgpu_device_ip_reinit_late_sriov(adev);
-	amdgpu_virt_release_full_gpu(adev, true);
 	if (r)
 		goto error;
 
 	amdgpu_irq_gpu_reset_resume_helper(adev);
 	r = amdgpu_ib_ring_tests(adev);
 
+error:
+	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		atomic_inc(&adev->vram_lost_counter);
 		r = amdgpu_device_handle_vram_lost(adev);
 	}
 
-error:
-
 	return r;
 }
 
@@ -3117,7 +3178,6 @@ error:
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job, bool force)
 {
-	struct drm_atomic_state *state = NULL;
 	int i, r, resched;
 
 	if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -3140,10 +3200,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
-	/* store modesetting */
-	if (amdgpu_device_has_dc_support(adev))
-		state = drm_atomic_helper_suspend(adev->ddev);
-
 	/* block all schedulers and reset given job's ring */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -3183,10 +3239,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		kthread_unpark(ring->sched.thread);
 	}
 
-	if (amdgpu_device_has_dc_support(adev)) {
-		if (drm_atomic_helper_resume(adev->ddev, state))
-			dev_info(adev->dev, "drm resume failed:%d\n", r);
-	} else {
+	if (!amdgpu_device_has_dc_support(adev)) {
 		drm_helper_resume_force_mode(adev->ddev);
 	}
 

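The smu_prv_buffer sizing added above maps the module parameter to a byte count with value << 28 (256 MB units) and rejects configurations with too little system RAM. A user-space sketch of the same policy, reusing the driver's constants; the thresholds sit slightly under 3 GB and 7 GB, and everything else here is illustrative only:

#include <stdint.h>
#include <stdio.h>

static uint64_t smu_pool_bytes(uint32_t pool, uint64_t total_ram)
{
	const uint64_t three_gb = 0xB8000000ULL;   /* ~2.9 GB, as above */
	const uint64_t seven_gb = 0x1B8000000ULL;  /* ~6.9 GB, as above */

	if (pool == 0)
		return 0;
	if ((pool == 1 || pool == 2) && total_ram >= three_gb)
		return (uint64_t)pool << 28;       /* 256 MB units */
	if ((pool == 4 || pool == 8) && total_ram >= seven_gb)
		return (uint64_t)pool << 28;
	return 0;                                  /* unsupported value or low RAM */
}

int main(void)
{
	/* 0x4 << 28 = 1 GB reserved in GART when 8 GB of RAM is present */
	printf("%llu MB\n",
	       (unsigned long long)(smu_pool_bytes(4, 8ULL << 30) >> 20));
	return 0;
}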
+ 9 - 31
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c

@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_fb_helper.h>
 
 static void amdgpu_display_flip_callback(struct dma_fence *f,
@@ -151,8 +152,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	struct amdgpu_framebuffer *old_amdgpu_fb;
-	struct amdgpu_framebuffer *new_amdgpu_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_flip_work *work;
 	struct amdgpu_bo *new_abo;
@@ -174,15 +173,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
 	work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
 
 	/* schedule unpin of the old buffer */
-	old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-	obj = old_amdgpu_fb->obj;
+	obj = crtc->primary->fb->obj[0];
 
 	/* take a reference to the old object */
 	work->old_abo = gem_to_amdgpu_bo(obj);
 	amdgpu_bo_ref(work->old_abo);
 
-	new_amdgpu_fb = to_amdgpu_framebuffer(fb);
-	obj = new_amdgpu_fb->obj;
+	obj = fb->obj[0];
 	new_abo = gem_to_amdgpu_bo(obj);
 
 	/* pin the new buffer */
@@ -192,7 +189,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
 		goto cleanup;
 	}
 
-	r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
+	r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base);
 	if (unlikely(r != 0)) {
 		DRM_ERROR("failed to pin new abo buffer before flip\n");
 		goto unreserve;
@@ -482,31 +479,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
 	return true;
 }
 
-static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
-{
-	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
-	drm_gem_object_put_unlocked(amdgpu_fb->obj);
-	drm_framebuffer_cleanup(fb);
-	kfree(amdgpu_fb);
-}
-
-static int amdgpu_display_user_framebuffer_create_handle(
-			struct drm_framebuffer *fb,
-			struct drm_file *file_priv,
-			unsigned int *handle)
-{
-	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
-	return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle);
-}
-
 static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
-	.destroy = amdgpu_display_user_framebuffer_destroy,
-	.create_handle = amdgpu_display_user_framebuffer_create_handle,
+	.destroy = drm_gem_fb_destroy,
+	.create_handle = drm_gem_fb_create_handle,
 };
 
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
 {
 	uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
 
@@ -526,11 +504,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
 				    struct drm_gem_object *obj)
 {
 	int ret;
-	rfb->obj = obj;
+	rfb->base.obj[0] = obj;
 	drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
 	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
 	if (ret) {
-		rfb->obj = NULL;
+		rfb->base.obj[0] = NULL;
 		return ret;
 	}
 	return 0;

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_display.h

@@ -23,7 +23,7 @@
 #ifndef __AMDGPU_DISPLAY_H__
 #define __AMDGPU_DISPLAY_H__
 
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
 struct drm_framebuffer *
 amdgpu_display_user_framebuffer_create(struct drm_device *dev,
 				       struct drm_file *file_priv,

+ 20 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c

@@ -115,6 +115,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
 	pr_cont("\n");
 }
 
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
+{
+	struct drm_device *ddev = adev->ddev;
+	struct drm_crtc *crtc;
+	struct amdgpu_crtc *amdgpu_crtc;
+
+	adev->pm.dpm.new_active_crtcs = 0;
+	adev->pm.dpm.new_active_crtc_count = 0;
+	if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
+		list_for_each_entry(crtc,
+				    &ddev->mode_config.crtc_list, head) {
+			amdgpu_crtc = to_amdgpu_crtc(crtc);
+			if (amdgpu_crtc->enabled) {
+				adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
+				adev->pm.dpm.new_active_crtc_count++;
+			}
+		}
+	}
+}
+
 
 u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
 {

+ 3 - 8
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h

@@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src {
 	AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
 };
 
-#define SCLK_DEEP_SLEEP_MASK 0x8
-
 struct amdgpu_ps {
 	u32 caps; /* vbios flags */
 	u32 class; /* vbios flags */
@@ -349,12 +347,6 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
 			(adev)->powerplay.pp_handle, msg_id))
 
-#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \
-			virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \
-		((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \
-			(adev)->powerplay.pp_handle, virtual_addr_low, \
-			virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
-
 #define amdgpu_dpm_get_power_profile_mode(adev, buf) \
 		((adev)->powerplay.pp_funcs->get_power_profile_mode(\
 			(adev)->powerplay.pp_handle, buf))
@@ -445,6 +437,8 @@ struct amdgpu_pm {
 	uint32_t                pcie_gen_mask;
 	uint32_t                pcie_mlw_mask;
 	struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
+	uint32_t                smu_prv_buffer_size;
+	struct amdgpu_bo        *smu_prv_buffer;
 };
 
 #define R600_SSTU_DFLT                               0
@@ -482,6 +476,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
 				struct amdgpu_ps *rps);
 u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
 u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
 bool amdgpu_is_uvd_state(u32 class, u32 class2);
 void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
 			      u32 *p, u32 *u);

+ 19 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -75,9 +75,10 @@
  * - 3.23.0 - Add query for VRAM lost counter
  * - 3.24.0 - Add high priority compute support for gfx9
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
+ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	25
+#define KMS_DRIVER_MINOR	26
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -121,7 +122,7 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffffbfff;
+uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */
 int amdgpu_ngg = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
@@ -132,6 +133,7 @@ int amdgpu_lbpw = -1;
 int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
 int amdgpu_emu_mode = 0;
+uint amdgpu_smu_memory_pool_size = 0;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -316,6 +318,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)
 module_param_named(cik_support, amdgpu_cik_support, int, 0444);
 #endif
 
+MODULE_PARM_DESC(smu_memory_pool_size,
+	"reserve GTT for SMU debug usage, 0 = disable, "
+		"0x1 = 256 MB, 0x2 = 512 MB, 0x4 = 1 GB, 0x8 = 2 GB");
+module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -534,6 +541,9 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	/* VEGAM */
+	{0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+	{0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
 	/* Vega 10 */
 	{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
@@ -550,6 +560,13 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
 	{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
 	{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+	/* Vega 20 */
+	{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
 	/* Raven */
 	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
 

+ 6 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c

@@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	/* need to align pitch with crtc limits */
 	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
 						  fb_tiled);
-	domain = amdgpu_display_framebuffer_domains(adev);
+	domain = amdgpu_display_supported_domains(adev);
 
 	height = ALIGN(mode_cmd->height, 8);
 	size = mode_cmd->pitches[0] * height;
@@ -292,9 +292,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb
 
 	drm_fb_helper_unregister_fbi(&rfbdev->helper);
 
-	if (rfb->obj) {
-		amdgpufb_destroy_pinned_object(rfb->obj);
-		rfb->obj = NULL;
+	if (rfb->base.obj[0]) {
+		amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
+		rfb->base.obj[0] = NULL;
 		drm_framebuffer_unregister_private(&rfb->base);
 		drm_framebuffer_cleanup(&rfb->base);
 	}
@@ -377,7 +377,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
 	if (!adev->mode_info.rfbdev)
 		return 0;
 
-	robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj);
+	robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
 	size += amdgpu_bo_size(robj);
 	return size;
 }
@@ -386,7 +386,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
 {
 	if (!adev->mode_info.rfbdev)
 		return false;
-	if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj))
+	if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
 		return true;
 	return false;
 }

+ 6 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+		      unsigned flags)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
 		       adev->fence_context + ring->idx,
 		       seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       seq, AMDGPU_FENCE_FLAG_INT);
+			       seq, flags | AMDGPU_FENCE_FLAG_INT);
 
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	/* This function can't be called concurrently anyway, otherwise
@@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t index;
 
-	if (ring != &adev->uvd.ring) {
+	if (ring != &adev->uvd.inst[ring->me].ring) {
 		ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
 		ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
 	} else {
 		/* put fence directly behind firmware */
 		index = ALIGN(adev->uvd.fw->size, 8);
-		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
-		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
+		ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
+		ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
 	}
 	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
 	amdgpu_irq_get(adev, irq_src, irq_type);

+ 11 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

@@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gart.robj == NULL) {
-		r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
-				     AMDGPU_GEM_DOMAIN_VRAM,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     ttm_bo_type_kernel, NULL,
-				     &adev->gart.robj);
+		struct amdgpu_bo_param bp;
+
+		memset(&bp, 0, sizeof(bp));
+		bp.size = adev->gart.table_size;
+		bp.byte_align = PAGE_SIZE;
+		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+		bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+		bp.type = ttm_bo_type_kernel;
+		bp.resv = NULL;
+		r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
 		if (r) {
 			return r;
 		}

+ 37 - 13
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

@@ -48,17 +48,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     struct drm_gem_object **obj)
 {
 	struct amdgpu_bo *bo;
+	struct amdgpu_bo_param bp;
 	int r;
 
+	memset(&bp, 0, sizeof(bp));
 	*obj = NULL;
 	/* At least align on page size */
 	if (alignment < PAGE_SIZE) {
 		alignment = PAGE_SIZE;
 	}
 
+	bp.size = size;
+	bp.byte_align = alignment;
+	bp.type = type;
+	bp.resv = resv;
+	bp.preferred_domain = initial_domain;
 retry:
-	r = amdgpu_bo_create(adev, size, alignment, initial_domain,
-			     flags, type, resv, &bo);
+	bp.flags = flags;
+	bp.domain = initial_domain;
+	r = amdgpu_bo_create(adev, &bp, &bo);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -221,12 +229,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	/* reject invalid gem domains */
-	if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
-				 AMDGPU_GEM_DOMAIN_GTT |
-				 AMDGPU_GEM_DOMAIN_VRAM |
-				 AMDGPU_GEM_DOMAIN_GDS |
-				 AMDGPU_GEM_DOMAIN_GWS |
-				 AMDGPU_GEM_DOMAIN_OA))
+	if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
 		return -EINVAL;
 
 	/* create a gem object to contain this object in */
@@ -771,16 +774,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 }
 
 #if defined(CONFIG_DEBUG_FS)
+
+#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag)	\
+	if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) {	\
+		seq_printf((m), " " #flag);		\
+	}
+
 static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
 {
 	struct drm_gem_object *gobj = ptr;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
 	struct seq_file *m = data;
 
+	struct dma_buf_attachment *attachment;
+	struct dma_buf *dma_buf;
 	unsigned domain;
 	const char *placement;
 	unsigned pin_count;
-	uint64_t offset;
 
 	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
 	switch (domain) {
@@ -798,13 +808,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
 	seq_printf(m, "\t0x%08x: %12ld byte %s",
 	seq_printf(m, "\t0x%08x: %12ld byte %s",
 		   id, amdgpu_bo_size(bo), placement);
 		   id, amdgpu_bo_size(bo), placement);
 
 
-	offset = READ_ONCE(bo->tbo.mem.start);
-	if (offset != AMDGPU_BO_INVALID_OFFSET)
-		seq_printf(m, " @ 0x%010Lx", offset);
-
 	pin_count = READ_ONCE(bo->pin_count);
 	pin_count = READ_ONCE(bo->pin_count);
 	if (pin_count)
 	if (pin_count)
 		seq_printf(m, " pin count %d", pin_count);
 		seq_printf(m, " pin count %d", pin_count);
+
+	dma_buf = READ_ONCE(bo->gem_base.dma_buf);
+	attachment = READ_ONCE(bo->gem_base.import_attach);
+
+	if (attachment)
+		seq_printf(m, " imported from %p", dma_buf);
+	else if (dma_buf)
+		seq_printf(m, " exported as %p", dma_buf);
+
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
+
 	seq_printf(m, "\n");
 	seq_printf(m, "\n");
 
 
 	return 0;
 	return 0;

+ 6 - 2
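The amdgpu_debugfs_gem_bo_print_flag() macro added above relies on token pasting (##) to build the full flag name for the test and on stringification (#) for the output. A standalone sketch of the same trick, with made-up flag values; the do-while(0) wrapper here is a defensive variant of the kernel macro's bare if-with-braces:

#include <stdio.h>

#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)
#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS       (1 << 1)

/* FLAG is pasted onto the prefix for the test and stringified for output */
#define print_flag(flags, flag)					\
	do {							\
		if ((flags) & (AMDGPU_GEM_CREATE_ ## flag))	\
			printf(" " #flag);			\
	} while (0)

int main(void)
{
	unsigned int flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;

	print_flag(flags, CPU_ACCESS_REQUIRED);	/* bit clear: prints nothing */
	print_flag(flags, NO_CPU_ACCESS);	/* prints " NO_CPU_ACCESS" */
	printf("\n");
	return 0;
}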
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	struct amdgpu_vm *vm;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
+	unsigned fence_flags = 0;
 
 	unsigned i;
 	int r = 0;
 #endif
 #endif
 		amdgpu_asic_invalidate_hdp(adev, ring);
 
-	r = amdgpu_fence_emit(ring, f);
+	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+	r = amdgpu_fence_emit(ring, f, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vmid)
 	/* wrap the last IB with fence */
 	/* wrap the last IB with fence */
 	if (job && job->uf_addr) {
 		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
-				       AMDGPU_FENCE_FLAG_64BIT);
+				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
 	}
 
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
+ 58 - 12
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -31,6 +31,7 @@
 #include "amdgpu_sched.h"
 #include "amdgpu_sched.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 #include "amdgpu_vce.h"
+#include "atom.h"
 
 
 #include <linux/vga_switcheroo.h>
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
@@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->ver = adev->gfx.rlc_fw_version;
 		fw_info->ver = adev->gfx.rlc_fw_version;
 		fw_info->feature = adev->gfx.rlc_feature_version;
 		break;
+		fw_info->ver = adev->gfx.rlc_srlc_fw_version;
+		fw_info->feature = adev->gfx.rlc_srlc_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
+		fw_info->ver = adev->gfx.rlc_srlg_fw_version;
+		fw_info->feature = adev->gfx.rlc_srlg_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
+		fw_info->ver = adev->gfx.rlc_srls_fw_version;
+		fw_info->feature = adev->gfx.rlc_srls_feature_version;
+		break;
 	case AMDGPU_INFO_FW_GFX_MEC:
 	case AMDGPU_INFO_FW_GFX_MEC:
 		if (query_fw->index == 0) {
 			fw_info->ver = adev->gfx.mec_fw_version;
 	struct drm_crtc *crtc;
 	struct drm_crtc *crtc;
 	uint32_t ui32 = 0;
 	uint64_t ui64 = 0;
-	int i, found;
+	int i, j, found;
 	int ui32_size = sizeof(ui32);
 
 	if (!info->return_size || !info->return_pointer)
 		return -EINVAL;
 
+	/* Ensure IB tests are run on ring */
+	flush_delayed_work(&adev->late_init_work);
+
 	switch (info->query) {
 	case AMDGPU_INFO_ACCEL_WORKING:
 		ui32 = adev->accel_working;
 			break;
 			break;
 		case AMDGPU_HW_IP_UVD:
 			type = AMD_IP_BLOCK_TYPE_UVD;
-			ring_mask = adev->uvd.ring.ready ? 1 : 0;
+			for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+				ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
 			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
 			ib_size_alignment = 16;
 			break;
 			break;
 			break;
 		case AMDGPU_HW_IP_UVD_ENC:
 			type = AMD_IP_BLOCK_TYPE_UVD;
-			for (i = 0; i < adev->uvd.num_enc_rings; i++)
-				ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
+			for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+				for (j = 0; j < adev->uvd.num_enc_rings; j++)
+					ring_mask |=
+					((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
+					(j + i * adev->uvd.num_enc_rings));
 			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
 			ib_size_alignment = 1;
 			break;
@@ -701,10 +721,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		}
 	}
 	case AMDGPU_INFO_SENSOR: {
-		struct pp_gpu_power query = {0};
-		int query_size = sizeof(query);
-
-		if (amdgpu_dpm == 0)
+		if (!adev->pm.dpm_enabled)
 			return -ENOENT;
 
 		switch (info->sensor_info.type) {
@@ -746,10 +763,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			/* get average GPU power */
 			if (amdgpu_dpm_read_sensor(adev,
 						   AMDGPU_PP_SENSOR_GPU_POWER,
-						   (void *)&query, &query_size)) {
+						   (void *)&ui32, &ui32_size)) {
 				return -EINVAL;
 			}
-			ui32 = query.average_gpu_power >> 8;
+			ui32 >>= 8;
 			break;
 		case AMDGPU_INFO_SENSOR_VDDNB:
 			/* get VDDNB in millivolts */
@@ -913,8 +930,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		return;
 
 	pm_runtime_get_sync(dev->dev);
-
-	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+	amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
 
 	if (adev->asic_type != CHIP_RAVEN) {
 		amdgpu_uvd_free_handles(adev, file_priv);
@@ -935,6 +951,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
 
 	amdgpu_vm_fini(adev, &fpriv->vm);
+	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+
 	if (pasid)
 		amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
 	amdgpu_bo_unref(&pd);
@@ -1088,6 +1106,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_amdgpu_info_firmware fw_info;
 	struct drm_amdgpu_query_fw query_fw;
+	struct atom_context *ctx = adev->mode_info.atom_context;
 	int ret, i;
 
 	/* VCE */
@@ -1146,6 +1165,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 	seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
 	seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
 		   fw_info.feature, fw_info.ver);
 		   fw_info.feature, fw_info.ver);
 
 
+	/* RLC SAVE RESTORE LIST CNTL */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC SAVE RESTORE LIST GPM MEM */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC SAVE RESTORE LIST SRM MEM */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
 	/* MEC */
 	query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
 	query_fw.index = 0;
@@ -1210,6 +1253,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 	seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
 	seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
 		   fw_info.feature, fw_info.ver);
 		   fw_info.feature, fw_info.ver);
 
 
+
+	seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
+
 	return 0;
 	return 0;
 }
 }
 
 

+ 88 - 23
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

@@ -36,12 +36,14 @@
 #include <drm/drm.h>
 
 #include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
 
 struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
 	struct mmu_notifier	mn;
+	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
 	struct work_struct	work;
@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 }
 
 /**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
  * @mn: our notifier
  * @mm: the mm this callback is about
@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * We block for all BOs between start and end to be idle and
  * unmap them by moving them into the system domain again.
  */
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
-					     struct mm_struct *mm,
-					     unsigned long start,
-					     unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
 {
 	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -219,6 +221,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 	}
 }
 
+/**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restored in amdgpu_mn_invalidate_range_end_hsa.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	/* notification is exclusive, but interval is inclusive */
+	end -= 1;
+
+	amdgpu_mn_read_lock(rmn);
+
+	it = interval_tree_iter_first(&rmn->objects, start, end);
+	while (it) {
+		struct amdgpu_mn_node *node;
+		struct amdgpu_bo *bo;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		it = interval_tree_iter_next(it, start, end);
+
+		list_for_each_entry(bo, &node->bos, mn_list) {
+			struct kgd_mem *mem = bo->kfd_bo;
+
+			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+							 start, end))
+				amdgpu_amdkfd_evict_userptr(mem, mm);
+		}
+	}
+}
+
 /**
  * amdgpu_mn_invalidate_range_end - callback to notify about mm change
  *
@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
 	amdgpu_mn_read_unlock(rmn);
 }
 
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
-	.release = amdgpu_mn_release,
-	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
-	.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+	[AMDGPU_MN_TYPE_GFX] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+	},
+	[AMDGPU_MN_TYPE_HSA] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+	},
 };
 
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
+ */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
 /**
  * amdgpu_mn_get - create notifier context
  *
  * @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
  *
  * Creates a notifier context for current->mm.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+				enum amdgpu_mn_type type)
 {
 	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *rmn;
+	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
 
 	mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 		return ERR_PTR(-EINTR);
 	}
 
-	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
-		if (rmn->mm == mm)
+	hash_for_each_possible(adev->mn_hash, rmn, node, key)
+		if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
 			goto release_locks;
 
 	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 
 	rmn->adev = adev;
 	rmn->mm = mm;
-	rmn->mn.ops = &amdgpu_mn_ops;
 	init_rwsem(&rmn->lock);
+	rmn->type = type;
+	rmn->mn.ops = &amdgpu_mn_ops[type];
 	rmn->objects = RB_ROOT_CACHED;
 	mutex_init(&rmn->read_lock);
 	atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	if (r)
 		goto free_rmn;
 
-	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+	hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
 
 release_locks:
 	up_write(&mm->mmap_sem);
@@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	enum amdgpu_mn_type type =
+		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 	struct amdgpu_mn *rmn;
-	struct amdgpu_mn_node *node = NULL;
+	struct amdgpu_mn_node *node = NULL, *new_node;
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	rmn = amdgpu_mn_get(adev);
+	rmn = amdgpu_mn_get(adev, type);
 	if (IS_ERR(rmn))
 		return PTR_ERR(rmn);
 
+	new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&bos);
 
 	down_write(&rmn->lock);
@@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 		list_splice(&node->bos, &bos);
 	}
 
-	if (!node) {
-		node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
-		if (!node) {
-			up_write(&rmn->lock);
-			return -ENOMEM;
-		}
-	}
+	if (!node)
+		node = new_node;
+	else
+		kfree(new_node);
 
 	bo->mn = rmn;
 

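The AMDGPU_MN_KEY() comment above captures the whole idea: struct alignment keeps the low bits of an mm_struct pointer clear, so adding the small notifier-type value yields distinct hash keys for the same mm. A hypothetical, standalone demonstration (names here are stand-ins, not kernel code):

#include <stdio.h>

enum mn_type { MN_TYPE_GFX, MN_TYPE_HSA };

#define MN_KEY(mm, type) ((unsigned long)(mm) + (type))

int main(void)
{
	struct { long dummy; } mm_obj;	/* stand-in for an aligned mm_struct */

	/* same mm, different notifier types -> different hash keys */
	printf("gfx key: %lx\n", MN_KEY(&mm_obj, MN_TYPE_GFX));
	printf("hsa key: %lx\n", MN_KEY(&mm_obj, MN_TYPE_HSA));
	return 0;
}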
+ 9 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

@@ -29,16 +29,23 @@
  */
 struct amdgpu_mn;
 
+enum amdgpu_mn_type {
+	AMDGPU_MN_TYPE_GFX,
+	AMDGPU_MN_TYPE_HSA,
+};
+
 #if defined(CONFIG_MMU_NOTIFIER)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
 #else
 #else
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+					      enum amdgpu_mn_type type)
 {
 {
 	return NULL;
 	return NULL;
 }
 }

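With the GFX/HSA split, amdgpu_mn_ops in the .c file becomes an array indexed by the enum above, so each notifier type installs its own invalidation callbacks. The table is defined in a hunk not shown here; a plausible shape, with the callback names being assumptions:

/* Sketch of the per-type ops table referenced by
 * rmn->mn.ops = &amdgpu_mn_ops[type] above; callback names assumed.
 */
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
	[AMDGPU_MN_TYPE_GFX] = {
		.release = amdgpu_mn_release,
		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
	},
	[AMDGPU_MN_TYPE_HSA] = {
		.release = amdgpu_mn_release,
		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
	},
};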
+ 0 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h

@@ -308,7 +308,6 @@ struct amdgpu_display_funcs {
 
 struct amdgpu_framebuffer {
 	struct drm_framebuffer base;
-	struct drm_gem_object *obj;
 
 	/* caching for later use */
 	uint64_t address;

+ 72 - 48
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

@@ -191,14 +191,21 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 			      u32 domain, struct amdgpu_bo **bo_ptr,
 			      u64 *gpu_addr, void **cpu_addr)
 {
+	struct amdgpu_bo_param bp;
 	bool free = false;
 	int r;
 
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = align;
+	bp.domain = domain;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+
 	if (!*bo_ptr) {
-		r = amdgpu_bo_create(adev, size, align, domain,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     ttm_bo_type_kernel, NULL, bo_ptr);
+		r = amdgpu_bo_create(adev, &bp, bo_ptr);
 		if (r) {
 			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
 				r);
@@ -341,27 +348,25 @@ fail:
 	return false;
 }
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
-			       int byte_align, u32 domain,
-			       u64 flags, enum ttm_bo_type type,
-			       struct reservation_object *resv,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+			       struct amdgpu_bo_param *bp,
 			       struct amdgpu_bo **bo_ptr)
 {
 	struct ttm_operation_ctx ctx = {
-		.interruptible = (type != ttm_bo_type_kernel),
+		.interruptible = (bp->type != ttm_bo_type_kernel),
 		.no_wait_gpu = false,
-		.resv = resv,
+		.resv = bp->resv,
 		.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
 	};
 	struct amdgpu_bo *bo;
-	unsigned long page_align;
+	unsigned long page_align, size = bp->size;
 	size_t acc_size;
 	int r;
 
-	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
+	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
 	size = ALIGN(size, PAGE_SIZE);
 
-	if (!amdgpu_bo_validate_size(adev, size, domain))
+	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
 		return -ENOMEM;
 
 	*bo_ptr = NULL;
@@ -375,18 +380,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
 	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
 	INIT_LIST_HEAD(&bo->shadow_list);
 	INIT_LIST_HEAD(&bo->va);
-	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
-					 AMDGPU_GEM_DOMAIN_GTT |
-					 AMDGPU_GEM_DOMAIN_CPU |
-					 AMDGPU_GEM_DOMAIN_GDS |
-					 AMDGPU_GEM_DOMAIN_GWS |
-					 AMDGPU_GEM_DOMAIN_OA);
+	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
+		bp->domain;
 	bo->allowed_domains = bo->preferred_domains;
-	if (type != ttm_bo_type_kernel &&
+	if (bp->type != ttm_bo_type_kernel &&
 	    bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
-	bo->flags = flags;
+	bo->flags = bp->flags;
 
 #ifdef CONFIG_X86_32
 	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
@@ -417,11 +418,13 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
 #endif
 
 	bo->tbo.bdev = &adev->mman.bdev;
-	amdgpu_ttm_placement_from_domain(bo, domain);
+	amdgpu_ttm_placement_from_domain(bo, bp->domain);
+	if (bp->type == ttm_bo_type_kernel)
+		bo->tbo.priority = 1;
 
-	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
+	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
 				 &bo->placement, page_align, &ctx, acc_size,
-				 NULL, resv, &amdgpu_ttm_bo_destroy);
+				 NULL, bp->resv, &amdgpu_ttm_bo_destroy);
 	if (unlikely(r != 0))
 		return r;
 
@@ -433,10 +436,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
 	else
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
 
-	if (type == ttm_bo_type_kernel)
-		bo->tbo.priority = 1;
-
-	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 		struct dma_fence *fence;
 
@@ -449,20 +449,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
 		bo->tbo.moving = dma_fence_get(fence);
 		dma_fence_put(fence);
 	}
-	if (!resv)
+	if (!bp->resv)
 		amdgpu_bo_unreserve(bo);
 	*bo_ptr = bo;
 
 	trace_amdgpu_bo_create(bo);
 
 	/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
-	if (type == ttm_bo_type_device)
+	if (bp->type == ttm_bo_type_device)
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 
 	return 0;
 
 fail_unreserve:
-	if (!resv)
+	if (!bp->resv)
 		ww_mutex_unlock(&bo->tbo.resv->lock);
 	amdgpu_bo_unref(&bo);
 	return r;
@@ -472,16 +472,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 				   unsigned long size, int byte_align,
 				   struct amdgpu_bo *bo)
 {
+	struct amdgpu_bo_param bp;
 	int r;
 
 	if (bo->shadow)
 		return 0;
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
-				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-				AMDGPU_GEM_CREATE_SHADOW,
-				ttm_bo_type_kernel,
-				bo->tbo.resv, &bo->shadow);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = byte_align;
+	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+		AMDGPU_GEM_CREATE_SHADOW;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = bo->tbo.resv;
+
+	r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
@@ -492,28 +498,26 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	return r;
 }
 
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
-		     int byte_align, u32 domain,
-		     u64 flags, enum ttm_bo_type type,
-		     struct reservation_object *resv,
+int amdgpu_bo_create(struct amdgpu_device *adev,
+		     struct amdgpu_bo_param *bp,
 		     struct amdgpu_bo **bo_ptr)
 {
-	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
+	u64 flags = bp->flags;
 	int r;
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, domain,
-				parent_flags, type, resv, bo_ptr);
+	bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
+	r = amdgpu_bo_do_create(adev, bp, bo_ptr);
 	if (r)
 		return r;
 
 	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
-		if (!resv)
+		if (!bp->resv)
 			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
 							NULL));
 
-		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
+		r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
 
-		if (!resv)
+		if (!bp->resv)
 			reservation_object_unlock((*bo_ptr)->tbo.resv);
 
 		if (r)
@@ -689,8 +693,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		return -EINVAL;
 
 	/* A shared bo cannot be migrated to VRAM */
-	if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
-		return -EINVAL;
+	if (bo->prime_shared_count) {
+		if (domain & AMDGPU_GEM_DOMAIN_GTT)
+			domain = AMDGPU_GEM_DOMAIN_GTT;
+		else
+			return -EINVAL;
+	}
+
+	/* This assumes only APU display buffers are pinned with (VRAM|GTT).
+	 * See function amdgpu_display_supported_domains()
+	 */
+	if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+		domain = AMDGPU_GEM_DOMAIN_VRAM;
+		if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
+			domain = AMDGPU_GEM_DOMAIN_GTT;
+	}
 
 	if (bo->pin_count) {
 		uint32_t mem_type = bo->tbo.mem.mem_type;
@@ -838,6 +855,13 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
 	return amdgpu_ttm_init(adev);
 }
 
+int amdgpu_bo_late_init(struct amdgpu_device *adev)
+{
+	amdgpu_ttm_late_init(adev);
+
+	return 0;
+}
+
 void amdgpu_bo_fini(struct amdgpu_device *adev)
 {
 	amdgpu_ttm_fini(adev);

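The new pinning logic in amdgpu_bo_pin_restricted() deserves a closer look: a prime-shared BO is now redirected to GTT instead of being rejected outright, and a combined (VRAM|GTT) domain, which only APU display buffers request, is resolved based on the size of the VRAM carve-out. A standalone sketch of that resolution; pick_pin_domain() is a hypothetical name, AMDGPU_SG_THRESHOLD comes from this patch series:

/* Sketch of the (VRAM|GTT) resolution performed above. */
static u32 pick_pin_domain(struct amdgpu_device *adev, u32 domain)
{
	if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
		/* Prefer VRAM; fall back to scatter/gather display out
		 * of GTT when the VRAM carve-out is small. */
		if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
			return AMDGPU_GEM_DOMAIN_GTT;
		return AMDGPU_GEM_DOMAIN_VRAM;
	}
	return domain;
}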
+ 34 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

@@ -33,6 +33,16 @@
 
 #define AMDGPU_BO_INVALID_OFFSET	LONG_MAX
 
+struct amdgpu_bo_param {
+	unsigned long			size;
+	int				byte_align;
+	u32				domain;
+	u32				preferred_domain;
+	u64				flags;
+	enum ttm_bo_type		type;
+	struct reservation_object	*resv;
+};
+
 /* bo virtual addresses in a vm */
 struct amdgpu_bo_va_mapping {
 	struct amdgpu_bo_va		*bo_va;
@@ -195,6 +205,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
 	}
 }
 
+/**
+ * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
+ */
+static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+	struct drm_mm_node *node = bo->tbo.mem.mm_node;
+	unsigned long pages_left;
+
+	if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
+		return false;
+
+	for (pages_left = bo->tbo.mem.num_pages; pages_left;
+	     pages_left -= node->size, node++)
+		if (node->start < fpfn)
+			return true;
+
+	return false;
+}
+
 /**
  * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
  */
@@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
 	return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
 }
 
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
-		     int byte_align, u32 domain,
-		     u64 flags, enum ttm_bo_type type,
-		     struct reservation_object *resv,
+int amdgpu_bo_create(struct amdgpu_device *adev,
+		     struct amdgpu_bo_param *bp,
 		     struct amdgpu_bo **bo_ptr);
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 			      unsigned long size, int align,
@@ -230,6 +259,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 int amdgpu_bo_unpin(struct amdgpu_bo *bo);
 int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
 int amdgpu_bo_init(struct amdgpu_device *adev);
+int amdgpu_bo_late_init(struct amdgpu_device *adev);
 void amdgpu_bo_fini(struct amdgpu_device *adev);
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
 				struct vm_area_struct *vma);

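Callers now fill a struct amdgpu_bo_param instead of passing seven positional arguments, and unset fields are simply left zeroed. A minimal sketch of a call site after the refactor, mirroring the converted callers in this series; the helper name and the GTT/kernel-type choices are illustrative:

/* Illustrative wrapper around the new calling convention. */
static int example_alloc_gtt_bo(struct amdgpu_device *adev,
				unsigned long size,
				struct amdgpu_bo **bo_ptr)
{
	struct amdgpu_bo_param bp;

	memset(&bp, 0, sizeof(bp));	/* preferred_domain 0 => use bp.domain */
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = 0;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;

	return amdgpu_bo_create(adev, &bp, bo_ptr);
}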
+ 282 - 83
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

@@ -77,6 +77,37 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
 	}
 }
 
+/**
+ * DOC: power_dpm_state
+ *
+ * This is a legacy interface and is only provided for backwards compatibility.
+ * The amdgpu driver provides a sysfs API for adjusting certain power
+ * related parameters.  The file power_dpm_state is used for this.
+ * It accepts the following arguments:
+ * - battery
+ * - balanced
+ * - performance
+ *
+ * battery
+ *
+ * On older GPUs, the vbios provided a special power state for battery
+ * operation.  Selecting battery switched to this state.  This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * balanced
+ *
+ * On older GPUs, the vbios provided a special power state for balanced
+ * operation.  Selecting balanced switched to this state.  This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * performance
+ *
+ * On older GPUs, the vbios provided a special power state for performance
+ * operation.  Selecting performance switched to this state.  This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ */
+
 static ssize_t amdgpu_get_dpm_state(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
@@ -131,6 +162,59 @@ fail:
 	return count;
 }
 
+
+/**
+ * DOC: power_dpm_force_performance_level
+ *
+ * The amdgpu driver provides a sysfs API for adjusting certain power
+ * related parameters.  The file power_dpm_force_performance_level is
+ * used for this.  It accepts the following arguments:
+ * - auto
+ * - low
+ * - high
+ * - manual
+ * - profile_standard
+ * - profile_min_sclk
+ * - profile_min_mclk
+ * - profile_peak
+ *
+ * auto
+ *
+ * When auto is selected, the driver will attempt to dynamically select
+ * the optimal power profile for current conditions in the driver.
+ *
+ * low
+ *
+ * When low is selected, the clocks are forced to the lowest power state.
+ *
+ * high
+ *
+ * When high is selected, the clocks are forced to the highest power state.
+ *
+ * manual
+ *
+ * When manual is selected, the user can manually adjust which power states
+ * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk,
+ * and pp_dpm_pcie files and adjust the power state transition heuristics
+ * via the pp_power_profile_mode sysfs file.
+ *
+ * profile_standard
+ * profile_min_sclk
+ * profile_min_mclk
+ * profile_peak
+ *
+ * When the profiling modes are selected, clock and power gating are
+ * disabled and the clocks are set for different profiling cases. This
+ * mode is recommended for profiling specific work loads where you do
+ * not want clock or power gating for clock fluctuation to interfere
+ * with your results. profile_standard sets the clocks to a fixed clock
+ * level which varies from asic to asic.  profile_min_sclk forces the sclk
+ * to the lowest level.  profile_min_mclk forces the mclk to the lowest level.
+ * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels.
+ *
+ */
+
 static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
 						struct device_attribute *attr,
 								char *buf)
@@ -324,6 +408,17 @@ fail:
 	return count;
 }
 
+/**
+ * DOC: pp_table
+ *
+ * The amdgpu driver provides a sysfs API for uploading new powerplay
+ * tables.  The file pp_table is used for this.  Reading the file
+ * will dump the current power play table.  Writing to the file
+ * will attempt to upload a new powerplay table and re-initialize
+ * powerplay using that new table.
+ *
+ */
+
 static ssize_t amdgpu_get_pp_table(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -360,6 +455,29 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 	return count;
 }
 
+/**
+ * DOC: pp_od_clk_voltage
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages
+ * in each power level within a power state.  The pp_od_clk_voltage is used for
+ * this.
+ *
+ * Reading the file will display:
+ * - a list of engine clock levels and voltages labeled OD_SCLK
+ * - a list of memory clock levels and voltages labeled OD_MCLK
+ * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
+ *
+ * To manually adjust these settings, first select manual using
+ * power_dpm_force_performance_level. Enter a new value for each
+ * level by writing a string that contains "s/m level clock voltage" to
+ * the file.  E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz
+ * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at
+ * 810 mV.  When you have edited all of the states as needed, write
+ * "c" (commit) to the file to commit your changes.  If you want to reset to the
+ * default power levels, write "r" (reset) to the file to reset them.
+ *
+ */
+
 static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 		struct device_attribute *attr,
 		const char *buf,
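A minimal userspace sketch of the overdrive flow just documented; the sysfs path below is illustrative (the file sits under the DRM card's device directory, and the card index varies), and error handling is trimmed:

#include <stdio.h>

/* Hypothetical card index; adjust the path for your system. */
#define OD_PATH "/sys/class/drm/card0/device/pp_od_clk_voltage"

static int od_write(const char *cmd)
{
	FILE *f = fopen(OD_PATH, "w");

	if (!f)
		return -1;
	fputs(cmd, f);
	return fclose(f);
}

int main(void)
{
	od_write("s 1 500 820\n");	/* sclk level 1: 500 MHz at 820 mV */
	od_write("m 0 350 810\n");	/* mclk level 0: 350 MHz at 810 mV */
	od_write("c\n");		/* commit the edited table */
	return 0;
}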
@@ -437,6 +555,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
 	if (adev->powerplay.pp_funcs->print_clock_levels) {
 		size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+		size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
 		return size;
 	} else {
 		return snprintf(buf, PAGE_SIZE, "\n");
@@ -444,6 +563,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
 
 }
 
+/**
+ * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
+ *
+ * The amdgpu driver provides a sysfs API for adjusting what power levels
+ * are enabled for a given power state.  The files pp_dpm_sclk, pp_dpm_mclk,
+ * and pp_dpm_pcie are used for this.
+ *
+ * Reading back the files will show you the available power levels within
+ * the power state and the clock information for those levels.
+ *
+ * To manually adjust these states, first select manual using
+ * power_dpm_force_performance_level.
+ * Then enter a new value for each level by writing a space-separated
+ * list of level indices to the file, e.g. writing "4 5 6" to
+ * pp_dpm_sclk will enable sclk levels 4, 5, and 6.
+ */
+
 static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -466,23 +602,27 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret;
 	long level;
-	uint32_t i, mask = 0;
-	char sub_str[2];
+	uint32_t mask = 0;
+	char *sub_str = NULL;
+	char *tmp;
+	char buf_cpy[count];
+	const char delimiter[3] = {' ', '\n', '\0'};
 
-	for (i = 0; i < strlen(buf); i++) {
-		if (*(buf + i) == '\n')
-			continue;
-		sub_str[0] = *(buf + i);
-		sub_str[1] = '\0';
-		ret = kstrtol(sub_str, 0, &level);
+	memcpy(buf_cpy, buf, count+1);
+	tmp = buf_cpy;
+	while (tmp[0]) {
+		sub_str =  strsep(&tmp, delimiter);
+		if (strlen(sub_str)) {
+			ret = kstrtol(sub_str, 0, &level);
 
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		mask |= 1 << level;
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			mask |= 1 << level;
+		} else
+			break;
 	}
-
 	if (adev->powerplay.pp_funcs->force_clock_level)
 		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
 
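The rewritten parser accepts multiple space-separated level indices rather than single digits. One thing worth flagging for review: the kernel copy above memcpy()s count+1 bytes into a count-byte VLA, an off-by-one. A userspace re-implementation of the mask-building logic for illustration; the kernel version uses strsep()/kstrtol() from its own string helpers:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build a level mask from input such as "4 5 6\n"; mirrors the loop
 * above, including the break on an empty token.
 */
static unsigned int parse_level_mask(char *buf)
{
	unsigned int mask = 0;
	char *sub;

	while ((sub = strsep(&buf, " \n")) != NULL) {
		if (!strlen(sub))
			break;
		mask |= 1u << strtol(sub, NULL, 0);
	}
	return mask;
}

int main(void)
{
	char input[] = "4 5 6\n";

	printf("mask = 0x%x\n", parse_level_mask(input));	/* 0x70 */
	return 0;
}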
@@ -512,21 +652,26 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret;
 	long level;
-	uint32_t i, mask = 0;
-	char sub_str[2];
+	uint32_t mask = 0;
+	char *sub_str = NULL;
+	char *tmp;
+	char buf_cpy[count];
+	const char delimiter[3] = {' ', '\n', '\0'};
 
-	for (i = 0; i < strlen(buf); i++) {
-		if (*(buf + i) == '\n')
-			continue;
-		sub_str[0] = *(buf + i);
-		sub_str[1] = '\0';
-		ret = kstrtol(sub_str, 0, &level);
+	memcpy(buf_cpy, buf, count+1);
+	tmp = buf_cpy;
+	while (tmp[0]) {
+		sub_str =  strsep(&tmp, delimiter);
+		if (strlen(sub_str)) {
+			ret = kstrtol(sub_str, 0, &level);
 
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		mask |= 1 << level;
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			mask |= 1 << level;
+		} else
+			break;
 	}
 	if (adev->powerplay.pp_funcs->force_clock_level)
 		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@@ -557,21 +702,27 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret;
 	long level;
-	uint32_t i, mask = 0;
-	char sub_str[2];
+	uint32_t mask = 0;
+	char *sub_str = NULL;
+	char *tmp;
+	char buf_cpy[count];
+	const char delimiter[3] = {' ', '\n', '\0'};
 
-	for (i = 0; i < strlen(buf); i++) {
-		if (*(buf + i) == '\n')
-			continue;
-		sub_str[0] = *(buf + i);
-		sub_str[1] = '\0';
-		ret = kstrtol(sub_str, 0, &level);
+	memcpy(buf_cpy, buf, count+1);
+	tmp = buf_cpy;
 
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		mask |= 1 << level;
+	while (tmp[0]) {
+		sub_str =  strsep(&tmp, delimiter);
+		if (strlen(sub_str)) {
+			ret = kstrtol(sub_str, 0, &level);
+
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			mask |= 1 << level;
+		} else
+			break;
 	}
 	if (adev->powerplay.pp_funcs->force_clock_level)
 		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
@@ -668,6 +819,26 @@ fail:
 	return count;
 }
 
+/**
+ * DOC: pp_power_profile_mode
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the heuristics
+ * related to switching between power levels in a power state.  The file
+ * pp_power_profile_mode is used for this.
+ *
+ * Reading this file outputs a list of all of the predefined power profiles
+ * and the relevant heuristics settings for that profile.
+ *
+ * To select a profile or create a custom profile, first select manual using
+ * power_dpm_force_performance_level.  Writing the number of a predefined
+ * profile to pp_power_profile_mode will enable those heuristics.  To
+ * create a custom set of heuristics, write a string of numbers to the file
+ * starting with the number of the custom profile along with a setting
+ * for each heuristic parameter.  Due to differences across asic families
+ * the heuristic parameters vary from family to family.
+ *
+ */
+
 static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -1020,8 +1191,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	struct drm_device *ddev = adev->ddev;
-	struct pp_gpu_power query = {0};
-	int r, size = sizeof(query);
+	u32 query = 0;
+	int r, size = sizeof(u32);
 	unsigned uw;
 
 	/* Can't get power when the card is off */
@@ -1041,7 +1212,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
 		return r;
 
 	/* convert to microwatts */
-	uw = (query.average_gpu_power >> 8) * 1000000;
+	uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;
 
 	return snprintf(buf, PAGE_SIZE, "%u\n", uw);
 }
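The GPU power sensor now returns a single 32-bit value with the integer watts in the upper bits and a fraction byte in the low byte; the debugfs hunk further down prints it as "%u.%u W". A quick userspace check of the microwatt conversion, mirroring the kernel expression above:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t query = (42 << 8) | 50;	/* a sensor reading of 42.50 W */
	unsigned uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;

	/* prints 42050000: the fraction byte is scaled at 1000 uW per unit */
	printf("%u uW\n", uw);
	return 0;
}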
@@ -1109,6 +1280,46 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
 	return count;
 }
 
+
+/**
+ * DOC: hwmon
+ *
+ * The amdgpu driver exposes the following sensor interfaces:
+ * - GPU temperature (via the on-die sensor)
+ * - GPU voltage
+ * - Northbridge voltage (APUs only)
+ * - GPU power
+ * - GPU fan
+ *
+ * hwmon interfaces for GPU temperature:
+ * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ *
+ * hwmon interfaces for GPU voltage:
+ * - in0_input: the voltage on the GPU in millivolts
+ * - in1_input: the voltage on the Northbridge in millivolts
+ *
+ * hwmon interfaces for GPU power:
+ * - power1_average: average power used by the GPU in microWatts
+ * - power1_cap_min: minimum cap supported in microWatts
+ * - power1_cap_max: maximum cap supported in microWatts
+ * - power1_cap: selected power cap in microWatts
+ *
+ * hwmon interfaces for GPU fan:
+ * - pwm1: pulse width modulation fan level (0-255)
+ * - pwm1_enable: pulse width modulation fan control method
+ *                0: no fan speed control
+ *                1: manual fan speed control using pwm interface
+ *                2: automatic fan speed control
+ * - pwm1_min: pulse width modulation fan control minimum level (0)
+ * - pwm1_max: pulse width modulation fan control maximum level (255)
+ * - fan1_input: fan speed in RPM
+ *
+ * You can use hwmon tools like sensors to view this information on your system.
+ *
+ */
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
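Since all of these are ordinary sysfs files, they can be read without any library. A minimal reader for power1_average; the hwmon index varies per system, so the path below is illustrative:

#include <stdio.h>

int main(void)
{
	/* Hypothetical hwmon index; enumerate /sys/class/hwmon to find
	 * the amdgpu device on a real system. */
	FILE *f = fopen("/sys/class/hwmon/hwmon0/power1_average", "r");
	unsigned long uw;

	if (!f)
		return 1;
	if (fscanf(f, "%lu", &uw) == 1)
		printf("GPU power: %lu.%06lu W\n", uw / 1000000, uw % 1000000);
	fclose(f);
	return 0;
}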
@@ -1153,19 +1364,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	umode_t effective_mode = attr->mode;
 
-	/* handle non-powerplay limitations */
-	if (!adev->powerplay.pp_handle) {
-		/* Skip fan attributes if fan is not present */
-		if (adev->pm.no_fan &&
-		    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
-		     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
-		     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
-		     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
-			return 0;
-		/* requires powerplay */
-		if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
-			return 0;
-	}
+
+	/* Skip fan attributes if fan is not present */
+	if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+	    attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+	    attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+	    attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+	    attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
+		return 0;
 
 	/* Skip limit attributes if DPM is not enabled */
 	if (!adev->pm.dpm_enabled &&
@@ -1658,9 +1864,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 
 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 {
-	struct drm_device *ddev = adev->ddev;
-	struct drm_crtc *crtc;
-	struct amdgpu_crtc *amdgpu_crtc;
 	int i = 0;
 
 	if (!adev->pm.dpm_enabled)
@@ -1676,21 +1879,25 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 	}
 
 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
+		if (!amdgpu_device_has_dc_support(adev)) {
+			mutex_lock(&adev->pm.mutex);
+			amdgpu_dpm_get_active_displays(adev);
+			adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs;
+			adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
+			adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
+			/* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */
+			if (adev->pm.pm_display_cfg.vrefresh > 120)
+				adev->pm.pm_display_cfg.min_vblank_time = 0;
+			if (adev->powerplay.pp_funcs->display_configuration_change)
+				adev->powerplay.pp_funcs->display_configuration_change(
+								adev->powerplay.pp_handle,
+								&adev->pm.pm_display_cfg);
+			mutex_unlock(&adev->pm.mutex);
+		}
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
 	} else {
 		mutex_lock(&adev->pm.mutex);
-		adev->pm.dpm.new_active_crtcs = 0;
-		adev->pm.dpm.new_active_crtc_count = 0;
-		if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
-			list_for_each_entry(crtc,
-					    &ddev->mode_config.crtc_list, head) {
-				amdgpu_crtc = to_amdgpu_crtc(crtc);
-				if (amdgpu_crtc->enabled) {
-					adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
-					adev->pm.dpm.new_active_crtc_count++;
-				}
-			}
-		}
+		amdgpu_dpm_get_active_displays(adev);
 		/* update battery/ac status */
 		if (power_supply_is_system_supplied() > 0)
 			adev->pm.dpm.ac_power = true;
@@ -1711,7 +1918,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
 {
 	uint32_t value;
-	struct pp_gpu_power query = {0};
+	uint32_t query = 0;
 	int size;
 
 	/* sanity check PP is enabled */
@@ -1734,17 +1941,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		seq_printf(m, "\t%u mV (VDDGFX)\n", value);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
 		seq_printf(m, "\t%u mV (VDDNB)\n", value);
-	size = sizeof(query);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) {
-		seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8,
-				query.vddc_power & 0xff);
-		seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
-				query.vddci_power & 0xff);
-		seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
-				query.max_gpu_power & 0xff);
-		seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
-				query.average_gpu_power & 0xff);
-	}
+	size = sizeof(uint32_t);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size))
+		seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
 	size = sizeof(value);
 	seq_printf(m, "\n");
 

+ 10 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c

@@ -102,12 +102,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	struct reservation_object *resv = attach->dmabuf->resv;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_bo *bo;
+	struct amdgpu_bo_param bp;
 	int ret;
 
+	memset(&bp, 0, sizeof(bp));
+	bp.size = attach->dmabuf->size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_sg;
+	bp.resv = resv;
 	ww_mutex_lock(&resv->lock, NULL);
-	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
-			       AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
-			       resv, &bo);
+	ret = amdgpu_bo_create(adev, &bp, &bo);
 	if (ret)
 		goto error;
 
@@ -209,7 +215,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_operation_ctx ctx = { true, false };
-	u32 domain = amdgpu_display_framebuffer_domains(adev);
+	u32 domain = amdgpu_display_supported_domains(adev);
 	int ret;
 	bool reads = (direction == DMA_BIDIRECTIONAL ||
 		      direction == DMA_FROM_DEVICE);

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -52,6 +52,7 @@ static int psp_sw_init(void *handle)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		psp_v3_1_set_psp_funcs(psp);
 		break;
 	case CHIP_RAVEN:

+ 10 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

@@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
 			       u32 ring,
 			       struct amdgpu_ring **out_ring)
 {
+	u32 instance;
+
 	switch (mapper->hw_ip) {
 	case AMDGPU_HW_IP_GFX:
 		*out_ring = &adev->gfx.gfx_ring[ring];
@@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
 		*out_ring = &adev->sdma.instance[ring].ring;
 		break;
 	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.ring;
+		instance = ring;
+		*out_ring = &adev->uvd.inst[instance].ring;
 		break;
 	case AMDGPU_HW_IP_VCE:
 		*out_ring = &adev->vce.ring[ring];
 		break;
 	case AMDGPU_HW_IP_UVD_ENC:
-		*out_ring = &adev->uvd.ring_enc[ring];
+		instance = ring / adev->uvd.num_enc_rings;
+		*out_ring =
+		&adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings];
 		break;
 	case AMDGPU_HW_IP_VCN_DEC:
 		*out_ring = &adev->vcn.ring_dec;
@@ -240,13 +245,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 		ip_num_rings = adev->sdma.num_instances;
 		break;
 	case AMDGPU_HW_IP_UVD:
-		ip_num_rings = 1;
+		ip_num_rings = adev->uvd.num_uvd_inst;
 		break;
 	case AMDGPU_HW_IP_VCE:
 		ip_num_rings = adev->vce.num_rings;
 		break;
 	case AMDGPU_HW_IP_UVD_ENC:
-		ip_num_rings = adev->uvd.num_enc_rings;
+		ip_num_rings =
+			adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
 		break;
 	case AMDGPU_HW_IP_VCN_DEC:
 		ip_num_rings = 1;

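With multiple UVD instances, user space still addresses encode rings through a single flat index; the identity map above splits it into (instance, ring) with a divide and a modulo. A standalone sketch of that mapping, with illustrative instance and ring counts:

#include <stdio.h>

int main(void)
{
	unsigned num_uvd_inst = 2, num_enc_rings = 2;	/* illustrative */
	unsigned ring;

	/* flat -> inst[ring / num_enc_rings].ring_enc[ring % num_enc_rings] */
	for (ring = 0; ring < num_uvd_inst * num_enc_rings; ring++)
		printf("flat %u -> inst[%u].ring_enc[%u]\n",
		       ring, ring / num_enc_rings, ring % num_enc_rings);
	return 0;
}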
+ 21 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 
 	dma_fence_put(ring->vmid_wait);
 	ring->vmid_wait = NULL;
+	ring->me = 0;
 
 	ring->adev->rings[ring->idx] = NULL;
 }
@@ -459,6 +460,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
 	spin_unlock(&adev->ring_lru_list_lock);
 }
 
+/**
+ * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
+ *
+ * @adev: amdgpu_device pointer
+ * @reg0: register to write
+ * @reg1: register to wait on
+ * @ref: reference value to write/wait on
+ * @mask: mask to wait on
+ *
+ * Helper for rings that don't support write and wait in a
+ * single oneshot packet.
+ */
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+						uint32_t reg0, uint32_t reg1,
+						uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 /*
  * Debugfs info
 */

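Engines that can write and poll a register in one packet implement the new emit_reg_write_reg_wait hook directly; the rest can point it at this helper. A hedged sketch of the wiring; the funcs table here is illustrative, not one taken from this series:

static const struct amdgpu_ring_funcs example_ring_funcs = {
	/* ... other callbacks elided ... */
	/* no single-packet write+wait on this engine, so use the helper */
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};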
+ 11 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -29,7 +29,7 @@
 #include <drm/drm_print.h>
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS		18
+#define AMDGPU_MAX_RINGS		21
 #define AMDGPU_MAX_GFX_RINGS		1
 #define AMDGPU_MAX_COMPUTE_RINGS	8
 #define AMDGPU_MAX_VCE_RINGS		3
@@ -42,6 +42,7 @@
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
+#define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
 
 enum amdgpu_ring_type {
 	AMDGPU_RING_TYPE_GFX,
@@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 				   unsigned irq_type);
 void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
 void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
+		      unsigned flags);
 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@@ -154,6 +156,9 @@ struct amdgpu_ring_funcs {
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
 			      uint32_t val, uint32_t mask);
+	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
+					uint32_t reg0, uint32_t reg1,
+					uint32_t ref, uint32_t mask);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
@@ -228,6 +233,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
 			int *blacklist, int num_blacklist,
 			bool lru_pipe_order, struct amdgpu_ring **ring);
 void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+						uint32_t reg0, uint32_t reg1,
+						uint32_t ref, uint32_t mask);
+
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;

+ 12 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_test.c

@@ -33,6 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 	struct amdgpu_bo *vram_obj = NULL;
 	struct amdgpu_bo **gtt_obj = NULL;
+	struct amdgpu_bo_param bp;
 	uint64_t gart_addr, vram_addr;
 	unsigned n, size;
 	int i, r;
@@ -58,9 +59,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		r = 1;
 		goto out_cleanup;
 	}
-
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
-			     ttm_bo_type_kernel, NULL, &vram_obj);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+
+	r = amdgpu_bo_create(adev, &bp, &vram_obj);
 	if (r) {
 		DRM_ERROR("Failed to create VRAM object\n");
 		goto out_cleanup;
@@ -79,9 +86,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		void **vram_start, **vram_end;
 		struct dma_fence *fence = NULL;
 
-		r = amdgpu_bo_create(adev, size, PAGE_SIZE,
-				     AMDGPU_GEM_DOMAIN_GTT, 0,
-				     ttm_bo_type_kernel, NULL, gtt_obj + i);
+		bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+		r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
 		if (r) {
 			DRM_ERROR("Failed to create GTT object %d\n", i);
 			goto out_lclean;

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

@@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
 			     ),
 
 	    TP_fast_assign(
-			   __entry->bo = bo_va->base.bo;
+			   __entry->bo = bo_va ? bo_va->base.bo : NULL;
 			   __entry->start = mapping->start;
 			   __entry->last = mapping->last;
 			   __entry->offset = mapping->offset;

+ 441 - 55
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

@@ -63,16 +63,44 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
 /*
 /*
  * Global memory.
  * Global memory.
  */
  */
+
+/**
+ * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
+ * memory object
+ *
+ * @ref: Object for initialization.
+ *
+ * This is called by drm_global_item_ref() when an object is being
+ * initialized.
+ */
 static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
 static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
 {
 {
 	return ttm_mem_global_init(ref->object);
 	return ttm_mem_global_init(ref->object);
 }
 }
 
 
+/**
+ * amdgpu_ttm_mem_global_release - Drop reference to a memory object
+ *
+ * @ref: Object being removed
+ *
+ * This is called by drm_global_item_unref() when an object is being
+ * released.
+ */
 static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
 static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
 {
 {
 	ttm_mem_global_release(ref->object);
 	ttm_mem_global_release(ref->object);
 }
 }
 
 
+/**
+ * amdgpu_ttm_global_init - Initialize global TTM memory reference
+ * 							structures.
+ *
+ * @adev:  	AMDGPU device for which the global structures need to be
+ *			registered.
+ *
+ * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
+ * during bring up.
+ */
 static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 {
 {
 	struct drm_global_reference *global_ref;
 	struct drm_global_reference *global_ref;
@@ -80,7 +108,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	struct drm_sched_rq *rq;
 	struct drm_sched_rq *rq;
 	int r;
 	int r;
 
 
+	/* ensure reference is false in case init fails */
 	adev->mman.mem_global_referenced = false;
 	adev->mman.mem_global_referenced = false;
+
 	global_ref = &adev->mman.mem_global_ref;
 	global_ref = &adev->mman.mem_global_ref;
 	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
 	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
 	global_ref->size = sizeof(struct ttm_mem_global);
 	global_ref->size = sizeof(struct ttm_mem_global);
@@ -111,7 +141,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	ring = adev->mman.buffer_funcs_ring;
 	ring = adev->mman.buffer_funcs_ring;
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
 	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
 	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r) {
 	if (r) {
 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
 		goto error_entity;
 		goto error_entity;
@@ -146,6 +176,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
 	return 0;
 	return 0;
 }
 }
 
 
+/**
+ * amdgpu_init_mem_type - 	Initialize a memory manager for a specific
+ * 							type of memory request.
+ *
+ * @bdev:	The TTM BO device object (contains a reference to
+ * 			amdgpu_device)
+ * @type:	The type of memory requested
+ * @man:
+ *
+ * This is called by ttm_bo_init_mm() when a buffer object is being
+ * initialized.
+ */
 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 				struct ttm_mem_type_manager *man)
 				struct ttm_mem_type_manager *man)
 {
 {
@@ -161,6 +203,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		break;
 		break;
 	case TTM_PL_TT:
 	case TTM_PL_TT:
+		/* GTT memory  */
 		man->func = &amdgpu_gtt_mgr_func;
 		man->func = &amdgpu_gtt_mgr_func;
 		man->gpu_offset = adev->gmc.gart_start;
 		man->gpu_offset = adev->gmc.gart_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->available_caching = TTM_PL_MASK_CACHING;
@@ -193,6 +236,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 	return 0;
 	return 0;
 }
 }
 
 
+/**
+ * amdgpu_evict_flags - Compute placement flags
+ *
+ * @bo: The buffer object to evict
+ * @placement: Possible destination(s) for evicted BO
+ *
+ * Fill in placement data when ttm_bo_evict() is called
+ */
 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement)
 				struct ttm_placement *placement)
 {
 {
@@ -204,12 +255,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 	};
 	};
 
 
+	/* Don't handle scatter gather BOs */
 	if (bo->type == ttm_bo_type_sg) {
 	if (bo->type == ttm_bo_type_sg) {
 		placement->num_placement = 0;
 		placement->num_placement = 0;
 		placement->num_busy_placement = 0;
 		placement->num_busy_placement = 0;
 		return;
 		return;
 	}
 	}
 
 
+	/* Object isn't an AMDGPU object so ignore */
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 		placement->placement = &placements;
 		placement->placement = &placements;
 		placement->busy_placement = &placements;
 		placement->busy_placement = &placements;
@@ -217,26 +270,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		placement->num_busy_placement = 1;
 		placement->num_busy_placement = 1;
 		return;
 		return;
 	}
 	}
+
 	abo = ttm_to_amdgpu_bo(bo);
 	abo = ttm_to_amdgpu_bo(bo);
 	switch (bo->mem.mem_type) {
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 	case TTM_PL_VRAM:
 		if (!adev->mman.buffer_funcs_enabled) {
 		if (!adev->mman.buffer_funcs_enabled) {
+			/* Move to system memory */
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 		} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 		} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-			unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
-			struct drm_mm_node *node = bo->mem.mm_node;
-			unsigned long pages_left;
-
-			for (pages_left = bo->mem.num_pages;
-			     pages_left;
-			     pages_left -= node->size, node++) {
-				if (node->start < fpfn)
-					break;
-			}
-
-			if (!pages_left)
-				goto gtt;
+			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+			   amdgpu_bo_in_cpu_visible_vram(abo)) {
 
 
 			/* Try evicting to the CPU inaccessible part of VRAM
 			/* Try evicting to the CPU inaccessible part of VRAM
 			 * first, but only set GTT as busy placement, so this
 			 * first, but only set GTT as busy placement, so this
@@ -245,12 +288,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 			 */
 			 */
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
 							 AMDGPU_GEM_DOMAIN_GTT);
 							 AMDGPU_GEM_DOMAIN_GTT);
-			abo->placements[0].fpfn = fpfn;
+			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 			abo->placements[0].lpfn = 0;
 			abo->placements[0].lpfn = 0;
 			abo->placement.busy_placement = &abo->placements[1];
 			abo->placement.busy_placement = &abo->placements[1];
 			abo->placement.num_busy_placement = 1;
 			abo->placement.num_busy_placement = 1;
 		} else {
 		} else {
-gtt:
+			/* Move to GTT memory */
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
 		}
 		}
 		break;
 		break;
@@ -261,6 +304,15 @@ gtt:
 	*placement = abo->placement;
 	*placement = abo->placement;
 }
 }
 
 
+/**
+ * amdgpu_verify_access - Verify access for a mmap call
+ *
+ * @bo:		The buffer object to map
+ * @filp:	The file pointer from the process performing the mmap
+ *
+ * This is called by ttm_bo_mmap() to verify whether a process
+ * has the right to mmap a BO to their process space.
+ */
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
@@ -278,6 +330,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 					  filp->private_data);
 					  filp->private_data);
 }
 }
 
 
+/**
+ * amdgpu_move_null - Register memory for a buffer object
+ *
+ * @bo:			The bo to assign the memory to
+ * @new_mem:	The memory to be assigned.
+ *
+ * Assign the memory from new_mem to the memory of the buffer object
+ * bo.
+ */
 static void amdgpu_move_null(struct ttm_buffer_object *bo,
 static void amdgpu_move_null(struct ttm_buffer_object *bo,
 			     struct ttm_mem_reg *new_mem)
 			     struct ttm_mem_reg *new_mem)
 {
 {
@@ -288,6 +349,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
 	new_mem->mm_node = NULL;
 	new_mem->mm_node = NULL;
 }
 }
 
 
+/**
+ * amdgpu_mm_node_addr -	Compute the GPU relative offset of a GTT
+ * 							buffer.
+ */
 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 				    struct drm_mm_node *mm_node,
 				    struct drm_mm_node *mm_node,
 				    struct ttm_mem_reg *mem)
 				    struct ttm_mem_reg *mem)
@@ -302,9 +367,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 }
 }
 
 
 /**
 /**
- * amdgpu_find_mm_node - Helper function finds the drm_mm_node
- *  corresponding to @offset. It also modifies the offset to be
- *  within the drm_mm_node returned
+ * amdgpu_find_mm_node -	Helper function finds the drm_mm_node
+ *  						corresponding to @offset. It also modifies
+ * 							the offset to be within the drm_mm_node
+ * 							returned
  */
  */
 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
 					       unsigned long *offset)
 					       unsigned long *offset)
@@ -443,7 +509,12 @@ error:
 	return r;
 	return r;
 }
 }
 
 
-
+/**
+ * amdgpu_move_blit - Copy an entire buffer to another buffer
+ *
+ * This is a helper called by amdgpu_bo_move() and
+ * amdgpu_move_vram_ram() to help move buffers to and from VRAM.
+ */
 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 			    bool evict, bool no_wait_gpu,
 			    bool evict, bool no_wait_gpu,
 			    struct ttm_mem_reg *new_mem,
 			    struct ttm_mem_reg *new_mem,
@@ -478,6 +549,11 @@ error:
 	return r;
 	return r;
 }
 }
 
 
+/**
+ * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
+ *
+ * Called by amdgpu_bo_move().
+ */
 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 				struct ttm_operation_ctx *ctx,
 				struct ttm_operation_ctx *ctx,
 				struct ttm_mem_reg *new_mem)
 				struct ttm_mem_reg *new_mem)
@@ -490,6 +566,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 	int r;
 	int r;
 
 
 	adev = amdgpu_ttm_adev(bo->bdev);
 	adev = amdgpu_ttm_adev(bo->bdev);
+
+	/* create space/pages for new_mem in GTT space */
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
 	placement.num_placement = 1;
@@ -504,25 +582,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 		return r;
 	}
 
+	/* set caching flags */
 	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
 
+	/* Bind the memory to the GTT space */
 	r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
+
+	/* blit VRAM to GTT */
 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
+
+	/* move BO (in tmp_mem) to new_mem */
 	r = ttm_bo_move_ttm(bo, ctx, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
 }
 
+/**
+ * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
+ *
+ * Called by amdgpu_bo_move().
+ */
 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 				struct ttm_operation_ctx *ctx,
 				struct ttm_mem_reg *new_mem)
@@ -535,6 +624,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 	int r;
 
 	adev = amdgpu_ttm_adev(bo->bdev);
+
+	/* make space in GTT for old_mem buffer */
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
 	placement.num_placement = 1;
@@ -548,10 +639,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 	if (unlikely(r)) {
 		return r;
 	}
+
+	/* move/bind old memory to GTT space */
 	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
+
+	/* copy to VRAM */
 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
@@ -561,6 +656,11 @@ out_cleanup:
 	return r;
 }
 
+/**
+ * amdgpu_bo_move - Move a buffer object to a new memory location
+ *
+ * Called by ttm_bo_handle_move_mem()
+ */
 static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			  struct ttm_operation_ctx *ctx,
 			  struct ttm_mem_reg *new_mem)
@@ -626,6 +726,11 @@ memcpy:
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
+ *
+ * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
+ */
 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
@@ -695,7 +800,7 @@ struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
 	uint64_t		userptr;
-	struct mm_struct	*usermm;
+	struct task_struct	*usertask;
 	uint32_t		userflags;
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
@@ -703,17 +808,29 @@ struct amdgpu_ttm_tt {
 	uint32_t		last_set_pages;
 };
 
+/**
+ * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
+ * pointer
+ *
+ * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
+ * This provides a wrapper around the get_user_pages() call to provide
+ * device accessible pages that back user memory.
+ */
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	struct mm_struct *mm = gtt->usertask->mm;
 	unsigned int flags = 0;
 	unsigned pinned = 0;
 	int r;
 
+	if (!mm) /* Happens during process shutdown */
+		return -ESRCH;
+
 	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
 		flags |= FOLL_WRITE;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
 		/* check that we only use anonymous memory
@@ -721,13 +838,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
 		struct vm_area_struct *vma;
 
-		vma = find_vma(gtt->usermm, gtt->userptr);
+		vma = find_vma(mm, gtt->userptr);
 		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&current->mm->mmap_sem);
+			up_read(&mm->mmap_sem);
 			return -EPERM;
 		}
 	}
 
+	/* loop enough times using contiguous pages of memory */
 	do {
 		unsigned num_pages = ttm->num_pages - pinned;
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
@@ -739,7 +857,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 		list_add(&guptask.list, &gtt->guptasks);
 		spin_unlock(&gtt->guptasklock);
 
-		r = get_user_pages(userptr, num_pages, flags, p, NULL);
+		if (mm == current->mm)
+			r = get_user_pages(userptr, num_pages, flags, p, NULL);
+		else
+			r = get_user_pages_remote(gtt->usertask,
+					mm, userptr, num_pages,
+					flags, p, NULL, NULL);
 
 		spin_lock(&gtt->guptasklock);
 		list_del(&guptask.list);
@@ -752,15 +875,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 
 	} while (pinned < ttm->num_pages);
 
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return 0;
 
 release_pages:
 	release_pages(pages, pinned);
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return r;
 }
 
+/**
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ *
+ * Called by amdgpu_cs_list_validate().  This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
+ */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -775,6 +906,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 	}
 }
 
+/**
+ * amdgpu_ttm_tt_mark_user_pages - Mark pages as dirty
+ *
+ * Called while unpinning userptr pages
+ */
 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -793,7 +929,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
 	}
 }
 
-/* prepare the sg table with the user pages */
+/**
+ * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
+ *
+ * Called by amdgpu_ttm_backend_bind()
+ */
 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -805,17 +946,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	enum dma_data_direction direction = write ?
 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 
+	/* Allocate an SG array and squash pages into it */
 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 				      ttm->num_pages << PAGE_SHIFT,
 				      GFP_KERNEL);
 	if (r)
 		goto release_sg;
 
+	/* Map SG to device */
 	r = -ENOMEM;
 	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 	if (nents != ttm->sg->nents)
 		goto release_sg;
 
+	/* convert SG to linear array of pages and dma addresses */
 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 					 gtt->ttm.dma_address, ttm->num_pages);
 
@@ -826,6 +970,9 @@ release_sg:
 	return r;
 }
 
+/**
+ * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
+ */
 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -839,14 +986,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	if (!ttm->sg->sgl)
 		return;
 
-	/* free the sg table and pages again */
+	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
+	/* mark the pages as dirty */
 	amdgpu_ttm_tt_mark_user_pages(ttm);
 
 	sg_free_table(ttm->sg);
 }
 
+int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+				struct ttm_buffer_object *tbo,
+				uint64_t flags)
+{
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+	struct ttm_tt *ttm = tbo->ttm;
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int r;
+
+	if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+		uint64_t page_idx = 1;
+
+		r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
+				ttm->pages, gtt->ttm.dma_address, flags);
+		if (r)
+			goto gart_bind_fail;
+
+		/* Patch mtype of the second part BO */
+	flags &= ~AMDGPU_PTE_MTYPE_MASK;
+		flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+
+		r = amdgpu_gart_bind(adev,
+				gtt->offset + (page_idx << PAGE_SHIFT),
+				ttm->num_pages - page_idx,
+				&ttm->pages[page_idx],
+				&(gtt->ttm.dma_address[page_idx]), flags);
+	} else {
+		r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+				     ttm->pages, gtt->ttm.dma_address, flags);
+	}
+
+gart_bind_fail:
+	if (r)
+		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+			  ttm->num_pages, gtt->offset);
+
+	return r;
+}
+
+/**
+ * amdgpu_ttm_backend_bind - Bind GTT memory
+ *
+ * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
+ * This handles binding GTT memory to the device address space.
+ */
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
@@ -877,7 +1070,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 		return 0;
 	}
 
+	/* compute PTE flags relevant to this BO memory */
 	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
+
+	/* bind pages into GART page tables */
 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
@@ -888,6 +1084,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 	return r;
 }
 
+/**
+ * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
+ */
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -903,6 +1102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 	    amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
 		return 0;
 
+	/* allocate GTT space */
 	tmp = bo->mem;
 	tmp.mm_node = NULL;
 	placement.num_placement = 1;
@@ -918,10 +1118,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 	if (unlikely(r))
 		return r;
 
+	/* compute PTE flags for this buffer object */
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+
+	/* Bind pages */
 	gtt->offset = (u64)tmp.start << PAGE_SHIFT;
-	r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
-			     bo->ttm->pages, gtt->ttm.dma_address, flags);
+	r = amdgpu_ttm_gart_bind(adev, bo, flags);
 	if (unlikely(r)) {
 		ttm_bo_mem_put(bo, &tmp);
 		return r;
@@ -935,31 +1137,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_recover_gart - Rebind GTT pages
+ *
+ * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
+ * rebind GTT pages during a GPU reset.
+ */
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
-	struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
 	uint64_t flags;
 	int r;
 
-	if (!gtt)
+	if (!tbo->ttm)
 		return 0;
 
-	flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
-	r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
-			     gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
-	if (r)
-		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
-			  gtt->ttm.ttm.num_pages, gtt->offset);
+	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
+	r = amdgpu_ttm_gart_bind(adev, tbo, flags);
+
 	return r;
 }
 
+/**
+ * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
+ *
+ * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
+ * ttm_tt_destroy().
+ */
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	int r;
 
+	/* if the pages have userptr pinning then clear that first */
 	if (gtt->userptr)
 		amdgpu_ttm_tt_unpin_userptr(ttm);
 
@@ -978,6 +1189,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
+	if (gtt->usertask)
+		put_task_struct(gtt->usertask);
+
 	ttm_dma_tt_fini(&gtt->ttm);
 	kfree(gtt);
 }
@@ -988,6 +1202,13 @@ static struct ttm_backend_func amdgpu_backend_func = {
 	.destroy = &amdgpu_ttm_backend_destroy,
 };
 
+/**
+ * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
+ *
+ * @bo: The buffer object to create a GTT ttm_tt object around
+ *
+ * Called by ttm_tt_create().
+ */
 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 					   uint32_t page_flags)
 {
@@ -1001,6 +1222,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 		return NULL;
 	}
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
+
+	/* allocate space for the uninitialized page entries */
 	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
 		kfree(gtt);
 		return NULL;
@@ -1008,6 +1231,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 	return &gtt->ttm.ttm;
 }
 
+/**
+ * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
+ *
+ * Map the pages of a ttm_tt object to an address space visible
+ * to the underlying device.
+ */
 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 			struct ttm_operation_ctx *ctx)
 {
@@ -1015,6 +1244,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
+	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
 	if (gtt && gtt->userptr) {
 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
 		if (!ttm->sg)
@@ -1039,9 +1269,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 	}
 #endif
 
+	/* fall back to generic helper to populate the page array
+	 * and map them to the device */
 	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
 }
 
+/**
+ * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
+ *
+ * Unmaps pages of a ttm_tt object from the device address space and
+ * unpopulates the page array backing it.
+ */
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev;
@@ -1067,9 +1305,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	}
 #endif
 
+	/* fall back to generic helper to unmap and unpopulate array */
 	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
 }
 
+/**
+ * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
+ * task
+ *
+ * @ttm: The ttm_tt object to bind this userptr object to
+ * @addr: The address in the current task's VM space to use
+ * @flags: Requirements of userptr object.
+ *
+ * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
+ * to the current task
+ */
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 			      uint32_t flags)
 {
@@ -1079,8 +1329,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 		return -EINVAL;
 
 	gtt->userptr = addr;
-	gtt->usermm = current->mm;
 	gtt->userflags = flags;
+
+	if (gtt->usertask)
+		put_task_struct(gtt->usertask);
+	gtt->usertask = current->group_leader;
+	get_task_struct(gtt->usertask);
+
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
 	atomic_set(&gtt->mmu_invalidations, 0);
@@ -1089,6 +1344,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_tt_get_usermm - Return the mm_struct backing a userptr ttm_tt object
+ */
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1096,9 +1354,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 	if (gtt == NULL)
 		return NULL;
 
-	return gtt->usermm;
+	if (gtt->usertask == NULL)
+		return NULL;
+
+	return gtt->usertask->mm;
 }
 
+/**
+ * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
+ * address range for the current task.
+ */
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
@@ -1109,10 +1376,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
+	/* Return false if no part of the ttm_tt object lies within
+	 * the range
+	 */
 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
+	/* Search the lists of tasks that hold this mapping and see
+	 * if current is one of them.  If it is return false.
+	 */
 	spin_lock(&gtt->guptasklock);
 	list_for_each_entry(entry, &gtt->guptasks, list) {
 		if (entry->task == current) {
@@ -1127,6 +1400,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	return true;
 }
 
+/**
+ * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ */
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated)
 {
@@ -1137,6 +1414,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 	return prev_invalidated != *last_invalidated;
 }
 
+/**
+ * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt
+ * object been invalidated since the last time they've been set?
+ */
 bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1147,6 +1430,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
 	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
 }
 
+/**
+ * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
+ */
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1157,6 +1443,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
 }
 
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ */
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem)
 {
@@ -1181,6 +1473,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 	return flags;
 }
 
+/**
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
+ * object.
+ *
+ * Return true if eviction is sensible.  Called by
+ * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
+ * which tries to evict buffer objects until it can find space
+ * for a new object and by ttm_bo_force_list_clean() which is
+ * used to clean out a memory space.
+ */
 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 					    const struct ttm_place *place)
 {
@@ -1227,6 +1529,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	return ttm_bo_eviction_valuable(bo, place);
 }
 
+/**
+ * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
+ *
+ * @bo:  The buffer object to read/write
+ * @offset:  Offset into buffer object
+ * @buf:  Secondary buffer to write/read from
+ * @len: Length in bytes of access
+ * @write:  true if writing
+ *
+ * This is used to access VRAM that backs a buffer object via MMIO
+ * access for debugging purposes.
+ */
 static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 				    unsigned long offset,
 				    void *buf, int len, int write)
@@ -1329,6 +1644,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
 static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 {
 	struct ttm_operation_ctx ctx = { false, false };
+	struct amdgpu_bo_param bp;
 	int r = 0;
 	int i;
 	u64 vram_size = adev->gmc.visible_vram_size;
@@ -1336,17 +1652,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 	u64 size = adev->fw_vram_usage.size;
 	struct amdgpu_bo *bo;
 
+	memset(&bp, 0, sizeof(bp));
+	bp.size = adev->fw_vram_usage.size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
 	adev->fw_vram_usage.va = NULL;
 	adev->fw_vram_usage.reserved_bo = NULL;
 
 	if (adev->fw_vram_usage.size > 0 &&
 		adev->fw_vram_usage.size <= vram_size) {
 
-		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
-				     AMDGPU_GEM_DOMAIN_VRAM,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     ttm_bo_type_kernel, NULL,
+		r = amdgpu_bo_create(adev, &bp,
 				     &adev->fw_vram_usage.reserved_bo);
 		if (r)
 			goto error_create;
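
The hunk above reflects the new amdgpu_bo_create() calling convention: allocation parameters now travel in a struct amdgpu_bo_param instead of a long positional argument list. A minimal sketch of the pattern, using only the fields visible in the hunk (the helper name reserve_vram_bo() is hypothetical, not part of the patch):

/* Sketch: fill an amdgpu_bo_param and create a kernel VRAM BO,
 * mirroring the conversion in the hunk above. Assumes amdgpu.h
 * and amdgpu_object.h are in scope. */
static int reserve_vram_bo(struct amdgpu_device *adev, u64 size,
			   struct amdgpu_bo **bo)
{
	struct amdgpu_bo_param bp;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
		   AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;

	return amdgpu_bo_create(adev, &bp, bo);
}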
@@ -1398,13 +1718,22 @@ error_create:
 	adev->fw_vram_usage.reserved_bo = NULL;
 	return r;
 }
-
+/**
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as various
+ * gtt/vram related fields.
+ *
+ * This initializes all of the memory space pools that the TTM layer
+ * will need such as the GTT space (system memory mapped to the device),
+ * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
+ * can be mapped per VMID.
+ */
 int amdgpu_ttm_init(struct amdgpu_device *adev)
 {
 	uint64_t gtt_size;
 	int r;
 	u64 vis_vram_limit;
 
+	/* initialize global references for vram/gtt */
 	r = amdgpu_ttm_global_init(adev);
 	if (r) {
 		return r;
@@ -1425,6 +1754,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* We opt to avoid OOM on system pages allocations */
 	adev->mman.bdev.no_retry = true;
 
+	/* Initialize VRAM pool with all of VRAM divided into pages */
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
 				adev->gmc.real_vram_size >> PAGE_SHIFT);
 	if (r) {
@@ -1454,15 +1784,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_VRAM,
-				    &adev->stolen_vga_memory,
-				    NULL, NULL);
-	if (r)
-		return r;
+	/* allocate memory as required for VGA
+	 * This is used for VGA emulation and pre-OS scanout buffers to
+	 * avoid display artifacts while transitioning between pre-OS
+	 * and driver.  */
+	if (adev->gmc.stolen_size) {
+		r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_VRAM,
+					    &adev->stolen_vga_memory,
+					    NULL, NULL);
+		if (r)
+			return r;
+	}
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
 
+	/* Compute GTT size, either based on 3/4 of the size of RAM
+	 * or whatever the user passed on module init */
 	if (amdgpu_gtt_size == -1) {
 		struct sysinfo si;
 
@@ -1473,6 +1811,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 	else
 		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+
+	/* Initialize GTT memory pool */
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
 	if (r) {
 		DRM_ERROR("Failed initializing GTT heap.\n");
@@ -1481,6 +1821,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
 		 (unsigned)(gtt_size / (1024 * 1024)));
 		 (unsigned)(gtt_size / (1024 * 1024)));
 
 
+	/* Initialize various on-chip memory pools */
 	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
 	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
 	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
@@ -1520,6 +1861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		}
 	}
 
+	/* Register debugfs entries for amdgpu_ttm */
 	r = amdgpu_ttm_debugfs_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init debugfs\n");
@@ -1528,13 +1870,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm
+ */
+void amdgpu_ttm_late_init(struct amdgpu_device *adev)
+{
+	/* return the VGA stolen memory (if any) back to VRAM */
+	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+}
+
+/**
+ * amdgpu_ttm_fini - De-initialize the TTM memory pools
+ */
 void amdgpu_ttm_fini(struct amdgpu_device *adev)
 {
 	if (!adev->mman.initialized)
 		return;
 
 	amdgpu_ttm_debugfs_fini(adev);
-	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
 	if (adev->mman.aper_base_kaddr)
 		iounmap(adev->mman.aper_base_kaddr);
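
The hunk above introduces amdgpu_ttm_late_init() and moves the release of the stolen-VGA reservation out of amdgpu_ttm_fini(): the buffer can now be returned to VRAM as soon as the driver owns scanout. A sketch of a hypothetical caller (the hook shape follows the usual amd_ip_funcs convention, but this exact function is illustrative, not part of the patch):

/* Sketch: a GMC IP-block late_init hook giving the pre-OS scanout
 * reservation back to VRAM. Hypothetical caller, for illustration. */
static int example_gmc_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* stolen VGA memory is no longer needed once the driver scans out */
	amdgpu_ttm_late_init(adev);

	return 0;
}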
@@ -1856,6 +2210,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
 #endif
 };
 
+/**
+ * amdgpu_ttm_vram_read - Linear read access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 				    size_t size, loff_t *pos)
 {
@@ -1895,6 +2254,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 	return result;
 }
 
+/**
+ * amdgpu_ttm_vram_write - Linear write access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
 static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
 				    size_t size, loff_t *pos)
 {
@@ -1943,6 +2307,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = {
 
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 
+/**
+ * amdgpu_ttm_gtt_read - Linear read access to GTT memory
+ */
 static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
 				   size_t size, loff_t *pos)
 {
@@ -1990,6 +2357,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
 
 #endif
 
+/**
+ * amdgpu_iomem_read - Virtual read access to GPU mapped memory
+ *
+ * This function is used to read memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
 static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 				 size_t size, loff_t *pos)
 {
@@ -1998,6 +2372,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 	ssize_t result = 0;
 	int r;
 
+	/* retrieve the IOMMU domain if any for this device */
 	dom = iommu_get_domain_for_dev(adev->dev);
 
 	while (size) {
@@ -2010,6 +2385,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 
 		bytes = bytes < size ? bytes : size;
 
+		/* Translate the bus address to a physical address.  If
+		 * the domain is NULL it means there is no IOMMU active
+		 * and the address translation is the identity
+		 */
 		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
 
 		pfn = addr >> PAGE_SHIFT;
@@ -2034,6 +2413,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 	return result;
 }
 
+/**
+ * amdgpu_iomem_write - Virtual write access to GPU mapped memory
+ *
+ * This function is used to write memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
 static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
 				 size_t size, loff_t *pos)
 {

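One behavioural change in amdgpu_ttm.c above is worth calling out: userptr registration now stores a reference to the registering task (the group leader) instead of a bare mm_struct, and pinning resolves that task's mm at pin time, so the pin can legitimately run from a different process such as a kernel worker. A condensed sketch of the pattern, simplified from amdgpu_ttm_tt_get_user_pages() with the gup-task bookkeeping and the ANONONLY check dropped:

#include <linux/mm.h>
#include <linux/sched.h>

/* Sketch: pin @nr_pages of user memory belonging to @task. The
 * partial-pin retry loop of the real driver code is trimmed. */
static long pin_user_range(struct task_struct *task, unsigned long addr,
			   unsigned long nr_pages, unsigned int gup_flags,
			   struct page **pages)
{
	struct mm_struct *mm = task->mm;
	long pinned;

	if (!mm)	/* happens during process shutdown */
		return -ESRCH;

	down_read(&mm->mmap_sem);
	if (mm == current->mm)
		/* same process: plain get_user_pages() is enough */
		pinned = get_user_pages(addr, nr_pages, gup_flags,
					pages, NULL);
	else
		/* cross-process pin, e.g. from a worker thread */
		pinned = get_user_pages_remote(task, mm, addr, nr_pages,
					       gup_flags, pages, NULL, NULL);
	up_read(&mm->mmap_sem);

	return pinned;
}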
+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

@@ -77,6 +77,7 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
 int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_late_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
 					bool enable);

+ 51 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c

@@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
 			  le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
 		DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
 			  le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
-		DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
-			  le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+		DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+			  le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+		if (version_minor == 1) {
+			const struct rlc_firmware_header_v2_1 *v2_1 =
+				container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+			DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+				  le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+			DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+			DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+			DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+			DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+			DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+			DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+			DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+			DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+			DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+			DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+			DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+			DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+		}
 	} else {
 		DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
 	}
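
The version_minor == 1 branch above relies on struct embedding: rlc_firmware_header_v2_1 begins with a v2_0 header, so container_of() recovers the enclosing structure from a pointer to the embedded member. A minimal sketch of the same idiom (the helper name is illustrative):

/* Sketch: recover the enclosing v2.1 header from its embedded v2.0
 * member, exactly the idiom used in the hunk above. */
static const struct rlc_firmware_header_v2_1 *
to_rlc_v2_1(const struct rlc_firmware_header_v2_0 *hdr)
{
	return container_of(hdr, struct rlc_firmware_header_v2_1, v2_0);
}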
@@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		if (!load_type)
 			return AMDGPU_FW_LOAD_DIRECT;
 		else
@@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 			return AMDGPU_FW_LOAD_DIRECT;
 		else
 			return AMDGPU_FW_LOAD_PSP;
+	case CHIP_VEGA20:
+		return AMDGPU_FW_LOAD_DIRECT;
 	default:
 		DRM_ERROR("Unknown firmware load type\n");
 	}
@@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 	    (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
-	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
+	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
 		ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
 
 		memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 					      le32_to_cpu(header->ucode_array_offset_bytes) +
 					      le32_to_cpu(cp_hdr->jt_offset) * 4),
 		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
+		ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
+		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
+		ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
+		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+		ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
+		       ucode->ucode_size);
 	}
 	}
 
 	return 0;
+ 22 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h

@@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 {
 	uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
 	uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
 };
 
+struct rlc_firmware_header_v2_1 {
+	struct rlc_firmware_header_v2_0 v2_0;
+	uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
+	uint32_t save_restore_list_cntl_ucode_ver;
+	uint32_t save_restore_list_cntl_feature_ver;
+	uint32_t save_restore_list_cntl_size_bytes;
+	uint32_t save_restore_list_cntl_offset_bytes;
+	uint32_t save_restore_list_gpm_ucode_ver;
+	uint32_t save_restore_list_gpm_feature_ver;
+	uint32_t save_restore_list_gpm_size_bytes;
+	uint32_t save_restore_list_gpm_offset_bytes;
+	uint32_t save_restore_list_srm_ucode_ver;
+	uint32_t save_restore_list_srm_feature_ver;
+	uint32_t save_restore_list_srm_size_bytes;
+	uint32_t save_restore_list_srm_offset_bytes;
+};
+
 /* version_major=1, version_minor=0 */
 struct sdma_firmware_header_v1_0 {
 	struct common_firmware_header header;
@@ -148,6 +166,7 @@ union amdgpu_firmware_header {
 	struct gfx_firmware_header_v1_0 gfx;
 	struct rlc_firmware_header_v1_0 rlc;
 	struct rlc_firmware_header_v2_0 rlc_v2_0;
+	struct rlc_firmware_header_v2_1 rlc_v2_1;
 	struct sdma_firmware_header_v1_0 sdma;
 	struct sdma_firmware_header_v1_1 sdma_v1_1;
 	struct gpu_info_firmware_header_v1_0 gpu_info;
@@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_CP_MEC2,
 	AMDGPU_UCODE_ID_CP_MEC2_JT,
 	AMDGPU_UCODE_ID_RLC_G,
+	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
+	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
+	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
 	AMDGPU_UCODE_ID_STORAGE,
 	AMDGPU_UCODE_ID_SMC,
 	AMDGPU_UCODE_ID_UVD,

+ 162 - 130
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c

@@ -66,15 +66,18 @@
 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_uvd.bin"
 #define FIRMWARE_POLARIS11	"amdgpu/polaris11_uvd.bin"
 #define FIRMWARE_POLARIS12	"amdgpu/polaris12_uvd.bin"
+#define FIRMWARE_VEGAM		"amdgpu/vegam_uvd.bin"
 
 #define FIRMWARE_VEGA10		"amdgpu/vega10_uvd.bin"
 #define FIRMWARE_VEGA12		"amdgpu/vega12_uvd.bin"
+#define FIRMWARE_VEGA20		"amdgpu/vega20_uvd.bin"
 
-#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00)
-#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00)
-#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00)
+/* These are common relative offsets for all asics, from uvd_7_0_offset.h,  */
+#define UVD_GPCOM_VCPU_CMD		0x03c3
+#define UVD_GPCOM_VCPU_DATA0	0x03c4
+#define UVD_GPCOM_VCPU_DATA1	0x03c5
+#define UVD_NO_OP				0x03ff
+#define UVD_BASE_SI				0x3800
 
 /**
  * amdgpu_uvd_cs_ctx - Command submission parser context
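
The defines above replace per-ASIC absolute register addresses with common relative offsets; an absolute address is then just base + offset. For instance, the removed mmUVD_GPCOM_VCPU_CMD_VEGA10 value (0x03c3 + 0x7e00) becomes the common UVD_GPCOM_VCPU_CMD plus the Vega10 base. A one-line sketch of the composition (the macro name is illustrative, not from the patch):

/* Sketch: absolute UVD register = per-ASIC base + common offset */
#define UVD_ABS_REG(base, offset)	((base) + (offset))
/* e.g. UVD_ABS_REG(0x7e00, UVD_GPCOM_VCPU_CMD) == 0x03c3 + 0x7e00 */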
@@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
 
 MODULE_FIRMWARE(FIRMWARE_VEGA10);
 MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
@@ -123,9 +128,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
 	unsigned version_major, version_minor, family_id;
-	int i, r;
+	int i, j, r;
 
-	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
+	INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
 
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	case CHIP_VEGA12:
 		fw_name = FIRMWARE_VEGA12;
 		break;
+	case CHIP_VEGAM:
+		fw_name = FIRMWARE_VEGAM;
+		break;
+	case CHIP_VEGA20:
+		fw_name = FIRMWARE_VEGA20;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -226,28 +237,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
 
-	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
-				    &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
-		return r;
-	}
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
 
-	ring = &adev->uvd.ring;
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity,
-				  rq, amdgpu_sched_jobs, NULL);
-	if (r != 0) {
-		DRM_ERROR("Failed setting up UVD run queue.\n");
-		return r;
-	}
+		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
+					    &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
+		if (r) {
+			dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+			return r;
+		}
 
-	for (i = 0; i < adev->uvd.max_handles; ++i) {
-		atomic_set(&adev->uvd.handles[i], 0);
-		adev->uvd.filp[i] = NULL;
-	}
+		ring = &adev->uvd.inst[j].ring;
+		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
+					  rq, NULL);
+		if (r != 0) {
+			DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
+			return r;
+		}
 
+		for (i = 0; i < adev->uvd.max_handles; ++i) {
+			atomic_set(&adev->uvd.inst[j].handles[i], 0);
+			adev->uvd.inst[j].filp[i] = NULL;
+		}
+	}
 	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
 	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
 		adev->uvd.address_64_bit = true;
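
With num_uvd_inst instances, everything that used to hang directly off adev->uvd (ring, entity, handles, filp, vcpu_bo) now lives in adev->uvd.inst[], and the init/fini/suspend/resume paths iterate all instances as the hunk above shows. A sketch of the iteration pattern as a reusable helper (the helper itself is hypothetical; the driver open-codes the loop):

/* Sketch: apply @fn to the main decode ring of every UVD instance. */
static void for_each_uvd_ring(struct amdgpu_device *adev,
			      void (*fn)(struct amdgpu_ring *ring))
{
	int j;

	for (j = 0; j < adev->uvd.num_uvd_inst; ++j)
		fn(&adev->uvd.inst[j].ring);
}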
@@ -274,20 +287,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 
 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
-	int i;
-	kfree(adev->uvd.saved_bo);
+	int i, j;
 
-	drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
+	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+		kfree(adev->uvd.inst[j].saved_bo);
 
-	amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
-			      &adev->uvd.gpu_addr,
-			      (void **)&adev->uvd.cpu_addr);
+		drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
 
-	amdgpu_ring_fini(&adev->uvd.ring);
+		amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
+				      &adev->uvd.inst[j].gpu_addr,
+				      (void **)&adev->uvd.inst[j].cpu_addr);
 
-	for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
-		amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+		amdgpu_ring_fini(&adev->uvd.inst[j].ring);
 
+		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+	}
 	release_firmware(adev->uvd.fw);
 
 	return 0;
@@ -297,32 +312,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 {
 	unsigned size;
 	void *ptr;
-	int i;
-
-	if (adev->uvd.vcpu_bo == NULL)
-		return 0;
+	int i, j;
 
-	cancel_delayed_work_sync(&adev->uvd.idle_work);
+	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+		if (adev->uvd.inst[j].vcpu_bo == NULL)
+			continue;
 
-	/* only valid for physical mode */
-	if (adev->asic_type < CHIP_POLARIS10) {
-		for (i = 0; i < adev->uvd.max_handles; ++i)
-			if (atomic_read(&adev->uvd.handles[i]))
-				break;
+		cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
 
-		if (i == adev->uvd.max_handles)
-			return 0;
-	}
+		/* only valid for physical mode */
+		if (adev->asic_type < CHIP_POLARIS10) {
+			for (i = 0; i < adev->uvd.max_handles; ++i)
+				if (atomic_read(&adev->uvd.inst[j].handles[i]))
+					break;
 
-	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-	ptr = adev->uvd.cpu_addr;
+			if (i == adev->uvd.max_handles)
+				continue;
+		}
 
-	adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
-	if (!adev->uvd.saved_bo)
-		return -ENOMEM;
+		size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
+		ptr = adev->uvd.inst[j].cpu_addr;
 
-	memcpy_fromio(adev->uvd.saved_bo, ptr, size);
+		adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
+		if (!adev->uvd.inst[j].saved_bo)
+			return -ENOMEM;
 
+		memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+	}
 	return 0;
 }
 
@@ -330,59 +346,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 {
 	unsigned size;
 	void *ptr;
+	int i;
 
-	if (adev->uvd.vcpu_bo == NULL)
-		return -EINVAL;
+	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+		if (adev->uvd.inst[i].vcpu_bo == NULL)
+			return -EINVAL;
 
-	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-	ptr = adev->uvd.cpu_addr;
+		size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
+		ptr = adev->uvd.inst[i].cpu_addr;
 
-	if (adev->uvd.saved_bo != NULL) {
-		memcpy_toio(ptr, adev->uvd.saved_bo, size);
-		kfree(adev->uvd.saved_bo);
-		adev->uvd.saved_bo = NULL;
-	} else {
-		const struct common_firmware_header *hdr;
-		unsigned offset;
-
-		hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
-			memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
-				    le32_to_cpu(hdr->ucode_size_bytes));
-			size -= le32_to_cpu(hdr->ucode_size_bytes);
-			ptr += le32_to_cpu(hdr->ucode_size_bytes);
+		if (adev->uvd.inst[i].saved_bo != NULL) {
+			memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+			kfree(adev->uvd.inst[i].saved_bo);
+			adev->uvd.inst[i].saved_bo = NULL;
+		} else {
+			const struct common_firmware_header *hdr;
+			unsigned offset;
+
+			hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+				memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+					    le32_to_cpu(hdr->ucode_size_bytes));
+				size -= le32_to_cpu(hdr->ucode_size_bytes);
+				ptr += le32_to_cpu(hdr->ucode_size_bytes);
+			}
+			memset_io(ptr, 0, size);
+			/* to restore uvd fence seq */
+			amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
 		}
-		memset_io(ptr, 0, size);
-		/* to restore uvd fence seq */
-		amdgpu_fence_driver_force_completion(&adev->uvd.ring);
 	}
-
 	return 0;
 }
 
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
-	int i, r;
+	struct amdgpu_ring *ring;
+	int i, j, r;
 
-	for (i = 0; i < adev->uvd.max_handles; ++i) {
-		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
-		if (handle != 0 && adev->uvd.filp[i] == filp) {
-			struct dma_fence *fence;
-
-			r = amdgpu_uvd_get_destroy_msg(ring, handle,
-						       false, &fence);
-			if (r) {
-				DRM_ERROR("Error destroying UVD (%d)!\n", r);
-				continue;
-			}
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+		ring = &adev->uvd.inst[j].ring;
+
+		for (i = 0; i < adev->uvd.max_handles; ++i) {
+			uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
+			if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
+				struct dma_fence *fence;
+
+				r = amdgpu_uvd_get_destroy_msg(ring, handle,
+							       false, &fence);
+				if (r) {
+					DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
+					continue;
+				}
 
-			dma_fence_wait(fence, false);
-			dma_fence_put(fence);
+				dma_fence_wait(fence, false);
+				dma_fence_put(fence);
 
-			adev->uvd.filp[i] = NULL;
-			atomic_set(&adev->uvd.handles[i], 0);
+				adev->uvd.inst[j].filp[i] = NULL;
+				atomic_set(&adev->uvd.inst[j].handles[i], 0);
+			}
 		}
 		}
 	}
 	}
 }
 }
@@ -657,15 +679,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 	void *ptr;
 	void *ptr;
 	long r;
 	long r;
 	int i;
 	int i;
+	uint32_t ip_instance = ctx->parser->job->ring->me;
 
 
 	if (offset & 0x3F) {
 	if (offset & 0x3F) {
-		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+		DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
 	r = amdgpu_bo_kmap(bo, &ptr);
 	r = amdgpu_bo_kmap(bo, &ptr);
 	if (r) {
 	if (r) {
-		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
+		DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
 		return r;
 		return r;
 	}
 	}
 
 
@@ -675,7 +698,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 	handle = msg[2];
 	handle = msg[2];
 
 
 	if (handle == 0) {
 	if (handle == 0) {
-		DRM_ERROR("Invalid UVD handle!\n");
+		DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
@@ -686,18 +709,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 
 
 		/* try to alloc a new handle */
 		/* try to alloc a new handle */
 		for (i = 0; i < adev->uvd.max_handles; ++i) {
 		for (i = 0; i < adev->uvd.max_handles; ++i) {
-			if (atomic_read(&adev->uvd.handles[i]) == handle) {
-				DRM_ERROR("Handle 0x%x already in use!\n", handle);
+			if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+				DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
 				return -EINVAL;
 				return -EINVAL;
 			}
 			}
 
 
-			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
-				adev->uvd.filp[i] = ctx->parser->filp;
+			if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
+				adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
 				return 0;
 				return 0;
 			}
 			}
 		}
 		}
 
 
-		DRM_ERROR("No more free UVD handles!\n");
+		DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
 		return -ENOSPC;
 		return -ENOSPC;
 
 
 	case 1:
 	case 1:
@@ -709,27 +732,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 
 
 		/* validate the handle */
 		/* validate the handle */
 		for (i = 0; i < adev->uvd.max_handles; ++i) {
 		for (i = 0; i < adev->uvd.max_handles; ++i) {
-			if (atomic_read(&adev->uvd.handles[i]) == handle) {
-				if (adev->uvd.filp[i] != ctx->parser->filp) {
-					DRM_ERROR("UVD handle collision detected!\n");
+			if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+				if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
+					DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
 					return -EINVAL;
 					return -EINVAL;
 				}
 				}
 				return 0;
 				return 0;
 			}
 			}
 		}
 		}
 
 
-		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+		DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
 		return -ENOENT;
 		return -ENOENT;
 
 
 	case 2:
 	case 2:
 		/* it's a destroy msg, free the handle */
 		/* it's a destroy msg, free the handle */
 		for (i = 0; i < adev->uvd.max_handles; ++i)
 		for (i = 0; i < adev->uvd.max_handles; ++i)
-			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
+			atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
 		amdgpu_bo_kunmap(bo);
 		amdgpu_bo_kunmap(bo);
 		return 0;
 		return 0;
 
 
 	default:
 	default:
-		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+		DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 	BUG();
 	BUG();
@@ -800,7 +823,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 		}
 		}
 
 
 		if ((cmd == 0 || cmd == 0x3) &&
 		if ((cmd == 0 || cmd == 0x3) &&
-		    (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
+		    (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
 			DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
 			DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
 				  start, end);
 				  start, end);
 			return -EINVAL;
 			return -EINVAL;
@@ -968,6 +991,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	uint64_t addr;
 	uint64_t addr;
 	long r;
 	long r;
 	int i;
 	int i;
+	unsigned offset_idx = 0;
+	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
 
 	amdgpu_bo_kunmap(bo);
 	amdgpu_bo_kunmap(bo);
 	amdgpu_bo_unpin(bo);
 	amdgpu_bo_unpin(bo);
@@ -987,17 +1012,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 		goto err;
 		goto err;
 
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 	if (adev->asic_type >= CHIP_VEGA10) {
-		data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0);
-		data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0);
-		data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0);
-		data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
-	} else {
-		data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
-		data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
-		data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
-		data[3] = PACKET0(mmUVD_NO_OP, 0);
+		offset_idx = 1 + ring->me;
+		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
+		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
 	}
 	}
 
 
+	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
+	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
+	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
+	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
+
 	ib = &job->ibs[0];
 	ib = &job->ibs[0];
 	addr = amdgpu_bo_gpu_offset(bo);
 	addr = amdgpu_bo_gpu_offset(bo);
 	ib->ptr[0] = data[0];
 	ib->ptr[0] = data[0];
@@ -1033,7 +1057,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 		if (r)
 		if (r)
 			goto err_free;
 			goto err_free;
 
 
-		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
+		r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 		if (r)
 		if (r)
 			goto err_free;
 			goto err_free;
@@ -1121,8 +1145,15 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 {
 {
 	struct amdgpu_device *adev =
 	struct amdgpu_device *adev =
-		container_of(work, struct amdgpu_device, uvd.idle_work.work);
-	unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
+		container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
+	unsigned fences = 0, i, j;
+
+	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+		fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
+		for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
+			fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
+		}
+	}
 
 
 	if (fences == 0) {
 	if (fences == 0) {
 		if (adev->pm.dpm_enabled) {
 		if (adev->pm.dpm_enabled) {
@@ -1136,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 							       AMD_CG_STATE_GATE);
 							       AMD_CG_STATE_GATE);
 		}
 		}
 	} else {
 	} else {
-		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+		schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
 	}
 	}
 }
 }
 
 
@@ -1148,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 	if (amdgpu_sriov_vf(adev))
 	if (amdgpu_sriov_vf(adev))
 		return;
 		return;
 
 
-	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+	set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
 	if (set_clocks) {
 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, true);
 			amdgpu_dpm_enable_uvd(adev, true);
@@ -1165,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 {
 {
 	if (!amdgpu_sriov_vf(ring->adev))
 	if (!amdgpu_sriov_vf(ring->adev))
-		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+		schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
 }
 }
 
 
 /**
 /**
@@ -1179,27 +1210,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 {
 	struct dma_fence *fence;
 	struct dma_fence *fence;
 	long r;
 	long r;
+	uint32_t ip_instance = ring->me;
 
 
 	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
 	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
 	if (r) {
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
 		goto error;
 		goto error;
 	}
 	}
 
 
 	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
 	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
 		goto error;
 		goto error;
 	}
 	}
 
 
 	r = dma_fence_wait_timeout(fence, false, timeout);
 	r = dma_fence_wait_timeout(fence, false, timeout);
 	if (r == 0) {
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out.\n");
+		DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
 		r = -ETIMEDOUT;
 		r = -ETIMEDOUT;
 	} else if (r < 0) {
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
 	} else {
 	} else {
-		DRM_DEBUG("ib test on ring %d succeeded\n",  ring->idx);
+		DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
 		r = 0;
 		r = 0;
 	}
 	}
 
 
@@ -1227,7 +1259,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
 		 * necessarily linear. So we need to count
 		 * necessarily linear. So we need to count
 		 * all non-zero handles.
 		 * all non-zero handles.
 		 */
 		 */
-		if (atomic_read(&adev->uvd.handles[i]))
+		if (atomic_read(&adev->uvd.inst->handles[i]))
 			used_handles++;
 			used_handles++;
 	}
 	}
 
 

+ 13 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h

@@ -31,30 +31,37 @@
 #define AMDGPU_UVD_SESSION_SIZE		(50*1024)
 #define AMDGPU_UVD_FIRMWARE_OFFSET	256
 
+#define AMDGPU_MAX_UVD_INSTANCES			2
+
 #define AMDGPU_UVD_FIRMWARE_SIZE(adev)    \
 	(AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
 			       8) - AMDGPU_UVD_FIRMWARE_OFFSET)
 
-struct amdgpu_uvd {
+struct amdgpu_uvd_inst {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
 	uint64_t		gpu_addr;
-	unsigned		fw_version;
 	void			*saved_bo;
-	unsigned		max_handles;
 	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
 	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;
-	const struct firmware	*fw;	/* UVD firmware */
 	struct amdgpu_ring	ring;
 	struct amdgpu_ring	ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
 	struct amdgpu_irq_src	irq;
-	bool			address_64_bit;
-	bool			use_ctx_buf;
 	struct drm_sched_entity entity;
 	struct drm_sched_entity entity_enc;
 	uint32_t                srbm_soft_reset;
+};
+
+struct amdgpu_uvd {
+	const struct firmware	*fw;	/* UVD firmware */
+	unsigned		fw_version;
+	unsigned		max_handles;
 	unsigned		num_enc_rings;
+	uint8_t		num_uvd_inst;
+	bool			address_64_bit;
+	bool			use_ctx_buf;
+	struct amdgpu_uvd_inst		inst[AMDGPU_MAX_UVD_INSTANCES];
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);

+ 37 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c

@@ -51,11 +51,13 @@
 #define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
 #define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
-#define FIRMWARE_POLARIS11         "amdgpu/polaris11_vce.bin"
-#define FIRMWARE_POLARIS12         "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
+#define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
+#define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"
 
 #define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
 #define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
+#define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"
 
 #ifdef CONFIG_DRM_AMDGPU_CIK
 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
 
 MODULE_FIRMWARE(FIRMWARE_VEGA10);
 MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 
@@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	case CHIP_POLARIS12:
 		fw_name = FIRMWARE_POLARIS12;
 		break;
+	case CHIP_VEGAM:
+		fw_name = FIRMWARE_VEGAM;
+		break;
 	case CHIP_VEGA10:
 		fw_name = FIRMWARE_VEGA10;
 		break;
 	case CHIP_VEGA12:
 		fw_name = FIRMWARE_VEGA12;
 		break;
+	case CHIP_VEGA20:
+		fw_name = FIRMWARE_VEGA20;
+		break;
 
 	default:
 		return -EINVAL;
@@ -181,7 +191,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	ring = &adev->vce.ring[0];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 	r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCE run queue.\n");
 		return r;
@@ -755,6 +765,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 			if (r)
 				goto out;
 			break;
+
+		case 0x0500000d: /* MV buffer */
+			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
+							idx + 2, 0, 0);
+			if (r)
+				goto out;
+
+			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
+							idx + 7, 0, 0);
+			if (r)
+				goto out;
+			break;
 		}
 
 		idx += len / 4;
@@ -860,6 +882,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				goto out;
 			break;
 
+		case 0x0500000d: /* MV buffer */
+			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
+							idx + 2, *size, 0);
+			if (r)
+				goto out;
+
+			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
+							idx + 7, *size / 12, 0);
+			if (r)
+				goto out;
+			break;
+
 		default:
 			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
 			r = -EINVAL;

+ 16 - 9
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

@@ -105,7 +105,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	ring = &adev->vcn.ring_dec;
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN dec run queue.\n");
 		return r;
@@ -114,7 +114,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	ring = &adev->vcn.ring_enc[0];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN enc run queue.\n");
 		return r;
@@ -205,13 +205,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 	struct amdgpu_device *adev =
 		container_of(work, struct amdgpu_device, vcn.idle_work.work);
 	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
+	unsigned i;
+
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
+	}
 
 	if (fences == 0) {
-		if (adev->pm.dpm_enabled) {
-			/* might be used when with pg/cg
+		if (adev->pm.dpm_enabled)
 			amdgpu_dpm_enable_uvd(adev, false);
-			*/
-		}
+		else
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+							       AMD_PG_STATE_GATE);
 	} else {
 		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
 	}
@@ -223,9 +228,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
 
 	if (set_clocks && adev->pm.dpm_enabled) {
-		/* might be used when with pg/cg
-		amdgpu_dpm_enable_uvd(adev, true);
-		*/
+		if (adev->pm.dpm_enabled)
+			amdgpu_dpm_enable_uvd(adev, true);
+		else
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+							       AMD_PG_STATE_UNGATE);
 	}
 }
 

+ 11 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

@@ -45,6 +45,17 @@
 #define VCN_ENC_CMD_REG_WRITE		0x0000000b
 #define VCN_ENC_CMD_REG_WAIT		0x0000000c
 
+enum engine_status_constants {
+	UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
+	UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
+	UVD_STATUS__UVD_BUSY = 0x00000004,
+	GB_ADDR_CONFIG_DEFAULT = 0x26010011,
+	UVD_STATUS__IDLE = 0x2,
+	UVD_STATUS__BUSY = 0x5,
+	UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
+	UVD_STATUS__RBC_BUSY = 0x1,
+};
+
 struct amdgpu_vcn {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;

+ 146 - 127
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -94,6 +94,34 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+				   struct amdgpu_vm *vm,
+				   struct amdgpu_bo *bo)
+{
+	base->vm = vm;
+	base->bo = bo;
+	INIT_LIST_HEAD(&base->bo_list);
+	INIT_LIST_HEAD(&base->vm_status);
+
+	if (!bo)
+		return;
+	list_add_tail(&base->bo_list, &bo->va);
+
+	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return;
+
+	if (bo->preferred_domains &
+	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+		return;
+
+	/*
+	 * we checked all the prerequisites, but it looks like this per vm bo
+	 * is currently evicted. add the bo to the evicted list to make sure it
+	 * is validated on next vm use to avoid fault.
+	 * */
+	list_move_tail(&base->vm_status, &vm->evicted);
+}
+
 /**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
@@ -196,24 +224,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      void *param)
 {
 	struct ttm_bo_global *glob = adev->mman.bdev.glob;
-	int r;
+	struct amdgpu_vm_bo_base *bo_base, *tmp;
+	int r = 0;
 
-	spin_lock(&vm->status_lock);
-	while (!list_empty(&vm->evicted)) {
-		struct amdgpu_vm_bo_base *bo_base;
-		struct amdgpu_bo *bo;
+	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
+		struct amdgpu_bo *bo = bo_base->bo;
 
-		bo_base = list_first_entry(&vm->evicted,
-					   struct amdgpu_vm_bo_base,
-					   vm_status);
-		spin_unlock(&vm->status_lock);
-
-		bo = bo_base->bo;
-		BUG_ON(!bo);
 		if (bo->parent) {
 			r = validate(param, bo);
 			if (r)
-				return r;
+				break;
 
 			spin_lock(&glob->lru_lock);
 			ttm_bo_move_to_lru_tail(&bo->tbo);
@@ -222,22 +242,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			spin_unlock(&glob->lru_lock);
 		}
 
-		if (bo->tbo.type == ttm_bo_type_kernel &&
-		    vm->use_cpu_for_update) {
-			r = amdgpu_bo_kmap(bo, NULL);
-			if (r)
-				return r;
-		}
-
-		spin_lock(&vm->status_lock);
-		if (bo->tbo.type != ttm_bo_type_kernel)
+		if (bo->tbo.type != ttm_bo_type_kernel) {
+			spin_lock(&vm->moved_lock);
 			list_move(&bo_base->vm_status, &vm->moved);
-		else
+			spin_unlock(&vm->moved_lock);
+		} else {
 			list_move(&bo_base->vm_status, &vm->relocated);
+		}
 	}
-	spin_unlock(&vm->status_lock);
 
-	return 0;
+	spin_lock(&glob->lru_lock);
+	list_for_each_entry(bo_base, &vm->idle, vm_status) {
+		struct amdgpu_bo *bo = bo_base->bo;
+
+		if (!bo->parent)
+			continue;
+
+		ttm_bo_move_to_lru_tail(&bo->tbo);
+		if (bo->shadow)
+			ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
+	}
+	spin_unlock(&glob->lru_lock);
+
+	return r;
 }
 
 /**
@@ -249,13 +276,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 {
-	bool ready;
-
-	spin_lock(&vm->status_lock);
-	ready = list_empty(&vm->evicted);
-	spin_unlock(&vm->status_lock);
-
-	return ready;
+	return list_empty(&vm->evicted);
 }
 
 /**
@@ -412,11 +433,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		struct amdgpu_bo *pt;
 
 		if (!entry->base.bo) {
-			r = amdgpu_bo_create(adev,
-					     amdgpu_vm_bo_size(adev, level),
-					     AMDGPU_GPU_PAGE_SIZE,
-					     AMDGPU_GEM_DOMAIN_VRAM, flags,
-					     ttm_bo_type_kernel, resv, &pt);
+			struct amdgpu_bo_param bp;
+
+			memset(&bp, 0, sizeof(bp));
+			bp.size = amdgpu_vm_bo_size(adev, level);
+			bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+			bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+			bp.flags = flags;
+			bp.type = ttm_bo_type_kernel;
+			bp.resv = resv;
+			r = amdgpu_bo_create(adev, &bp, &pt);
 			if (r)
 				return r;
 
@@ -441,12 +467,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			*/
 			pt->parent = amdgpu_bo_ref(parent->base.bo);
 
-			entry->base.vm = vm;
-			entry->base.bo = pt;
-			list_add_tail(&entry->base.bo_list, &pt->va);
-			spin_lock(&vm->status_lock);
-			list_add(&entry->base.vm_status, &vm->relocated);
-			spin_unlock(&vm->status_lock);
+			amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+			list_move(&entry->base.vm_status, &vm->relocated);
 		}
 
 		if (level < AMDGPU_VM_PTB) {
@@ -628,7 +650,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
 	if (vm_flush_needed || pasid_mapping_needed) {
-		r = amdgpu_fence_emit(ring, &fence);
+		r = amdgpu_fence_emit(ring, &fence, 0);
 		if (r)
 			return r;
 	}
@@ -893,10 +915,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 		if (!entry->base.bo)
 			continue;
 
-		spin_lock(&vm->status_lock);
-		if (list_empty(&entry->base.vm_status))
-			list_add(&entry->base.vm_status, &vm->relocated);
-		spin_unlock(&vm->status_lock);
+		if (!entry->base.moved)
+			list_move(&entry->base.vm_status, &vm->relocated);
 		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 	}
 }
@@ -926,6 +946,14 @@ restart:
 	params.adev = adev;
 
 	if (vm->use_cpu_for_update) {
+		struct amdgpu_vm_bo_base *bo_base;
+
+		list_for_each_entry(bo_base, &vm->relocated, vm_status) {
+			r = amdgpu_bo_kmap(bo_base->bo, NULL);
+			if (unlikely(r))
+				return r;
+		}
+
 		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 		if (unlikely(r))
 			return r;
@@ -941,7 +969,6 @@ restart:
 		params.func = amdgpu_vm_do_set_ptes;
 	}
 
-	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
 		struct amdgpu_vm_bo_base *bo_base, *parent;
 		struct amdgpu_vm_pt *pt, *entry;
@@ -950,14 +977,12 @@ restart:
 		bo_base = list_first_entry(&vm->relocated,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
-		list_del_init(&bo_base->vm_status);
-		spin_unlock(&vm->status_lock);
+		bo_base->moved = false;
+		list_move(&bo_base->vm_status, &vm->idle);
 
 		bo = bo_base->bo->parent;
-		if (!bo) {
-			spin_lock(&vm->status_lock);
+		if (!bo)
 			continue;
-		}
 
 		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
 					  bo_list);
@@ -966,12 +991,10 @@ restart:
 
 		amdgpu_vm_update_pde(&params, vm, pt, entry);
 
-		spin_lock(&vm->status_lock);
 		if (!vm->use_cpu_for_update &&
 		    (ndw - params.ib->length_dw) < 32)
 			break;
 	}
-	spin_unlock(&vm->status_lock);
 
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
@@ -1074,9 +1097,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 		if (entry->huge) {
 			/* Add the entry to the relocated list to update it. */
 			entry->huge = false;
-			spin_lock(&p->vm->status_lock);
 			list_move(&entry->base.vm_status, &p->vm->relocated);
-			spin_unlock(&p->vm->status_lock);
 		}
 		return;
 	}
@@ -1555,9 +1576,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		amdgpu_asic_flush_hdp(adev, NULL);
 	}
 
-	spin_lock(&vm->status_lock);
+	spin_lock(&vm->moved_lock);
 	list_del_init(&bo_va->base.vm_status);
-	spin_unlock(&vm->status_lock);
+	spin_unlock(&vm->moved_lock);
+
+	/* If the BO is not in its preferred location add it back to
+	 * the evicted list so that it gets validated again on the
+	 * next command submission.
+	 */
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		uint32_t mem_type = bo->tbo.mem.mem_type;
+
+		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
+			list_add_tail(&bo_va->base.vm_status, &vm->evicted);
+		else
+			list_add(&bo_va->base.vm_status, &vm->idle);
+	}
 
 	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	bo_va->cleared = clear;
@@ -1766,19 +1800,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm)
 {
+	struct amdgpu_bo_va *bo_va, *tmp;
+	struct list_head moved;
 	bool clear;
-	int r = 0;
-
-	spin_lock(&vm->status_lock);
-	while (!list_empty(&vm->moved)) {
-		struct amdgpu_bo_va *bo_va;
-		struct reservation_object *resv;
+	int r;
 
-		bo_va = list_first_entry(&vm->moved,
-			struct amdgpu_bo_va, base.vm_status);
-		spin_unlock(&vm->status_lock);
+	INIT_LIST_HEAD(&moved);
+	spin_lock(&vm->moved_lock);
+	list_splice_init(&vm->moved, &moved);
+	spin_unlock(&vm->moved_lock);
 
-		resv = bo_va->base.bo->tbo.resv;
+	list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
+		struct reservation_object *resv = bo_va->base.bo->tbo.resv;
 
 		/* Per VM BOs never need to bo cleared in the page tables */
 		if (resv == vm->root.base.bo->tbo.resv)
@@ -1791,17 +1824,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			clear = true;
 
 		r = amdgpu_vm_bo_update(adev, bo_va, clear);
-		if (r)
+		if (r) {
+			spin_lock(&vm->moved_lock);
+			list_splice(&moved, &vm->moved);
+			spin_unlock(&vm->moved_lock);
 			return r;
+		}
 
 		if (!clear && resv != vm->root.base.bo->tbo.resv)
 			reservation_object_unlock(resv);
 
-		spin_lock(&vm->status_lock);
 	}
-	spin_unlock(&vm->status_lock);
 
-	return r;
+	return 0;
 }
 
 /**
@@ -1827,36 +1862,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	if (bo_va == NULL) {
 		return NULL;
 	}
-	bo_va->base.vm = vm;
-	bo_va->base.bo = bo;
-	INIT_LIST_HEAD(&bo_va->base.bo_list);
-	INIT_LIST_HEAD(&bo_va->base.vm_status);
+	amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
 
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->valids);
 	INIT_LIST_HEAD(&bo_va->invalids);
 
-	if (!bo)
-		return bo_va;
-
-	list_add_tail(&bo_va->base.bo_list, &bo->va);
-
-	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
-		return bo_va;
-
-	if (bo->preferred_domains &
-	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
-		return bo_va;
-
-	/*
-	 * We checked all the prerequisites, but it looks like this per VM BO
-	 * is currently evicted. add the BO to the evicted list to make sure it
-	 * is validated on next VM use to avoid fault.
-	 * */
-	spin_lock(&vm->status_lock);
-	list_move_tail(&bo_va->base.vm_status, &vm->evicted);
-	spin_unlock(&vm->status_lock);
-
 	return bo_va;
 }
 
@@ -1884,11 +1895,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 	if (mapping->flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
 
-	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-		spin_lock(&vm->status_lock);
-		if (list_empty(&bo_va->base.vm_status))
-			list_add(&bo_va->base.vm_status, &vm->moved);
-		spin_unlock(&vm->status_lock);
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
+	    !bo_va->base.moved) {
+		spin_lock(&vm->moved_lock);
+		list_move(&bo_va->base.vm_status, &vm->moved);
+		spin_unlock(&vm->moved_lock);
 	}
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
@@ -2198,9 +2209,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 	list_del(&bo_va->base.bo_list);
 
-	spin_lock(&vm->status_lock);
+	spin_lock(&vm->moved_lock);
 	list_del(&bo_va->base.vm_status);
-	spin_unlock(&vm->status_lock);
+	spin_unlock(&vm->moved_lock);
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
@@ -2234,33 +2245,34 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 {
 	struct amdgpu_vm_bo_base *bo_base;
 
+	/* shadow bo doesn't have bo base, its validation needs its parent */
+	if (bo->parent && bo->parent->shadow == bo)
+		bo = bo->parent;
+
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
 		struct amdgpu_vm *vm = bo_base->vm;
+		bool was_moved = bo_base->moved;
 
 		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-			spin_lock(&bo_base->vm->status_lock);
 			if (bo->tbo.type == ttm_bo_type_kernel)
 				list_move(&bo_base->vm_status, &vm->evicted);
 			else
 				list_move_tail(&bo_base->vm_status,
					       &vm->evicted);
-			spin_unlock(&bo_base->vm->status_lock);
 			continue;
 		}
 
-		if (bo->tbo.type == ttm_bo_type_kernel) {
-			spin_lock(&bo_base->vm->status_lock);
-			if (list_empty(&bo_base->vm_status))
-				list_add(&bo_base->vm_status, &vm->relocated);
-			spin_unlock(&bo_base->vm->status_lock);
+		if (was_moved)
			continue;
-		}
 
-		spin_lock(&bo_base->vm->status_lock);
-		if (list_empty(&bo_base->vm_status))
-			list_add(&bo_base->vm_status, &vm->moved);
-		spin_unlock(&bo_base->vm->status_lock);
+		if (bo->tbo.type == ttm_bo_type_kernel) {
+			list_move(&bo_base->vm_status, &vm->relocated);
+		} else {
+			spin_lock(&bo_base->vm->moved_lock);
+			list_move(&bo_base->vm_status, &vm->moved);
+			spin_unlock(&bo_base->vm->moved_lock);
+		}
 	}
 }
 
@@ -2355,6 +2367,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid)
 {
+	struct amdgpu_bo_param bp;
+	struct amdgpu_bo *root;
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
 	unsigned ring_instance;
@@ -2367,10 +2381,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	vm->va = RB_ROOT_CACHED;
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
-	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->relocated);
+	spin_lock_init(&vm->moved_lock);
 	INIT_LIST_HEAD(&vm->moved);
+	INIT_LIST_HEAD(&vm->idle);
 	INIT_LIST_HEAD(&vm->freed);
 
 	/* create scheduler entity for page table updates */
@@ -2380,7 +2395,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	ring = adev->vm_manager.vm_pte_rings[ring_instance];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
 	r = drm_sched_entity_init(&ring->sched, &vm->entity,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r)
 		return r;
 
@@ -2409,24 +2424,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		flags |= AMDGPU_GEM_CREATE_SHADOW;
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-	r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
-			     ttm_bo_type_kernel, NULL, &vm->root.base.bo);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = align;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = flags;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+	r = amdgpu_bo_create(adev, &bp, &root);
 	if (r)
 		goto error_free_sched_entity;
 
-	r = amdgpu_bo_reserve(vm->root.base.bo, true);
+	r = amdgpu_bo_reserve(root, true);
 	if (r)
 		goto error_free_root;
 
-	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+	r = amdgpu_vm_clear_bo(adev, vm, root,
 			       adev->vm_manager.root_level,
 			       vm->pte_support_ats);
 	if (r)
 		goto error_unreserve;
 
-	vm->root.base.vm = vm;
-	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
-	list_add_tail(&vm->root.base.vm_status, &vm->evicted);
+	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
 	amdgpu_bo_unreserve(vm->root.base.bo);
 
 	if (pasid) {

+ 7 - 8
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry;
 /* PDE Block Fragment Size for VEGA10 */
 #define AMDGPU_PDE_BFS(a)	((uint64_t)a << 59)
 
-/* VEGA10 only */
+
+/* For GFX9 */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
 
-/* For Raven */
+#define AMDGPU_MTYPE_NC 0
 #define AMDGPU_MTYPE_CC 2
 
 #define AMDGPU_PTE_DEFAULT_ATC  (AMDGPU_PTE_SYSTEM      \
@@ -167,9 +168,6 @@ struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
 
-	/* protecting invalidated */
-	spinlock_t		status_lock;
-
 	/* BOs who needs a validation */
 	struct list_head	evicted;
 
@@ -178,6 +176,10 @@ struct amdgpu_vm {
 
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
+	spinlock_t		moved_lock;
+
+	/* All BOs of this VM not currently in the state machine */
+	struct list_head	idle;
 
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
@@ -186,9 +188,6 @@ struct amdgpu_vm {
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_update;
 
-	/* protecting freed */
-	spinlock_t		freed_lock;
-
 	/* Scheduler entity for page table updates */
 	struct drm_sched_entity	entity;
 

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/ci_dpm.c

@@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
 	pi->pcie_dpm_key_disabled = 0;
 	pi->thermal_sclk_dpm_enabled = 0;
 
-	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+	if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
 		pi->caps_sclk_ds = true;
 	else
 		pi->caps_sclk_ds = false;
@@ -6255,7 +6255,7 @@ static int ci_dpm_late_init(void *handle)
 	int ret;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!amdgpu_dpm)
+	if (!adev->pm.dpm_enabled)
 		return 0;
 
 	/* init the sysfs and debugfs files late */

+ 7 - 0
drivers/gpu/drm/amd/amdgpu/cik.c

@@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev,
 	}
 }
 
+static bool cik_need_full_reset(struct amdgpu_device *adev)
+{
+	/* change this when we support soft reset */
+	return true;
+}
+
 static const struct amdgpu_asic_funcs cik_asic_funcs =
 {
 	.read_disabled_bios = &cik_read_disabled_bios,
@@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.get_config_memsize = &cik_get_config_memsize,
 	.flush_hdp = &cik_flush_hdp,
 	.invalidate_hdp = &cik_invalidate_hdp,
+	.need_full_reset = &cik_need_full_reset,
 };
 
 static int cik_common_early_init(void *handle)

+ 5 - 12
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c

@@ -1823,7 +1823,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1842,18 +1841,15 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -2043,8 +2039,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2526,11 +2521,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
 	dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");

+ 15 - 15
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c

@@ -173,6 +173,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
 							ARRAY_SIZE(polaris11_golden_settings_a11));
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		amdgpu_device_program_register_sequence(adev,
 							polaris10_golden_settings_a11,
 							ARRAY_SIZE(polaris10_golden_settings_a11));
@@ -473,6 +474,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
 		num_crtc = 2;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		num_crtc = 6;
 		break;
 	case CHIP_POLARIS11:
@@ -1445,6 +1447,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev)
 		adev->mode_info.audio.num_pins = 7;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		adev->mode_info.audio.num_pins = 8;
 		break;
 	case CHIP_POLARIS11:
@@ -1862,7 +1865,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1881,18 +1883,15 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -2082,8 +2081,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2253,7 +2251,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
 
 	if ((adev->asic_type == CHIP_POLARIS10) ||
 	    (adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12)) {
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM)) {
 		struct amdgpu_encoder *amdgpu_encoder =
 			to_amdgpu_encoder(amdgpu_crtc->encoder);
 		struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
@@ -2601,11 +2600,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
 	dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
@@ -2673,7 +2670,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
 
 	if ((adev->asic_type == CHIP_POLARIS10) ||
 	    (adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12)) {
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM)) {
 		struct amdgpu_encoder *amdgpu_encoder =
 			to_amdgpu_encoder(amdgpu_crtc->encoder);
 		int encoder_mode =
@@ -2830,6 +2828,7 @@ static int dce_v11_0_early_init(void *handle)
 		adev->mode_info.num_dig = 9;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		adev->mode_info.num_hpd = 6;
 		adev->mode_info.num_dig = 6;
 		break;
@@ -2949,7 +2948,8 @@ static int dce_v11_0_hw_init(void *handle)
 	amdgpu_atombios_encoder_init_dig(adev);
 	if ((adev->asic_type == CHIP_POLARIS10) ||
 	    (adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12)) {
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM)) {
 		amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
 						   DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
 		amdgpu_atombios_crtc_set_dce_clock(adev, 0,

+ 5 - 12
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c

@@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1798,18 +1797,15 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -1978,8 +1974,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
 	WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
 	dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");

+ 5 - 12
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c

@@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1773,18 +1772,15 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -1955,8 +1951,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
 	dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");

+ 5 - 5
drivers/gpu/drm/amd/amdgpu/dce_virtual.c

@@ -168,11 +168,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
 	dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
@@ -329,7 +327,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector)
 	return 0;
 }
 
-static int dce_virtual_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	return MODE_OK;
@@ -462,8 +460,9 @@ static int dce_virtual_hw_init(void *handle)
 		break;
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_VEGAM:
 		dce_v11_0_disable_dce(adev);
 		break;
 	case CHIP_TOPAZ:
@@ -474,6 +473,7 @@ static int dce_virtual_hw_init(void *handle)
 		break;
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		break;
 	default:
		DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);

+ 120 - 0
drivers/gpu/drm/amd/amdgpu/df_v1_7.c

@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v1_7.h"
+
+#include "df/df_1_7_default.h"
+#include "df/df_1_7_offset.h"
+#include "df/df_1_7_sh_mask.h"
+
+static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
+
+static void df_v1_7_init(struct amdgpu_device *adev)
+{
+}
+
+static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
+                                          bool enable)
+{
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+		tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+	} else
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+			     mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
+	tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+	tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+	return tmp;
+}
+
+static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+	int fb_channel_number;
+
+	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+
+	return df_v1_7_channel_number[fb_channel_number];
+}
+
+static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+						     bool enable)
+{
+	u32 tmp;
+
+	/* Put DF on broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, true);
+
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	} else {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V1_7_MGCG_DISABLE;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	}
+
+	/* Exit broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
+					  u32 *flags)
+{
+	u32 tmp;
+
+	/* AMD_CG_SUPPORT_DF_MGCG */
+	tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+	if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
+		*flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
+						bool enable)
+{
+	WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
+		       ForceParWrRMW, enable);
+}
+
+const struct amdgpu_df_funcs df_v1_7_funcs = {
+	.init = df_v1_7_init,
+	.enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
+	.get_fb_channel_number = df_v1_7_get_fb_channel_number,
+	.get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
+	.update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
+	.get_clockgating_state = df_v1_7_get_clockgating_state,
+	.enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
+};
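
The df block exposes its operations through the amdgpu_df_funcs table rather than direct calls, so common soc15 code stays version-agnostic between df 1.7 and 3.6. A sketch of how a caller would toggle DF clock gating through the table, assuming adev->df_funcs was pointed at &df_v1_7_funcs during early init (that wiring is outside this hunk):

/* Sketch: dispatching through the vtable, as soc15 common code would. */
static void example_df_set_mgcg(struct amdgpu_device *adev, bool gate)
{
	if (adev->df_funcs && adev->df_funcs->update_medium_grain_clock_gating)
		adev->df_funcs->update_medium_grain_clock_gating(adev, gate);
}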

+ 16 - 14
drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk104.c → drivers/gpu/drm/amd/amdgpu/df_v1_7.h

@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Red Hat Inc.
+ * Copyright 2018 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,20 +19,22 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
 
-#include <nvif/class.h>
+#ifndef __DF_V1_7_H__
+#define __DF_V1_7_H__
 
-const struct nv50_disp_dmac_oclass
-gk104_disp_base_oclass = {
-	.base.oclass = GK104_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &gf119_disp_dmac_func,
-	.mthd = &gf119_disp_base_chan_mthd,
-	.chid = 1,
+#include "soc15_common.h"
+enum DF_V1_7_MGCG
+{
+	DF_V1_7_MGCG_DISABLE = 0,
+	DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1,
+	DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2,
+	DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13,
+	DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14,
+	DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15
 };
+
+extern const struct amdgpu_df_funcs df_v1_7_funcs;
+
+#endif

+ 116 - 0
drivers/gpu/drm/amd/amdgpu/df_v3_6.c

@@ -0,0 +1,116 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v3_6.h"
+
+#include "df/df_3_6_default.h"
+#include "df/df_3_6_offset.h"
+#include "df/df_3_6_sh_mask.h"
+
+static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
+				       16, 32, 0, 0, 0, 2, 4, 8};
+
+static void df_v3_6_init(struct amdgpu_device *adev)
+{
+}
+
+static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
+					  bool enable)
+{
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+		tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+	} else
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+			     mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+	tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+	tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+	return tmp;
+}
+
+static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+	int fb_channel_number;
+
+	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+	if (fb_channel_number > ARRAY_SIZE(df_v3_6_channel_number))
+		fb_channel_number = 0;
+
+	return df_v3_6_channel_number[fb_channel_number];
+}
+
+static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+						     bool enable)
+{
+	u32 tmp;
+
+	/* Put DF on broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, true);
+
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	} else {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V3_6_MGCG_DISABLE;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	}
+
+	/* Exit broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
+					  u32 *flags)
+{
+	u32 tmp;
+
+	/* AMD_CG_SUPPORT_DF_MGCG */
+	tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+	if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
+		*flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+const struct amdgpu_df_funcs df_v3_6_funcs = {
+	.init = df_v3_6_init,
+	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
+	.get_fb_channel_number = df_v3_6_get_fb_channel_number,
+	.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
+	.update_medium_grain_clock_gating =
+			df_v3_6_update_medium_grain_clock_gating,
+	.get_clockgating_state = df_v3_6_get_clockgating_state,
+};
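
df_v3_6 guards its HBM-channel lookup by resetting out-of-range interleave codes to index 0; the table maps the hardware IntLvNumChan encoding to a channel count. An illustrative standalone version of that lookup (note the defensive >= here, where the hunk above compares with >):

/* Sketch: IntLvNumChan encoding -> channel count, with fall back to
 * index 0 (one channel) for unexpected encodings. */
static const unsigned int channel_number[] = {1, 2, 0, 4, 0, 8, 0,
					      16, 32, 0, 0, 0, 2, 4, 8};

static unsigned int hbm_channels(unsigned int intlv_code)
{
	if (intlv_code >= sizeof(channel_number) / sizeof(channel_number[0]))
		intlv_code = 0;
	return channel_number[intlv_code];
}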

+ 16 - 14
drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk110.c → drivers/gpu/drm/amd/amdgpu/df_v3_6.h

@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Red Hat Inc.
+ * Copyright 2018 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,20 +19,22 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
 
-#include <nvif/class.h>
+#ifndef __DF_V3_6_H__
+#define __DF_V3_6_H__
 
-const struct nv50_disp_dmac_oclass
-gk110_disp_base_oclass = {
-	.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &gf119_disp_dmac_func,
-	.mthd = &gf119_disp_base_chan_mthd,
-	.chid = 1,
+#include "soc15_common.h"
+
+enum DF_V3_6_MGCG {
+	DF_V3_6_MGCG_DISABLE = 0,
+	DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+	DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+	DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+	DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+	DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
 };
+
+extern const struct amdgpu_df_funcs df_v3_6_funcs;
+
+#endif

+ 80 - 22
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -125,18 +125,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 
 
-MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
-
 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
@@ -149,6 +137,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 
 
+MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
+
 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
@@ -161,6 +161,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 
 
+MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
+MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vegam_me.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
+
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 {
 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@@ -292,6 +299,37 @@ static const u32 tonga_mgcg_cgcg_init[] =
 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 };
 
+static const u32 golden_settings_vegam_a11[] =
+{
+	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
+	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
+	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
+	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
+	mmSQ_CONFIG, 0x07f80000, 0x01180000,
+	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
+	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 vegam_golden_common_all[] =
+{
+	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
 static const u32 golden_settings_polaris11_a11[] =
 {
 	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
@@ -712,6 +750,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 							tonga_golden_common_all,
 							ARRAY_SIZE(tonga_golden_common_all));
 		break;
+	case CHIP_VEGAM:
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_vegam_a11,
+							ARRAY_SIZE(golden_settings_vegam_a11));
+		amdgpu_device_program_register_sequence(adev,
+							vegam_golden_common_all,
+							ARRAY_SIZE(vegam_golden_common_all));
+		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 		amdgpu_device_program_register_sequence(adev,
@@ -918,17 +964,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_FIJI:
 		chip_name = "fiji";
 		break;
-	case CHIP_POLARIS11:
-		chip_name = "polaris11";
+	case CHIP_STONEY:
+		chip_name = "stoney";
 		break;
 	case CHIP_POLARIS10:
 		chip_name = "polaris10";
 		break;
+	case CHIP_POLARIS11:
+		chip_name = "polaris11";
+		break;
 	case CHIP_POLARIS12:
 		chip_name = "polaris12";
 		break;
-	case CHIP_STONEY:
-		chip_name = "stoney";
+	case CHIP_VEGAM:
+		chip_name = "vegam";
 		break;
 	default:
 		BUG();
@@ -1770,6 +1819,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		ret = amdgpu_atombios_get_gfx_info(adev);
 		if (ret)
 			return ret;
@@ -1957,12 +2007,13 @@ static int gfx_v8_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	switch (adev->asic_type) {
-	case CHIP_FIJI:
 	case CHIP_TONGA:
+	case CHIP_CARRIZO:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
-	case CHIP_POLARIS10:
-	case CHIP_CARRIZO:
+	case CHIP_VEGAM:
 		adev->gfx.mec.num_mec = 2;
 		break;
 	case CHIP_TOPAZ:
@@ -2323,6 +2374,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
 
 		break;
 	case CHIP_FIJI:
+	case CHIP_VEGAM:
 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@@ -3504,6 +3556,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
 {
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
+	case CHIP_VEGAM:
 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
 			  RB_XSEL2(1) | PKR_MAP(2) |
 			  PKR_XSEL(1) | PKR_YSEL(1) |
@@ -4071,7 +4124,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 		gfx_v8_0_init_power_gating(adev);
 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
-		   (adev->asic_type == CHIP_POLARIS12)) {
+		   (adev->asic_type == CHIP_POLARIS12) ||
+		   (adev->asic_type == CHIP_VEGAM)) {
 		gfx_v8_0_init_csb(adev);
 		gfx_v8_0_init_save_restore_list(adev);
 		gfx_v8_0_enable_save_restore_machine(adev);
@@ -4146,7 +4200,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
 	if (adev->asic_type == CHIP_POLARIS11 ||
 	    adev->asic_type == CHIP_POLARIS10 ||
-	    adev->asic_type == CHIP_POLARIS12) {
+	    adev->asic_type == CHIP_POLARIS12 ||
+	    adev->asic_type == CHIP_VEGAM) {
 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
 		tmp &= ~0x3;
 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
@@ -5498,7 +5553,8 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
 						       bool enable)
 {
 	if ((adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12))
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM))
 		/* Send msg to SMU via Powerplay */
 		amdgpu_device_ip_set_powergating_state(adev,
 						       AMD_IP_BLOCK_TYPE_SMC,
@@ -5588,6 +5644,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
 		else
@@ -6154,6 +6211,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
 		break;
 	default:
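
The VEGAM golden-settings arrays above follow the flat triplet layout used throughout this file: register offset, mask of bits to change, value to set. A sketch of the read-modify-write that amdgpu_device_program_register_sequence() is understood to apply per triplet, assuming a full mask is treated as a plain overwrite (the helper itself lives outside this diff):

/* Sketch: apply one {offset, and_mask, or_mask} golden-register triplet. */
static void program_golden_triplet(struct amdgpu_device *adev,
				   u32 offset, u32 and_mask, u32 or_mask)
{
	u32 tmp;

	if (and_mask == 0xffffffff) {
		tmp = or_mask;		/* whole register replaced */
	} else {
		tmp = RREG32(offset);	/* preserve bits outside the mask */
		tmp &= ~and_mask;
		tmp |= or_mask;
	}
	WREG32(offset, tmp);
}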

+ 248 - 96
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

@@ -27,6 +27,7 @@
 #include "amdgpu_gfx.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
 #include "soc15.h"
 #include "soc15d.h"
 #include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
 
 
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
 #include "gc/gc_9_0_sh_mask.h"
@@ -41,7 +42,6 @@
 #define GFX9_MEC_HPD_SIZE 2048
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
-#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
 
 #define mmPWR_MISC_CNTL_STATUS					0x0183
 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
@@ -64,6 +64,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
 
 
+MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega20_me.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
+
 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
 MODULE_FIRMWARE("amdgpu/raven_me.bin");
 MODULE_FIRMWARE("amdgpu/raven_me.bin");
@@ -73,29 +80,22 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
 
 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 {
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -109,6 +109,20 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
 };
 
+static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
+{
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
+};
+
 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
@@ -185,6 +199,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
 };
 
+static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
+{
+	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+};
+
+static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
+{
+	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+};
+
 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -218,6 +256,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 						golden_settings_gc_9_2_1_vg12,
 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 		break;
+	case CHIP_VEGA20:
+		soc15_program_register_sequence(adev,
+						golden_settings_gc_9_0,
+						ARRAY_SIZE(golden_settings_gc_9_0));
+		soc15_program_register_sequence(adev,
+						golden_settings_gc_9_0_vg20,
+						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
+		break;
 	case CHIP_RAVEN:
 		soc15_program_register_sequence(adev,
 						 golden_settings_gc_9_1,
@@ -401,6 +447,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_1 *rlc_hdr;
+
+	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+}
+
 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 {
 	const char *chip_name;
@@ -412,6 +479,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 	unsigned int *tmp = NULL;
 	unsigned int i = 0;
+	uint16_t version_major;
+	uint16_t version_minor;
 
 	DRM_DEBUG("\n");
 
@@ -422,6 +491,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_VEGA12:
 		chip_name = "vega12";
 		break;
+	case CHIP_VEGA20:
+		chip_name = "vega20";
+		break;
 	case CHIP_RAVEN:
 		chip_name = "raven";
 		break;
@@ -468,6 +540,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 		goto out;
 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+	if (version_major == 2 && version_minor == 1)
+		adev->gfx.rlc.is_rlc_v2_1 = true;
+
 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 	adev->gfx.rlc.save_and_restore_offset =
@@ -508,6 +586,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 
+	if (adev->gfx.rlc.is_rlc_v2_1)
+		gfx_v9_0_init_rlc_ext_microcode(adev);
+
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 	if (err)
@@ -566,6 +647,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 		adev->firmware.fw_size +=
 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
+		if (adev->gfx.rlc.is_rlc_v2_1) {
+			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+			info->fw = adev->gfx.rlc_fw;
+			adev->firmware.fw_size +=
+				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+
+			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+			info->fw = adev->gfx.rlc_fw;
+			adev->firmware.fw_size +=
+				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+
+			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+			info->fw = adev->gfx.rlc_fw;
+			adev->firmware.fw_size +=
+				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+		}
+
 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
 		info->fw = adev->gfx.mec_fw;
@@ -1013,9 +1114,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
 };
 
-static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
+static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 {
 	u32 gb_addr_config;
+	int err;
 
 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
 
@@ -1037,6 +1139,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
 		DRM_INFO("fix gfx.config for vega12\n");
 		break;
+	case CHIP_VEGA20:
+		adev->gfx.config.max_hw_contexts = 8;
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+		gb_addr_config &= ~0xf3e777ff;
+		gb_addr_config |= 0x22014042;
+		/* check vbios table if gpu info is not available */
+		err = amdgpu_atomfirmware_get_gfx_info(adev);
+		if (err)
+			return err;
+		break;
 	case CHIP_RAVEN:
 		adev->gfx.config.max_hw_contexts = 8;
 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1086,6 +1202,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 					adev->gfx.config.gb_addr_config,
 					GB_ADDR_CONFIG,
 					PIPE_INTERLEAVE_SIZE));
+
+	return 0;
 }
 
 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
@@ -1319,6 +1437,7 @@ static int gfx_v9_0_sw_init(void *handle)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		adev->gfx.mec.num_mec = 2;
 		break;
@@ -1446,7 +1565,9 @@ static int gfx_v9_0_sw_init(void *handle)
 
 	adev->gfx.ce_ram_size = 0x8000;
 
-	gfx_v9_0_gpu_early_init(adev);
+	r = gfx_v9_0_gpu_early_init(adev);
+	if (r)
+		return r;
 
 	r = gfx_v9_0_ngg_init(adev);
 	if (r)
@@ -1600,6 +1721,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 
 	gfx_v9_0_setup_rb(adev);
 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -1616,7 +1738,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-			tmp = adev->gmc.shared_aperture_start >> 48;
+			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+				(adev->gmc.private_aperture_start >> 48));
+			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+				(adev->gmc.shared_aperture_start >> 48));
 			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
 		}
 	}
@@ -1708,55 +1833,42 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
 			adev->gfx.rlc.clear_state_size);
 }
 
-static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
 				int indirect_offset,
 				int list_size,
 				int *unique_indirect_regs,
 				int *unique_indirect_reg_count,
-				int max_indirect_reg_count,
 				int *indirect_start_offsets,
-				int *indirect_start_offsets_count,
-				int max_indirect_start_offsets_count)
+				int *indirect_start_offsets_count)
 {
 	int idx;
-	bool new_entry = true;
 
 	for (; indirect_offset < list_size; indirect_offset++) {
+		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
 
-		if (new_entry) {
-			new_entry = false;
-			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
-			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
-			BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
-		}
+		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+			indirect_offset += 2;
 
-		if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
-			new_entry = true;
-			continue;
-		}
+			/* look for the matching indice */
+			for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
+				if (unique_indirect_regs[idx] ==
+					register_list_format[indirect_offset] ||
+					!unique_indirect_regs[idx])
+					break;
+			}
 
-		indirect_offset += 2;
+			BUG_ON(idx >= *unique_indirect_reg_count);
 
-		/* look for the matching indice */
-		for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
-			if (unique_indirect_regs[idx] ==
-				register_list_format[indirect_offset])
-				break;
-		}
+			if (!unique_indirect_regs[idx])
+				unique_indirect_regs[idx] = register_list_format[indirect_offset];
 
-		if (idx >= *unique_indirect_reg_count) {
-			unique_indirect_regs[*unique_indirect_reg_count] =
-				register_list_format[indirect_offset];
-			idx = *unique_indirect_reg_count;
-			*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
-			BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+			indirect_offset++;
 		}
-
-		register_list_format[indirect_offset] = idx;
 	}
 }
 
-static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
 {
 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
 	int unique_indirect_reg_count = 0;
@@ -1765,7 +1877,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
 	int indirect_start_offsets_count = 0;
 
 	int list_size = 0;
-	int i = 0;
+	int i = 0, j = 0;
 	u32 tmp = 0;
 
 	u32 *register_list_format =
@@ -1776,15 +1888,14 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
 		adev->gfx.rlc.reg_list_format_size_bytes);
 
 	/* setup unique_indirect_regs array and indirect_start_offsets array */
-	gfx_v9_0_parse_ind_reg_list(register_list_format,
-				GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
-				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
-				unique_indirect_regs,
-				&unique_indirect_reg_count,
-				ARRAY_SIZE(unique_indirect_regs),
-				indirect_start_offsets,
-				&indirect_start_offsets_count,
-				ARRAY_SIZE(indirect_start_offsets));
+	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
+	gfx_v9_1_parse_ind_reg_list(register_list_format,
+				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
+				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+				    unique_indirect_regs,
+				    &unique_indirect_reg_count,
+				    indirect_start_offsets,
+				    &indirect_start_offsets_count);
 
 	/* enable auto inc in case it is disabled */
 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@@ -1798,19 +1909,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
 			adev->gfx.rlc.register_restore[i]);
 
-	/* load direct register */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
-	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
-			adev->gfx.rlc.register_restore[i]);
-
 	/* load indirect register */
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
 		adev->gfx.rlc.reg_list_format_start);
-	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+
+	/* direct register portion */
+	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
 			register_list_format[i]);
 
+	/* indirect register portion */
+	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
+		if (register_list_format[i] == 0xFFFFFFFF) {
+			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+			continue;
+		}
+
+		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+
+		for (j = 0; j < unique_indirect_reg_count; j++) {
+			if (register_list_format[i] == unique_indirect_regs[j]) {
+				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
+				break;
+			}
+		}
+
+		BUG_ON(j >= unique_indirect_reg_count);
+
+		i++;
+	}
+
 	/* set save/restore list size */
 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
 	list_size = list_size >> 1;
@@ -1823,14 +1952,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
 		adev->gfx.rlc.starting_offsets_start);
 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
-			indirect_start_offsets[i]);
+		       indirect_start_offsets[i]);
 
 	/* load unique indirect regs*/
 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
-			unique_indirect_regs[i] & 0x3FFFF);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
-			unique_indirect_regs[i] >> 20);
+		if (unique_indirect_regs[i] != 0) {
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
+			       unique_indirect_regs[i] & 0x3FFFF);
+
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
+			       unique_indirect_regs[i] >> 20);
+		}
 	}
 
 	kfree(register_list_format);
@@ -2010,6 +2144,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad
 
 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
 {
+	if (!adev->gfx.rlc.is_rlc_v2_1)
+		return;
+
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
 			      AMD_PG_SUPPORT_GFX_SMG |
 			      AMD_PG_SUPPORT_GFX_DMG |
@@ -2017,27 +2154,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
 			      AMD_PG_SUPPORT_GDS |
 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
 		gfx_v9_0_init_csb(adev);
-		gfx_v9_0_init_rlc_save_restore_list(adev);
+		gfx_v9_1_init_rlc_save_restore_list(adev);
 		gfx_v9_0_enable_save_restore_machine(adev);
 
-		if (adev->asic_type == CHIP_RAVEN) {
-			WREG32(mmRLC_JUMP_TABLE_RESTORE,
-				adev->gfx.rlc.cp_table_gpu_addr >> 8);
-			gfx_v9_0_init_gfx_power_gating(adev);
-
-			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
-				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
-				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
-			} else {
-				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
-				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
-			}
-
-			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
-				gfx_v9_0_enable_cp_power_gating(adev, true);
-			else
-				gfx_v9_0_enable_cp_power_gating(adev, false);
-		}
+		WREG32(mmRLC_JUMP_TABLE_RESTORE,
+		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
+		gfx_v9_0_init_gfx_power_gating(adev);
 	}
 }
 
@@ -3061,6 +3183,9 @@ static int gfx_v9_0_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+					       AMD_PG_STATE_UNGATE);
+
 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 
@@ -3279,6 +3404,11 @@ static int gfx_v9_0_late_init(void *handle)
 	if (r)
 		return r;
 
+	r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+						   AMD_PG_STATE_GATE);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -3339,8 +3469,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
 						bool enable)
 {
-	/* TODO: double check if we need to perform under safe mdoe */
-	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
+	gfx_v9_0_enter_rlc_safe_mode(adev);
 
 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -3351,7 +3480,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
 	}
 
-	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
+	gfx_v9_0_exit_rlc_safe_mode(adev);
 }
 
 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -3605,6 +3734,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		gfx_v9_0_update_gfx_clock_gating(adev,
 						 state == AMD_CG_STATE_GATE ? true : false);
@@ -3742,7 +3872,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 	}
 
 	amdgpu_ring_write(ring, header);
-BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 		(2 << 0) |
@@ -3774,13 +3904,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 {
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
 
 	/* RELEASE_MEM - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
-				 EOP_TC_ACTION_EN |
-				 EOP_TC_WB_ACTION_EN |
-				 EOP_TC_MD_ACTION_EN |
+	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+					       EOP_TC_NC_ACTION_EN) :
+					      (EOP_TCL1_ACTION_EN |
+					       EOP_TC_ACTION_EN |
+					       EOP_TC_WB_ACTION_EN |
+					       EOP_TC_MD_ACTION_EN)) |
 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
 				 EVENT_INDEX(5)));
 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -4137,6 +4270,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 }
 
 
+static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+	if (amdgpu_sriov_vf(ring->adev))
+		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				      ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 						 enum amdgpu_interrupt_state state)
 {
 {
@@ -4458,6 +4605,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4492,6 +4640,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.set_priority = gfx_v9_0_ring_set_priority_compute,
 	.set_priority = gfx_v9_0_ring_set_priority_compute,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4522,6 +4671,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 };
 };
 
 
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -4577,6 +4727,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 	case CHIP_RAVEN:
 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
 		break;
 		break;
@@ -4686,6 +4837,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 
 
 	cu_info->number = active_cu_number;
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
 	cu_info->ao_cu_mask = ao_cu_mask;
+	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
 
 
 	return 0;
 	return 0;
 }
 }

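The gfx_v9_0_ring_emit_reg_write_reg_wait() callback added above picks between two strategies: on bare metal it emits a separate write packet and wait packet through the generic helper, while on an SR-IOV virtual function it folds both into a single WAIT_REG_MEM packet, apparently so that a world switch cannot land between the write and the poll. A minimal userspace model of that dispatch (the emit functions are illustrative stand-ins, not amdgpu code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in: two packets, write first, then poll until (reg1 & mask) == ref. */
    static void emit_write_then_wait(uint32_t reg0, uint32_t reg1,
                                     uint32_t ref, uint32_t mask)
    {
        printf("WRITE reg %#x = %#x\n", reg0, ref);
        printf("WAIT  reg %#x & %#x == %#x\n", reg1, mask, ref);
    }

    /* Stand-in: one combined packet, atomic with respect to preemption. */
    static void emit_combined_wait_reg_mem(uint32_t reg0, uint32_t reg1,
                                           uint32_t ref, uint32_t mask)
    {
        printf("WAIT_REG_MEM write %#x, poll %#x & %#x == %#x\n",
               reg0, reg1, mask, ref);
    }

    static void reg_write_reg_wait(bool sriov_vf, uint32_t reg0, uint32_t reg1,
                                   uint32_t ref, uint32_t mask)
    {
        if (sriov_vf)   /* keep write and poll in one packet */
            emit_combined_wait_reg_mem(reg0, reg1, ref, mask);
        else
            emit_write_then_wait(reg0, reg1, ref, mask);
    }

    int main(void)
    {
        reg_write_reg_wait(true, 0x1234, 0x1235, 1u << 3, 1u << 3);
        reg_write_reg_wait(false, 0x1234, 0x1235, 1u << 3, 1u << 3);
        return 0;
    }
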
+ 23 - 2
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c

@@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_bo_late_init(adev);
+
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
 		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
 
+static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+	u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+	unsigned size;
+
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport = RREG32(mmVIEWPORT_SIZE);
+		size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+			REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+			4);
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+	return size;
+}
+
 static int gmc_v6_0_sw_init(void *handle)
 {
 	int r;
@@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle)
 
 	adev->gmc.mc_mask = 0xffffffffffULL;
 
-	adev->gmc.stolen_size = 256 * 1024;
-
 	adev->need_dma32 = false;
 	dma_bits = adev->need_dma32 ? 32 : 40;
 	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev);
+
 	r = amdgpu_bo_init(adev);
 	if (r)
 		return r;

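The gmc_v6_0_get_vbios_fb_size() hunk above sizes the pre-OS reservation as viewport width times height times 4 bytes (a 32bpp scanout buffer) when VGA emulation is off, uses a fixed 9 MB when it is on, and skips the reservation entirely when less than 8 MB of VRAM would remain. The same rule as a standalone sketch, with the register reads replaced by plain parameters:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the sizing rule above; the constants come from the hunk. */
    static unsigned vbios_fb_size(int vga_mode, unsigned width, unsigned height,
                                  uint64_t real_vram_size)
    {
        unsigned size;

        if (vga_mode)
            size = 9 * 1024 * 1024;    /* 8 MB VGA emulator + 1 MB FB */
        else
            size = width * height * 4; /* 32bpp pre-OS scanout buffer */

        /* Reserve nothing if that would leave less than 8 MB of VRAM. */
        if (real_vram_size - size < 8 * 1024 * 1024)
            return 0;
        return size;
    }

    int main(void)
    {
        /* A 1080p pre-OS console needs about 7.9 MB, under the 9 MB VGA case. */
        printf("%u bytes\n", vbios_fb_size(0, 1920, 1080, 2ULL << 30));
        return 0;
    }
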
+ 23 - 2
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

@@ -958,12 +958,33 @@ static int gmc_v7_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_bo_late_init(adev);
+
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
 		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
 
+static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+	u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+	unsigned size;
+
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport = RREG32(mmVIEWPORT_SIZE);
+		size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+			REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+			4);
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+	return size;
+}
+
 static int gmc_v7_0_sw_init(void *handle)
 {
 	int r;
@@ -998,8 +1019,6 @@ static int gmc_v7_0_sw_init(void *handle)
 	 */
 	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
-	adev->gmc.stolen_size = 256 * 1024;
-
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
@@ -1030,6 +1049,8 @@ static int gmc_v7_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)

+ 29 - 4
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

@@ -138,6 +138,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		amdgpu_device_program_register_sequence(adev,
 							golden_settings_polaris11_a11,
 							ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -231,6 +232,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_FIJI:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
+	case CHIP_VEGAM:
 		return 0;
 	default: BUG();
 	}
@@ -567,9 +569,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
 		switch (adev->asic_type) {
-		case CHIP_POLARIS11: /* all engines support GPUVM */
 		case CHIP_POLARIS10: /* all engines support GPUVM */
+		case CHIP_POLARIS11: /* all engines support GPUVM */
 		case CHIP_POLARIS12: /* all engines support GPUVM */
+		case CHIP_VEGAM:     /* all engines support GPUVM */
 		default:
 			adev->gmc.gart_size = 256ULL << 20;
 			break;
@@ -1049,12 +1052,33 @@ static int gmc_v8_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_bo_late_init(adev);
+
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
 		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
 
+static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+	u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+	unsigned size;
+
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport = RREG32(mmVIEWPORT_SIZE);
+		size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+			REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+			4);
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+	return size;
+}
+
 #define mmMC_SEQ_MISC0_FIJI 0xA71
 
 static int gmc_v8_0_sw_init(void *handle)
@@ -1068,7 +1092,8 @@ static int gmc_v8_0_sw_init(void *handle)
 	} else {
 		u32 tmp;
 
-		if (adev->asic_type == CHIP_FIJI)
+		if ((adev->asic_type == CHIP_FIJI) ||
+		    (adev->asic_type == CHIP_VEGAM))
 			tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
 		else
 			tmp = RREG32(mmMC_SEQ_MISC0);
@@ -1096,8 +1121,6 @@ static int gmc_v8_0_sw_init(void *handle)
 	 */
 	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
-	adev->gmc.stolen_size = 256 * 1024;
-
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
@@ -1128,6 +1151,8 @@ static int gmc_v8_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)

+ 82 - 64
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

@@ -43,19 +43,13 @@
 #include "gfxhub_v1_0.h"
 #include "mmhub_v1_0.h"
 
-#define mmDF_CS_AON0_DramBaseAddress0                                                                  0x0044
-#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX                                                         0
-//DF_CS_AON0_DramBaseAddress0
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT                                                        0x0
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT                                                    0x1
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT                                                      0x4
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT                                                      0x8
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT                                                      0xc
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK                                                          0x00000001L
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK                                                      0x00000002L
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK                                                        0x000000F0L
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK                                                        0x00000700L
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK                                                        0xFFFFF000L
+/* add these here since we already include dce12 headers and these are for DCN */
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT                                        0x0
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT                                       0x10
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK                                          0x00003FFFL
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK                                         0x3FFF0000L
 
 /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
 #define AMDGPU_NUM_OF_VMIDS			8
@@ -385,11 +379,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+					    hub->vm_inv_eng0_ack + eng,
+					    req, 1 << vmid);
 
 	return pd_addr;
 }
@@ -556,8 +548,7 @@ static int gmc_v9_0_early_init(void *handle)
 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
 	adev->gmc.shared_aperture_end =
 		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
-	adev->gmc.private_aperture_start =
-		adev->gmc.shared_aperture_end + 1;
+	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
 	adev->gmc.private_aperture_end =
 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
@@ -659,6 +650,11 @@ static int gmc_v9_0_late_init(void *handle)
 	unsigned i;
 	int r;
 
+	/*
+	 * TODO - Uncomment once GART corruption issue is fixed.
+	 */
+	/* amdgpu_bo_late_init(adev); */
+
 	for(i = 0; i < adev->num_rings; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		unsigned vmhub = ring->funcs->vmhub;
@@ -679,6 +675,7 @@ static int gmc_v9_0_late_init(void *handle)
 			DRM_INFO("ECC is active.\n");
 		} else if (r == 0) {
 			DRM_INFO("ECC is not present.\n");
+			adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
 		} else {
 			DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
 			return r;
@@ -697,10 +694,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 	amdgpu_device_vram_location(adev, &adev->gmc, base);
 	amdgpu_device_gart_location(adev, mc);
 	/* base offset of vram pages */
-	if (adev->flags & AMD_IS_APU)
-		adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
-	else
-		adev->vm_manager.vram_base_offset = 0;
+	adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
 }
 
 /**
@@ -714,7 +708,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
  */
 static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 {
-	u32 tmp;
 	int chansize, numchan;
 	int r;
 
@@ -727,39 +720,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 		else
 			chansize = 128;
 
-		tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
-		tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
-		tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
-		switch (tmp) {
-		case 0:
-		default:
-			numchan = 1;
-			break;
-		case 1:
-			numchan = 2;
-			break;
-		case 2:
-			numchan = 0;
-			break;
-		case 3:
-			numchan = 4;
-			break;
-		case 4:
-			numchan = 0;
-			break;
-		case 5:
-			numchan = 8;
-			break;
-		case 6:
-			numchan = 0;
-			break;
-		case 7:
-			numchan = 16;
-			break;
-		case 8:
-			numchan = 2;
-			break;
-		}
+		numchan = adev->df_funcs->get_hbm_channel_number(adev);
 		adev->gmc.vram_width = numchan * chansize;
 	}
 
@@ -792,6 +753,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 		switch (adev->asic_type) {
 		case CHIP_VEGA10:  /* all engines support GPUVM */
 		case CHIP_VEGA12:  /* all engines support GPUVM */
+		case CHIP_VEGA20:
 		default:
 			adev->gmc.gart_size = 512ULL << 20;
 			break;
@@ -826,6 +788,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 	return amdgpu_gart_table_vram_alloc(adev);
 }
 
+static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+#if 0
+	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+#endif
+	unsigned size;
+
+	/*
+	 * TODO Remove once GART corruption is resolved
+	 * Check related code in gmc_v9_0_sw_fini
+	 * */
+	size = 9 * 1024 * 1024;
+
+#if 0
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport;
+
+		switch (adev->asic_type) {
+		case CHIP_RAVEN:
+			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+			size = (REG_GET_FIELD(viewport,
+					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+				REG_GET_FIELD(viewport,
+					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+				4);
+			break;
+		case CHIP_VEGA10:
+		case CHIP_VEGA12:
+		default:
+			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
+			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+				4);
+			break;
+		}
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+
+#endif
+	return size;
+}
+
 static int gmc_v9_0_sw_init(void *handle)
 {
 	int r;
@@ -851,6 +859,7 @@ static int gmc_v9_0_sw_init(void *handle)
 		break;
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		/*
 		 * To fulfill 4-level page support,
 		 * vm size is 256TB (48bit), maximum size of Vega10,
@@ -877,12 +886,6 @@ static int gmc_v9_0_sw_init(void *handle)
 	 */
 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-	/*
-	 * It needs to reserve 8M stolen memory for vega10
-	 * TODO: Figure out how to avoid that...
-	 */
-	adev->gmc.stolen_size = 8 * 1024 * 1024;
-
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 44-bits.
 	 * IGP - can handle 44-bits
@@ -907,6 +910,8 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)
@@ -950,6 +955,18 @@ static int gmc_v9_0_sw_fini(void *handle)
 	amdgpu_gem_force_release(adev);
 	amdgpu_vm_manager_fini(adev);
 	gmc_v9_0_gart_fini(adev);
+
+	/*
+	* TODO:
+	* Currently there is a bug where some memory client outside
+	* of the driver writes to first 8M of VRAM on S3 resume,
+	* this overrides GART which by default gets placed in first 8M and
+	* causes VM_FAULTS once GTT is accessed.
+	* Keep the stolen memory reservation until the while this is not solved.
+	* Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
+	*/
+	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+
 	amdgpu_bo_fini(adev);
 
 	return 0;
@@ -960,6 +977,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));

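The switch deleted from gmc_v9_0_mc_init() above decoded the IntLvNumChan field of DF_CS_AON0_DramBaseAddress0 into an HBM channel count; that logic now sits behind adev->df_funcs->get_hbm_channel_number(). For reference, the same decode as a lookup table, with the values copied from the removed cases (the old default arm returned 1):

    #include <stdio.h>

    /* IntLvNumChan encoding -> HBM channel count, per the removed switch. */
    static int hbm_channel_number(unsigned intlv_num_chan)
    {
        static const int map[] = { 1, 2, 0, 4, 0, 8, 0, 16, 2 };

        if (intlv_num_chan >= sizeof(map) / sizeof(map[0]))
            return 1; /* the old "case 0: default:" arm */
        return map[intlv_num_chan];
    }

    int main(void)
    {
        /* vram_width = numchan * chansize, e.g. 8 channels * 128 bits. */
        printf("channels = %d\n", hbm_channel_number(5));
        return 0;
    }
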
+ 2 - 2
drivers/gpu/drm/amd/amdgpu/kv_dpm.c

@@ -2817,7 +2817,7 @@ static int kv_dpm_init(struct amdgpu_device *adev)
 		pi->caps_tcp_ramping = true;
 	}
 
-	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+	if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
 		pi->caps_sclk_ds = true;
 	else
 		pi->caps_sclk_ds = false;
@@ -2974,7 +2974,7 @@ static int kv_dpm_late_init(void *handle)
 	/* powerdown unused blocks for now */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!amdgpu_dpm)
+	if (!adev->pm.dpm_enabled)
 		return 0;
 
 	kv_dpm_powergate_acp(adev, true);

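The kv_dpm change above replaces the module-wide amdgpu_pp_feature_mask global with the per-device adev->powerplay.pp_feature copy (and the module parameter amdgpu_dpm with the runtime adev->pm.dpm_enabled state). The test itself is a plain bit-mask check; a toy version, with an illustrative bit value rather than the real PP_FEATURE_MASK layout:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PP_SCLK_DEEP_SLEEP_MASK (1u << 3) /* bit position illustrative */

    struct device_pp {
        uint32_t pp_feature; /* per-device copy, not a global module mask */
    };

    static bool sclk_deep_sleep_enabled(const struct device_pp *pp)
    {
        return (pp->pp_feature & PP_SCLK_DEEP_SLEEP_MASK) != 0;
    }

    int main(void)
    {
        struct device_pp pp = { .pp_feature = PP_SCLK_DEEP_SLEEP_MASK };

        printf("deep sleep: %s\n", sclk_deep_sleep_enabled(&pp) ? "on" : "off");
        return 0;
    }
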
+ 1 - 0
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c

@@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		mmhub_v1_0_update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);

+ 3 - 1
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c

@@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	} while (timeout > 1);
 
 flr_done:
-	if (locked)
+	if (locked) {
+		adev->in_gpu_reset = 0;
 		mutex_unlock(&adev->lock_reset);
+	}
 
 	/* Trigger recovery for world switch failure if no TDR */
 	if (amdgpu_lockup_timeout == 0)

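The mxgpu_ai fix above clears adev->in_gpu_reset before dropping lock_reset, so whichever thread acquires the mutex next can never observe a stale reset-in-progress flag. The same pairing, modeled with a pthread mutex (names mine, not amdgpu's):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock_reset = PTHREAD_MUTEX_INITIALIZER;
    static int in_gpu_reset;

    static void flr_work(void)
    {
        pthread_mutex_lock(&lock_reset);
        in_gpu_reset = 1;

        /* ... reinitialize the device ... */

        /* Clear the flag while still holding the lock: the next thread
         * to take the mutex must see in_gpu_reset == 0. */
        in_gpu_reset = 0;
        pthread_mutex_unlock(&lock_reset);
    }

    int main(void)
    {
        flr_work(); /* build with -pthread */
        printf("in_gpu_reset = %d\n", in_gpu_reset);
        return 0;
    }
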
+ 17 - 1
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c

@@ -34,10 +34,19 @@
 #define smnCPM_CONTROL                                                                                  0x11180460
 #define smnPCIE_CNTL2                                                                                   0x11180070
 
+/* vega20 */
+#define mmRCC_DEV0_EPF0_STRAP0_VG20                                                                         0x0011
+#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX                                                                2
+
 static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
 {
         u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
 
+	if (adev->asic_type == CHIP_VEGA20)
+		tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
+	else
+		tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
 	tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
 	tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
 
@@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
 			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
 
 	u32 doorbell_range = RREG32(reg);
+	u32 range = 2;
+
+	if (adev->asic_type == CHIP_VEGA20)
+		range = 8;
 
 	if (use_doorbell) {
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
-		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
+		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range);
 	} else
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
 
@@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
 {
 	uint32_t def, data;
 
+	if (adev->asic_type == CHIP_VEGA20)
+		return;
+
 	/* NBIF_MGCG_CTRL_LCLK */
 	def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
 

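In the nbio_v7_0 hunk above, Vega20 widens the SDMA doorbell aperture by programming the range register's SIZE field to 8 instead of 2, via the usual read-modify-write of a single field. A simplified model of that REG_SET_FIELD-style update (the field offsets here are invented for the sketch, not the real BIF_SDMA0_DOORBELL_RANGE layout):

    #include <stdint.h>
    #include <stdio.h>

    #define SIZE_SHIFT   16
    #define SIZE_MASK    (0xffu << SIZE_SHIFT)
    #define OFFSET_SHIFT 2
    #define OFFSET_MASK  0x00fffffcu

    /* Clear one field of a register value and merge in a new one. */
    static uint32_t set_field(uint32_t reg, uint32_t mask, unsigned shift,
                              uint32_t val)
    {
        return (reg & ~mask) | ((val << shift) & mask);
    }

    int main(void)
    {
        uint32_t doorbell_range = 0;
        int vega20 = 1;

        doorbell_range = set_field(doorbell_range, OFFSET_MASK, OFFSET_SHIFT, 0xf0);
        doorbell_range = set_field(doorbell_range, SIZE_MASK, SIZE_SHIFT,
                                   vega20 ? 8 : 2); /* wider range on vega20 */
        printf("doorbell range = %#x\n", doorbell_range);
        return 0;
    }
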
+ 46 - 21
drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h

@@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id
     GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000,   /* initialize GPCOM ring */
     GFX_CTRL_CMD_ID_DESTROY_RINGS   = 0x00030000,   /* destroy rings */
     GFX_CTRL_CMD_ID_CAN_INIT_RINGS  = 0x00040000,   /* is it allowed to initialized the rings */
+    GFX_CTRL_CMD_ID_ENABLE_INT      = 0x00050000,   /* enable PSP-to-Gfx interrupt */
+    GFX_CTRL_CMD_ID_DISABLE_INT     = 0x00060000,   /* disable PSP-to-Gfx interrupt */
+    GFX_CTRL_CMD_ID_MODE1_RST       = 0x00070000,   /* trigger the Mode 1 reset */
 
     GFX_CTRL_CMD_ID_MAX             = 0x000F0000,   /* max command ID */
 };
 
 
+/*-----------------------------------------------------------------------------
+    NOTE:   All physical addresses used in this interface are actually
+            GPU Virtual Addresses.
+*/
+
+
 /* Control registers of the TEE Gfx interface. These are located in
 *  SRBM-to-PSP mailbox registers (total 8 registers).
 */
@@ -55,8 +64,8 @@ struct psp_gfx_ctrl
     volatile uint32_t   rbi_rptr;         /* +8   Read pointer (index) of RBI ring */
     volatile uint32_t   gpcom_wptr;       /* +12  Write pointer (index) of GPCOM ring */
     volatile uint32_t   gpcom_rptr;       /* +16  Read pointer (index) of GPCOM ring */
-    volatile uint32_t   ring_addr_lo;     /* +20  bits [31:0] of physical address of ring buffer */
-    volatile uint32_t   ring_addr_hi;     /* +24  bits [63:32] of physical address of ring buffer */
+    volatile uint32_t   ring_addr_lo;     /* +20  bits [31:0] of GPU Virtual of ring buffer (VMID=0)*/
+    volatile uint32_t   ring_addr_hi;     /* +24  bits [63:32] of GPU Virtual of ring buffer (VMID=0) */
    volatile uint32_t   ring_buf_size;    /* +28  Ring buffer size (in bytes) */
 
 };
@@ -78,6 +87,8 @@ enum psp_gfx_cmd_id
     GFX_CMD_ID_LOAD_ASD     = 0x00000004,   /* load ASD Driver */
     GFX_CMD_ID_SETUP_TMR    = 0x00000005,   /* setup TMR region */
     GFX_CMD_ID_LOAD_IP_FW   = 0x00000006,   /* load HW IP FW */
+    GFX_CMD_ID_DESTROY_TMR  = 0x00000007,   /* destroy TMR region */
+    GFX_CMD_ID_SAVE_RESTORE = 0x00000008,   /* save/restore HW IP FW */
 
 };
 
@@ -85,11 +96,11 @@ enum psp_gfx_cmd_id
 /* Command to load Trusted Application binary into PSP OS. */
 struct psp_gfx_cmd_load_ta
 {
-    uint32_t        app_phy_addr_lo;        /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */
-    uint32_t        app_phy_addr_hi;        /* bits [63:32] of the physical address of the TA binary */
+    uint32_t        app_phy_addr_lo;        /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
+    uint32_t        app_phy_addr_hi;        /* bits [63:32] of the GPU Virtual address of the TA binary */
     uint32_t        app_len;                /* length of the TA binary in bytes */
-    uint32_t        cmd_buf_phy_addr_lo;    /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */
-    uint32_t        cmd_buf_phy_addr_hi;    /* bits [63:32] of the physical address of CMD buffer */
+    uint32_t        cmd_buf_phy_addr_lo;    /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
+    uint32_t        cmd_buf_phy_addr_hi;    /* bits [63:32] of the GPU Virtual address of CMD buffer */
     uint32_t        cmd_buf_len;            /* length of the CMD buffer in bytes; must be multiple of 4 KB */
 
     /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided
@@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta
 */
 struct psp_gfx_buf_desc
 {
-    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */
-    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of physical address of the buffer */
+    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */
+    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of GPU Virtual address of the buffer */
     uint32_t        buf_size;              /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */
 
 };
@@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd
 /* Command to setup TMR region. */
 struct psp_gfx_cmd_setup_tmr
 {
-    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */
-    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of physical address of TMR buffer */
+    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
+    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of GPU Virtual address of TMR buffer */
     uint32_t        buf_size;              /* buffer size in bytes (must be multiple of 4 KB) */
 
 };
@@ -174,18 +185,32 @@ enum psp_gfx_fw_type
     GFX_FW_TYPE_ISP         = 16,
     GFX_FW_TYPE_ACP         = 17,
     GFX_FW_TYPE_SMU         = 18,
+    GFX_FW_TYPE_MMSCH       = 19,
+    GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM        = 20,
+    GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM        = 21,
+    GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL           = 22,
+    GFX_FW_TYPE_MAX         = 23
 };
 
 /* Command to load HW IP FW. */
 struct psp_gfx_cmd_load_ip_fw
 {
-    uint32_t                fw_phy_addr_lo;    /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */
-    uint32_t                fw_phy_addr_hi;    /* bits [63:32] of physical address of FW location */
+    uint32_t                fw_phy_addr_lo;    /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
+    uint32_t                fw_phy_addr_hi;    /* bits [63:32] of GPU Virtual address of FW location */
     uint32_t                fw_size;           /* FW buffer size in bytes */
     enum psp_gfx_fw_type    fw_type;           /* FW type */
 
 };
 
+/* Command to save/restore HW IP FW. */
+struct psp_gfx_cmd_save_restore_ip_fw
+{
+    uint32_t                save_fw;              /* if set, command is used for saving fw otherwise for resetoring*/
+    uint32_t                save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */
+    uint32_t                save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */
+    uint32_t                buf_size;             /* Size of the save/restore buffer in bytes */
+    enum psp_gfx_fw_type    fw_type;              /* FW type */
+};
 
 /* All GFX ring buffer commands. */
 union psp_gfx_commands
@@ -195,7 +220,7 @@ union psp_gfx_commands
     struct psp_gfx_cmd_invoke_cmd       cmd_invoke_cmd;
     struct psp_gfx_cmd_setup_tmr        cmd_setup_tmr;
     struct psp_gfx_cmd_load_ip_fw       cmd_load_ip_fw;
-
+    struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
 };
 
 
@@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp
 
     /* These fields are used for RBI only. They are all 0 in GPCOM commands
     */
-    uint32_t        resp_buf_addr_lo;   /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */
-    uint32_t        resp_buf_addr_hi;   /* +16 bits [63:32] of physical address of response buffer */
+    uint32_t        resp_buf_addr_lo;   /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */
+    uint32_t        resp_buf_addr_hi;   /* +16 bits [63:32] of GPU Virtual address of response buffer */
     uint32_t        resp_offset;        /* +20 offset within response buffer */
     uint32_t        resp_buf_size;      /* +24 total size of the response buffer in bytes */
 
@@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp
 /* Structure of the Ring Buffer Frame */
 struct psp_gfx_rb_frame
 {
-    uint32_t    cmd_buf_addr_lo;    /* +0  bits [31:0] of physical address of command buffer (must be 4 KB aligned) */
-    uint32_t    cmd_buf_addr_hi;    /* +4  bits [63:32] of physical address of command buffer */
+    uint32_t    cmd_buf_addr_lo;    /* +0  bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */
+    uint32_t    cmd_buf_addr_hi;    /* +4  bits [63:32] of GPU Virtual address of command buffer */
     uint32_t    cmd_buf_size;       /* +8  command buffer size in bytes */
-    uint32_t    fence_addr_lo;      /* +12 bits [31:0] of physical address of Fence for this frame */
-    uint32_t    fence_addr_hi;      /* +16 bits [63:32] of physical address of Fence for this frame */
+    uint32_t    fence_addr_lo;      /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */
+    uint32_t    fence_addr_hi;      /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */
     uint32_t    fence_value;        /* +20 Fence value */
     uint32_t    sid_lo;             /* +24 bits [31:0] of SID value (used only for RBI frames) */
    uint32_t    sid_hi;             /* +28 bits [63:32] of SID value (used only for RBI frames) */
     uint8_t     vmid;               /* +32 VMID value used for mapping of all addresses for this frame */
     uint8_t     frame_type;         /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */
     uint8_t     reserved1[2];       /* +34 reserved, must be 0 */
-    uint32_t    reserved2[7];       /* +40 reserved, must be 0 */
-    /* total 64 bytes */
+    uint32_t    reserved2[7];       /* +36 reserved, must be 0 */
+                /* total 64 bytes */
 };
 
 #endif /* _PSP_TEE_GFX_IF_H_ */

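The comment fix in psp_gfx_rb_frame above (reserved2 starts at +36, not +40) can be checked mechanically: eight 4-byte fields end at +32, vmid and frame_type plus reserved1[2] bring the offset to +36, and seven more dwords close out the 64-byte frame. A compile-time check of that layout (struct copied from the header above; requires C11 for _Static_assert):

    #include <stddef.h>
    #include <stdint.h>

    struct psp_gfx_rb_frame {
        uint32_t cmd_buf_addr_lo;  /* +0  */
        uint32_t cmd_buf_addr_hi;  /* +4  */
        uint32_t cmd_buf_size;     /* +8  */
        uint32_t fence_addr_lo;    /* +12 */
        uint32_t fence_addr_hi;    /* +16 */
        uint32_t fence_value;      /* +20 */
        uint32_t sid_lo;           /* +24 */
        uint32_t sid_hi;           /* +28 */
        uint8_t  vmid;             /* +32 */
        uint8_t  frame_type;       /* +33 */
        uint8_t  reserved1[2];     /* +34 */
        uint32_t reserved2[7];     /* +36, as the corrected comment says */
    };

    _Static_assert(offsetof(struct psp_gfx_rb_frame, reserved2) == 36,
                   "reserved2 starts at +36");
    _Static_assert(sizeof(struct psp_gfx_rb_frame) == 64,
                   "frame is 64 bytes total");

    int main(void) { return 0; }
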
+ 9 - 0
drivers/gpu/drm/amd/amdgpu/psp_v10_0.c

@@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
 	case AMDGPU_UCODE_ID_RLC_G:
 		*type = GFX_FW_TYPE_RLC_G;
 		break;
+	case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+		*type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL;
+		break;
+	case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+		*type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
+		break;
+	case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+		*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
+		break;
 	case AMDGPU_UCODE_ID_SMC:
 		*type = GFX_FW_TYPE_SMU;
 		break;

+ 3 - 0
drivers/gpu/drm/amd/amdgpu/psp_v3_1.c

@@ -41,6 +41,9 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
 MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
 MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
 MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
+
 
 #define smnMP1_FIRMWARE_FLAGS 0x3010028
 

+ 9 - 3
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

@@ -62,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin");
 
 
 static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -209,6 +211,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -275,15 +278,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_FIJI:
 		chip_name = "fiji";
 		break;
-	case CHIP_POLARIS11:
-		chip_name = "polaris11";
-		break;
 	case CHIP_POLARIS10:
 		chip_name = "polaris10";
 		break;
+	case CHIP_POLARIS11:
+		chip_name = "polaris11";
+		break;
 	case CHIP_POLARIS12:
 		chip_name = "polaris12";
 		break;
+	case CHIP_VEGAM:
+		chip_name = "vegam";
+		break;
 	case CHIP_CARRIZO:
 		chip_name = "carrizo";
 		break;

+ 68 - 28
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

@@ -42,6 +42,8 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
 MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
 MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
 MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
 MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
 
 #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
@@ -107,6 +109,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
 };
 
+static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
+{
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+};
+
 static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
 {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
@@ -139,6 +163,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
 						golden_settings_sdma_vg12,
 						ARRAY_SIZE(golden_settings_sdma_vg12));
 		break;
+	case CHIP_VEGA20:
+		soc15_program_register_sequence(adev,
+						golden_settings_sdma_4_2,
+						ARRAY_SIZE(golden_settings_sdma_4_2));
+		break;
 	case CHIP_RAVEN:
 		soc15_program_register_sequence(adev,
 						 golden_settings_sdma_4_1,
@@ -182,6 +211,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_VEGA12:
 		chip_name = "vega12";
 		break;
+	case CHIP_VEGA20:
+		chip_name = "vega20";
+		break;
 	case CHIP_RAVEN:
 		chip_name = "raven";
 		break;
@@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
 
 }
 
+static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
+				   int mem_space, int hdp,
+				   uint32_t addr0, uint32_t addr1,
+				   uint32_t ref, uint32_t mask,
+				   uint32_t inv)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+	if (mem_space) {
+		/* memory */
+		amdgpu_ring_write(ring, addr0);
+		amdgpu_ring_write(ring, addr1);
+	} else {
+		/* registers */
+		amdgpu_ring_write(ring, addr0 << 2);
+		amdgpu_ring_write(ring, addr1 << 2);
+	}
+	amdgpu_ring_write(ring, ref); /* reference */
+	amdgpu_ring_write(ring, mask); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
+}
+
 /**
  * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
@@ -378,15 +435,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	else
 		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
 
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
-	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
-	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
-	amdgpu_ring_write(ring, ref_and_mask); /* reference */
-	amdgpu_ring_write(ring, ref_and_mask); /* mask */
-	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+	sdma_v4_0_wait_reg_mem(ring, 0, 1,
+			       adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+			       adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+			       ref_and_mask, ref_and_mask, 10);
 }
 
 /**
@@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 	uint64_t addr = ring->fence_drv.gpu_addr;
 
 	/* wait for idle */
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
-			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
-	amdgpu_ring_write(ring, addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-	amdgpu_ring_write(ring, seq); /* reference */
-	amdgpu_ring_write(ring, 0xffffffff); /* mask */
-	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+	sdma_v4_0_wait_reg_mem(ring, 1, 0,
+			       addr & 0xfffffffc,
+			       upper_32_bits(addr) & 0xffffffff,
+			       seq, 0xffffffff, 4);
 }
 
 
@@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
 static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 					 uint32_t val, uint32_t mask)
 {
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
-	amdgpu_ring_write(ring, reg << 2);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, val); /* reference */
-	amdgpu_ring_write(ring, mask); /* mask */
-	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+	sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
 }
 
 static int sdma_v4_0_early_init(void *handle)
@@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		sdma_v4_0_update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
@@ -1605,6 +1644,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 	.pad_ib = sdma_v4_0_ring_pad_ib,
 	.emit_wreg = sdma_v4_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)

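sdma_v4_0_wait_reg_mem() above folds three open-coded POLL_REGMEM emitters into one helper: a memory poll takes a 64-bit address split across two dwords, while a register poll takes two dword-indexed offsets that are shifted left by 2 into byte addresses. A compact model of that dispatch (the packet bit positions are illustrative, not the real SDMA_PKT_* encoding):

    #include <stdint.h>
    #include <stdio.h>

    static void ring_write(uint32_t dw)
    {
        printf("dw: %#010x\n", dw);
    }

    /* One encoder for both poll flavors, as in the helper above. */
    static void wait_reg_mem(int mem_space, int hdp,
                             uint32_t addr0, uint32_t addr1,
                             uint32_t ref, uint32_t mask, uint32_t interval)
    {
        ring_write(0x08u |                      /* POLL_REGMEM opcode (illustrative) */
                   (uint32_t)hdp << 26 |        /* HDP flush flag */
                   3u << 28 |                   /* poll function: equal */
                   (uint32_t)mem_space << 31);  /* memory vs. register poll */
        if (mem_space) {            /* 64-bit memory address */
            ring_write(addr0);
            ring_write(addr1);
        } else {                    /* dword register offsets -> byte addresses */
            ring_write(addr0 << 2);
            ring_write(addr1 << 2);
        }
        ring_write(ref);                        /* reference value */
        ring_write(mask);                       /* compare mask */
        ring_write(0xfffu << 20 | interval);    /* retry count, poll interval */
    }

    int main(void)
    {
        wait_reg_mem(0, 0, 0x1234, 0, 1, 1, 10); /* register poll, like emit_reg_wait */
        return 0;
    }
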
+ 7 - 0
drivers/gpu/drm/amd/amdgpu/si.c

@@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev,
 	}
 }
 
+static bool si_need_full_reset(struct amdgpu_device *adev)
+{
+	/* change this when we support soft reset */
+	return true;
+}
+
 static int si_get_pcie_lanes(struct amdgpu_device *adev)
 {
 	u32 link_width_cntl;
@@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
 	.get_config_memsize = &si_get_config_memsize,
 	.flush_hdp = &si_flush_hdp,
 	.invalidate_hdp = &si_invalidate_hdp,
+	.need_full_reset = &si_need_full_reset,
 };
 
 static uint32_t si_get_rev_id(struct amdgpu_device *adev)

Too many files were changed in this diff, so some files are not shown.