
Merge remote-tracking branch 'airlied/drm-next' into drm-misc-next-fixes

Backmerging airlied/drm-next
Sean Paul 8 years ago
parent
commit
b15cdca5b5
100 changed files with 7,656 additions and 4,466 deletions
  1. 5 2
      Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
  2. 7 6
      Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt
  3. 8 0
      Documentation/devicetree/bindings/display/panel/auo,p320hvn03.txt
  4. 23 0
      Documentation/devicetree/bindings/display/panel/innolux,p079zca.txt
  5. 8 0
      Documentation/devicetree/bindings/display/panel/nec,nl12880b20-05.txt
  6. 8 0
      Documentation/devicetree/bindings/display/panel/nlt,nl192108ac18-02d.txt
  7. 4 1
      Documentation/devicetree/bindings/display/panel/samsung,s6e3ha2.txt
  8. 36 0
      Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
  9. 121 6
      Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
  10. 21 0
      Documentation/devicetree/bindings/display/zte,vou.txt
  11. 1 0
      Documentation/devicetree/bindings/vendor-prefixes.txt
  12. 12 54
      Documentation/gpu/drm-internals.rst
  13. 6 0
      Documentation/gpu/drm-kms-helpers.rst
  14. 2 2
      Documentation/gpu/drm-kms.rst
  15. 12 0
      Documentation/gpu/drm-mm.rst
  16. 2 0
      Documentation/gpu/index.rst
  17. 6 0
      Documentation/gpu/pl111.rst
  18. 178 0
      Documentation/gpu/tegra.rst
  19. 0 13
      Documentation/gpu/todo.rst
  20. 13 10
      Documentation/sync_file.txt
  21. 19 0
      MAINTAINERS
  22. 3 5
      drivers/dma-buf/dma-buf.c
  23. 5 0
      drivers/dma-buf/dma-fence.c
  24. 6 4
      drivers/dma-buf/sync_debug.c
  25. 34 18
      drivers/dma-buf/sync_file.c
  26. 4 0
      drivers/gpu/drm/Kconfig
  27. 5 1
      drivers/gpu/drm/Makefile
  28. 12 4
      drivers/gpu/drm/amd/amdgpu/Kconfig
  29. 11 4
      drivers/gpu/drm/amd/amdgpu/Makefile
  30. 118 47
      drivers/gpu/drm/amd/amdgpu/amdgpu.h
  31. 63 38
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
  32. 7 7
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
  33. 11 65
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
  34. 12 52
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
  35. 64 41
      drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
  36. 174 133
      drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
  37. 10 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
  38. 371 67
      drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
  39. 1 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
  40. 73 10
      drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
  41. 12 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
  42. 4 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
  43. 16 38
      drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
  44. 206 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
  45. 60 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
  46. 13 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
  47. 2 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
  48. 2 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
  49. 31 11
      drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
  50. 72 44
      drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
  51. 3 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
  52. 1 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
  53. 1 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
  54. 24 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
  55. 2 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
  56. 299 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
  57. 83 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
  58. 11 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
  59. 19 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
  60. 1 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
  61. 23 14
      drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
  62. 32 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
  63. 28 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
  64. 5 25
      drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
  65. 2 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
  66. 654 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
  67. 77 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
  68. 14 10
      drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
  69. 1 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
  70. 476 157
      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
  71. 28 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
  72. 1 1
      drivers/gpu/drm/amd/amdgpu/ci_dpm.c
  73. 1 1
      drivers/gpu/drm/amd/amdgpu/ci_smc.c
  74. 61 62
      drivers/gpu/drm/amd/amdgpu/cik.c
  75. 1 1
      drivers/gpu/drm/amd/amdgpu/cik_ih.c
  76. 1 1
      drivers/gpu/drm/amd/amdgpu/cz_ih.c
  77. 1 1
      drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
  78. 1 1
      drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
  79. 551 49
      drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
  80. 1 1
      drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
  81. 1 1
      drivers/gpu/drm/amd/amdgpu/dce_virtual.c
  82. 702 913
      drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
  83. 358 294
      drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
  84. 5 0
      drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
  85. 507 784
      drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
  86. 5 0
      drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
  87. 1172 610
      drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
  88. 175 260
      drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
  89. 2 1
      drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
  90. 31 47
      drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
  91. 31 64
      drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
  92. 31 64
      drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
  93. 73 79
      drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
  94. 1 1
      drivers/gpu/drm/amd/amdgpu/iceland_ih.c
  95. 1 1
      drivers/gpu/drm/amd/amdgpu/kv_dpm.c
  96. 1 1
      drivers/gpu/drm/amd/amdgpu/kv_smc.c
  97. 225 297
      drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
  98. 4 0
      drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
  99. 20 15
      drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
  100. 14 11
      drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c

+ 5 - 2
Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt

@@ -5,7 +5,7 @@ with HDMI output and the HVS (Hardware Video Scaler) for compositing
 display planes.
 
 Required properties for VC4:
-- compatible:	Should be "brcm,bcm2835-vc4"
+- compatible:	Should be "brcm,bcm2835-vc4" or "brcm,cygnus-vc4"
 
 Required properties for Pixel Valve:
 - compatible:	Should be one of "brcm,bcm2835-pixelvalve0",
@@ -54,11 +54,14 @@ Required properties for VEC:
 		  See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
 
 Required properties for V3D:
-- compatible:	Should be "brcm,bcm2835-v3d"
+- compatible:	Should be "brcm,bcm2835-v3d" or "brcm,cygnus-v3d"
 - reg:		Physical base address and length of the V3D's registers
 - interrupts:	The interrupt number
 		  See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
 
+Optional properties for V3D:
+- clocks:	The clock the unit runs on
+
 Required properties for DSI:
 - compatible:	Should be "brcm,bcm2835-dsi0" or "brcm,bcm2835-dsi1"
 - reg:		Physical base address and length of the DSI block's registers

+ 7 - 6
Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt

@@ -8,12 +8,13 @@ Required properties:
 - compatible: value should be one of:
 	"samsung,exynos5433-decon", "samsung,exynos5433-decon-tv";
 - reg: physical base address and length of the DECON registers set.
-- interrupts: should contain a list of all DECON IP block interrupts in the
-	      order: VSYNC, LCD_SYSTEM. The interrupt specifier format
-	      depends on the interrupt controller used.
-- interrupt-names: should contain the interrupt names: "vsync", "lcd_sys"
-		   in the same order as they were listed in the interrupts
-		   property.
+- interrupt-names: should contain the interrupt names depending on mode of work:
+		video mode: "vsync",
+		command mode: "lcd_sys",
+		command mode with software trigger: "lcd_sys", "te".
+- interrupts or interrupts-extended: list of interrupt specifiers corresponding
+		to names privided in interrupt-names, as described in
+		interrupt-controller/interrupts.txt
 - clocks: must include clock specifiers corresponding to entries in the
 	  clock-names property.
 - clock-names: list of clock names sorted in the same order as the clocks

+ 8 - 0
Documentation/devicetree/bindings/display/panel/auo,p320hvn03.txt

@@ -0,0 +1,8 @@
+AU Optronics Corporation 31.5" FHD (1920x1080) TFT LCD panel
+
+Required properties:
+- compatible: should be "auo,p320hvn03"
+- power-supply: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

+ 23 - 0
Documentation/devicetree/bindings/display/panel/innolux,p079zca.txt

@@ -0,0 +1,23 @@
+Innolux P079ZCA 7.85" 768x1024 TFT LCD panel
+
+Required properties:
+- compatible: should be "innolux,p079zca"
+- reg: DSI virtual channel of the peripheral
+- power-supply: phandle of the regulator that provides the supply voltage
+- enable-gpios: panel enable gpio
+
+Optional properties:
+- backlight: phandle of the backlight device attached to the panel
+
+Example:
+
+	&mipi_dsi {
+		panel {
+			compatible = "innolux,p079zca";
+			reg = <0>;
+			power-supply = <...>;
+			backlight = <&backlight>;
+			enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
+			status = "okay";
+		};
+	};

+ 8 - 0
Documentation/devicetree/bindings/display/panel/nec,nl12880b20-05.txt

@@ -0,0 +1,8 @@
+NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel
+
+Required properties:
+- compatible: should be "nec,nl12880bc20-05"
+- power-supply: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

+ 8 - 0
Documentation/devicetree/bindings/display/panel/nlt,nl192108ac18-02d.txt

@@ -0,0 +1,8 @@
+NLT Technologies, Ltd. 15.6" FHD (1920x1080) LVDS TFT LCD panel
+
+Required properties:
+- compatible: should be "nlt,nl192108ac18-02d"
+- power-supply: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

+ 4 - 1
Documentation/devicetree/bindings/display/panel/samsung,s6e3ha2.txt

@@ -1,7 +1,10 @@
 Samsung S6E3HA2 5.7" 1440x2560 AMOLED panel
 Samsung S6E3HA2 5.7" 1440x2560 AMOLED panel
+Samsung S6E3HF2 5.65" 1600x2560 AMOLED panel
 
 Required properties:
-  - compatible: "samsung,s6e3ha2"
+  - compatible: should be one of:
+    "samsung,s6e3ha2",
+    "samsung,s6e3hf2".
   - reg: the virtual channel number of a DSI peripheral
   - vdd3-supply: I/O voltage supply
   - vci-supply: voltage supply for analog circuits

+ 36 - 0
Documentation/devicetree/bindings/display/st,stm32-ltdc.txt

@@ -0,0 +1,36 @@
+* STMicroelectronics STM32 lcd-tft display controller
+
+- ltdc: lcd-tft display controller host
+  must be a sub-node of st-display-subsystem
+  Required properties:
+  - compatible: "st,stm32-ltdc"
+  - reg: Physical base address of the IP registers and length of memory mapped region.
+  - clocks: A list of phandle + clock-specifier pairs, one for each
+    entry in 'clock-names'.
+  - clock-names: A list of clock names. For ltdc it should contain:
+      - "lcd" for the clock feeding the output pixel clock & IP clock.
+  - resets: reset to be used by the device (defined by use of RCC macro).
+  Required nodes:
+    - Video port for RGB output.
+
+Example:
+
+/ {
+	...
+	soc {
+	...
+		ltdc: display-controller@40016800 {
+			compatible = "st,stm32-ltdc";
+			reg = <0x40016800 0x200>;
+			interrupts = <88>, <89>;
+			resets = <&rcc STM32F4_APB2_RESET(LTDC)>;
+			clocks = <&rcc 1 CLK_LCD>;
+			clock-names = "lcd";
+
+			port {
+				ltdc_out_rgb: endpoint {
+				};
+			};
+		};
+	};
+};

+ 121 - 6
Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt

@@ -4,6 +4,44 @@ Allwinner A10 Display Pipeline
 The Allwinner A10 Display pipeline is composed of several components
 that are going to be documented below:
 
+For the input port of all components up to the TCON in the display
+pipeline, if there are multiple components, the local endpoint IDs
+must correspond to the index of the upstream block. For example, if
+the remote endpoint is Frontend 1, then the local endpoint ID must
+be 1.
+
+Conversely, for the output ports of the same group, the remote endpoint
+ID must be the index of the local hardware block. If the local backend
+is backend 1, then the remote endpoint ID must be 1.
+
+HDMI Encoder
+------------
+
+The HDMI Encoder supports the HDMI video and audio outputs, and does
+CEC. It is one end of the pipeline.
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun5i-a10s-hdmi
+  - reg: base address and size of memory-mapped region
+  - interrupts: interrupt associated to this IP
+  - clocks: phandles to the clocks feeding the HDMI encoder
+    * ahb: the HDMI interface clock
+    * mod: the HDMI module clock
+    * pll-0: the first video PLL
+    * pll-1: the second video PLL
+  - clock-names: the clock names mentioned above
+  - dmas: phandles to the DMA channels used by the HDMI encoder
+    * ddc-tx: The channel for DDC transmission
+    * ddc-rx: The channel for DDC reception
+    * audio-tx: The channel used for audio transmission
+  - dma-names: the channel names mentioned above
+
+  - ports: A ports node with endpoint definitions as defined in
+    Documentation/devicetree/bindings/media/video-interfaces.txt. The
+    first port should be the input endpoint. The second should be the
+    output, usually to an HDMI connector.
+
 TV Encoder
 ----------
 
@@ -31,6 +69,7 @@ Required properties:
    * allwinner,sun6i-a31-tcon
    * allwinner,sun6i-a31s-tcon
    * allwinner,sun8i-a33-tcon
+   * allwinner,sun8i-v3s-tcon
  - reg: base address and size of memory-mapped region
  - interrupts: interrupt associated to this IP
  - clocks: phandles to the clocks feeding the TCON. Three are needed:
@@ -47,12 +86,15 @@ Required properties:
   Documentation/devicetree/bindings/media/video-interfaces.txt. The
   first port should be the input endpoint, the second one the output
 
-  The output should have two endpoints. The first is the block
-  connected to the TCON channel 0 (usually a panel or a bridge), the
-  second the block connected to the TCON channel 1 (usually the TV
-  encoder)
+  The output may have multiple endpoints. The TCON has two channels,
+  usually with the first channel being used for the panels interfaces
+  (RGB, LVDS, etc.), and the second being used for the outputs that
+  require another controller (TV Encoder, HDMI, etc.). The endpoints
+  will take an extra property, allwinner,tcon-channel, to specify the
+  channel the endpoint is associated to. If that property is not
+  present, the endpoint number will be used as the channel number.
 
-On SoCs other than the A33, there is one more clock required:
+On SoCs other than the A33 and V3s, there is one more clock required:
    - 'tcon-ch1': The clock driving the TCON channel 1
 
 DRC
@@ -138,6 +180,26 @@ Required properties:
   Documentation/devicetree/bindings/media/video-interfaces.txt. The
   first port should be the input endpoints, the second one the outputs
 
+Display Engine 2.0 Mixer
+------------------------
+
+The DE2 mixer have many functionalities, currently only layer blending is
+supported.
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun8i-v3s-de2-mixer
+  - reg: base address and size of the memory-mapped region.
+  - clocks: phandles to the clocks feeding the mixer
+    * bus: the mixer interface clock
+    * mod: the mixer module clock
+  - clock-names: the clock names mentioned above
+  - resets: phandles to the reset controllers driving the mixer
+
+- ports: A ports node with endpoint definitions as defined in
+  Documentation/devicetree/bindings/media/video-interfaces.txt. The
+  first port should be the input endpoints, the second one the output
+
 
 Display Engine Pipeline
 -----------------------
@@ -148,13 +210,15 @@ extra node.
 
 Required properties:
   - compatible: value must be one of:
+    * allwinner,sun5i-a10s-display-engine
     * allwinner,sun5i-a13-display-engine
     * allwinner,sun6i-a31-display-engine
     * allwinner,sun6i-a31s-display-engine
     * allwinner,sun8i-a33-display-engine
+    * allwinner,sun8i-v3s-display-engine
 
   - allwinner,pipelines: list of phandle to the display engine
-    frontends available.
+    frontends (DE 1.0) or mixers (DE 2.0) available.
 
 Example:
 
@@ -173,6 +237,57 @@ panel: panel {
 	};
 };
 
+connector {
+	compatible = "hdmi-connector";
+	type = "a";
+
+	port {
+		hdmi_con_in: endpoint {
+			remote-endpoint = <&hdmi_out_con>;
+		};
+	};
+};
+
+hdmi: hdmi@01c16000 {
+	compatible = "allwinner,sun5i-a10s-hdmi";
+	reg = <0x01c16000 0x1000>;
+	interrupts = <58>;
+	clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
+		 <&ccu CLK_PLL_VIDEO0_2X>,
+		 <&ccu CLK_PLL_VIDEO1_2X>;
+	clock-names = "ahb", "mod", "pll-0", "pll-1";
+	dmas = <&dma SUN4I_DMA_NORMAL 16>,
+	       <&dma SUN4I_DMA_NORMAL 16>,
+	       <&dma SUN4I_DMA_DEDICATED 24>;
+	dma-names = "ddc-tx", "ddc-rx", "audio-tx";
+	status = "disabled";
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+
+			hdmi_in_tcon0: endpoint {
+				remote-endpoint = <&tcon0_out_hdmi>;
+			};
+		};
+
+		port@1 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <1>;
+
+			hdmi_out_con: endpoint {
+				remote-endpoint = <&hdmi_con_in>;
+			};
+		};
+	};
+};
+
 tve0: tv-encoder@01c0a000 {
 	compatible = "allwinner,sun4i-a10-tv-encoder";
 	reg = <0x01c0a000 0x1000>;

+ 21 - 0
Documentation/devicetree/bindings/display/zte,vou.txt

@@ -58,6 +58,18 @@ Required properties:
    integer cells.  The first cell is the offset of SYSCTRL register used
    to control TV Encoder DAC power, and the second cell is the bit mask.
 
+* VGA output device
+
+Required properties:
+ - compatible: should be "zte,zx296718-vga"
+ - reg: Physical base address and length of the VGA device IO region
+ - interrupts : VGA interrupt number to CPU
+ - clocks: Phandle with clock-specifier pointing to VGA I2C clock.
+ - clock-names: Must be "i2c_wclk".
+ - zte,vga-power-control: the phandle to SYSCTRL block followed by two
+   integer cells.  The first cell is the offset of SYSCTRL register used
+   to control VGA DAC power, and the second cell is the bit mask.
+
 Example:
 
 vou: vou@1440000 {
@@ -81,6 +93,15 @@ vou: vou@1440000 {
 			      "main_wclk", "aux_wclk";
 			      "main_wclk", "aux_wclk";
 	};
 	};
 
 
+	vga: vga@8000 {
+		compatible = "zte,zx296718-vga";
+		reg = <0x8000 0x1000>;
+		interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&topcrm VGA_I2C_WCLK>;
+		clock-names = "i2c_wclk";
+		zte,vga-power-control = <&sysctrl 0x170 0xe0>;
+	};
+
 	hdmi: hdmi@c000 {
 		compatible = "zte,zx296718-hdmi";
 		reg = <0xc000 0x4000>;

+ 1 - 0
Documentation/devicetree/bindings/vendor-prefixes.txt

@@ -219,6 +219,7 @@ nexbox	Nexbox
 newhaven	Newhaven Display International
 ni	National Instruments
 nintendo	Nintendo
+nlt	NLT Technologies, Ltd.
 nokia	Nokia
 nordic	Nordic Semiconductor
 nuvoton	Nuvoton Technology Corporation

+ 12 - 54
Documentation/gpu/drm-internals.rst

@@ -98,6 +98,9 @@ DRIVER_ATOMIC
     implement appropriate obj->atomic_get_property() vfuncs for any
     modeset objects with driver specific properties.
 
+DRIVER_SYNCOBJ
+    Driver support drm sync objects.
+
 Major, Minor and Patchlevel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -149,60 +152,15 @@ Device Instance and Driver Handling
 Driver Load
 -----------
 
-IRQ Registration
-~~~~~~~~~~~~~~~~
-
-The DRM core tries to facilitate IRQ handler registration and
-unregistration by providing :c:func:`drm_irq_install()` and
-:c:func:`drm_irq_uninstall()` functions. Those functions only
-support a single interrupt per device, devices that use more than one
-IRQs need to be handled manually.
-
-Managed IRQ Registration
-''''''''''''''''''''''''
-
-:c:func:`drm_irq_install()` starts by calling the irq_preinstall
-driver operation. The operation is optional and must make sure that the
-interrupt will not get fired by clearing all pending interrupt flags or
-disabling the interrupt.
-
-The passed-in IRQ will then be requested by a call to
-:c:func:`request_irq()`. If the DRIVER_IRQ_SHARED driver feature
-flag is set, a shared (IRQF_SHARED) IRQ handler will be requested.
-
-The IRQ handler function must be provided as the mandatory irq_handler
-driver operation. It will get passed directly to
-:c:func:`request_irq()` and thus has the same prototype as all IRQ
-handlers. It will get called with a pointer to the DRM device as the
-second argument.
-
-Finally the function calls the optional irq_postinstall driver
-operation. The operation usually enables interrupts (excluding the
-vblank interrupt, which is enabled separately), but drivers may choose
-to enable/disable interrupts at a different time.
-
-:c:func:`drm_irq_uninstall()` is similarly used to uninstall an
-IRQ handler. It starts by waking up all processes waiting on a vblank
-interrupt to make sure they don't hang, and then calls the optional
-irq_uninstall driver operation. The operation must disable all hardware
-interrupts. Finally the function frees the IRQ by calling
-:c:func:`free_irq()`.
-
-Manual IRQ Registration
-'''''''''''''''''''''''
-
-Drivers that require multiple interrupt handlers can't use the managed
-IRQ registration functions. In that case IRQs must be registered and
-unregistered manually (usually with the :c:func:`request_irq()` and
-:c:func:`free_irq()` functions, or their :c:func:`devm_request_irq()` and
-:c:func:`devm_free_irq()` equivalents).
-
-When manually registering IRQs, drivers must not set the
-DRIVER_HAVE_IRQ driver feature flag, and must not provide the
-irq_handler driver operation. They must set the :c:type:`struct
-drm_device <drm_device>` irq_enabled field to 1 upon
-registration of the IRQs, and clear it to 0 after unregistering the
-IRQs.
+
+IRQ Helper Library
+~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/drm_irq.c
+   :doc: irq helpers
+
+.. kernel-doc:: drivers/gpu/drm/drm_irq.c
+   :export:
 
 Memory Manager Initialization
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

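As a quick, hedged illustration of the helpers documented above: a single-interrupt KMS driver would typically wire its handler up through struct drm_driver::irq_handler and then only call the install/uninstall pair from its load and unload paths. The foo_* names below are hypothetical, and this is a sketch rather than the canonical implementation:

    /* Hypothetical foo driver; illustrative only. */
    static int foo_load(struct drm_device *dev, int irq)
    {
            /* Runs irq_preinstall, request_irq() and irq_postinstall for us. */
            return drm_irq_install(dev, irq);
    }

    static void foo_unload(struct drm_device *dev)
    {
            /* Wakes vblank waiters, runs irq_uninstall and frees the IRQ. */
            drm_irq_uninstall(dev);
    }
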
+ 6 - 0
Documentation/gpu/drm-kms-helpers.rst

@@ -143,6 +143,12 @@ Bridge Helper Reference
 .. kernel-doc:: drivers/gpu/drm/drm_bridge.c
    :export:
 
+Panel-Bridge Helper Reference
+-----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/bridge/panel.c
+   :export:
+
 .. _drm_panel_helper:
 
 Panel Helper Reference

+ 2 - 2
Documentation/gpu/drm-kms.rst

@@ -612,8 +612,8 @@ operation handler.
 Vertical Blanking and Interrupt Handling Functions Reference
 ------------------------------------------------------------
 
-.. kernel-doc:: include/drm/drm_irq.h
+.. kernel-doc:: include/drm/drm_vblank.h
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/drm_irq.c
+.. kernel-doc:: drivers/gpu/drm/drm_vblank.c
    :export:

+ 12 - 0
Documentation/gpu/drm-mm.rst

@@ -484,3 +484,15 @@ DRM Cache Handling
 
 .. kernel-doc:: drivers/gpu/drm/drm_cache.c
    :export:
+
+DRM Sync Objects
+===========================
+
+.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_syncobj.h
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
+   :export:

+ 2 - 0
Documentation/gpu/index.rst

@@ -12,6 +12,8 @@ Linux GPU Driver Developer's Guide
    drm-uapi
    i915
    meson
+   pl111
+   tegra
    tinydrm
    vc4
    vga-switcheroo

+ 6 - 0
Documentation/gpu/pl111.rst

@@ -0,0 +1,6 @@
+==========================================
+ drm/pl111 ARM PrimeCell PL111 CLCD Driver
+==========================================
+
+.. kernel-doc:: drivers/gpu/drm/pl111/pl111_drv.c
+   :doc: ARM PrimeCell PL111 CLCD Driver

+ 178 - 0
Documentation/gpu/tegra.rst

@@ -0,0 +1,178 @@
+===============================================
+ drm/tegra NVIDIA Tegra GPU and display driver
+===============================================
+
+NVIDIA Tegra SoCs support a set of display, graphics and video functions via
+the host1x controller. host1x supplies command streams, gathered from a push
+buffer provided directly by the CPU, to its clients via channels. Software,
+or blocks amongst themselves, can use syncpoints for synchronization.
+
+Up until, but not including, Tegra124 (aka Tegra K1) the drm/tegra driver
+supports the built-in GPU, comprised of the gr2d and gr3d engines. Starting
+with Tegra124 the GPU is based on the NVIDIA desktop GPU architecture and
+supported by the drm/nouveau driver.
+
+The drm/tegra driver supports NVIDIA Tegra SoC generations since Tegra20. It
+has three parts:
+
+  - A host1x driver that provides infrastructure and access to the host1x
+    services.
+
+  - A KMS driver that supports the display controllers as well as a number of
+    outputs, such as RGB, HDMI, DSI, and DisplayPort.
+
+  - A set of custom userspace IOCTLs that can be used to submit jobs to the
+    GPU and video engines via host1x.
+
+Driver Infrastructure
+=====================
+
+The various host1x clients need to be bound together into a logical device in
+order to expose their functionality to users. The infrastructure that supports
+this is implemented in the host1x driver. When a driver is registered with the
+infrastructure it provides a list of compatible strings specifying the devices
+that it needs. The infrastructure creates a logical device and scan the device
+tree for matching device nodes, adding the required clients to a list. Drivers
+for individual clients register with the infrastructure as well and are added
+to the logical host1x device.
+
+Once all clients are available, the infrastructure will initialize the logical
+device using a driver-provided function which will set up the bits specific to
+the subsystem and in turn initialize each of its clients.
+
+Similarly, when one of the clients is unregistered, the infrastructure will
+destroy the logical device by calling back into the driver, which ensures that
+the subsystem specific bits are torn down and the clients destroyed in turn.
+
+Host1x Infrastructure Reference
+-------------------------------
+
+.. kernel-doc:: include/linux/host1x.h
+
+.. kernel-doc:: drivers/gpu/host1x/bus.c
+   :export:
+
+Host1x Syncpoint Reference
+--------------------------
+
+.. kernel-doc:: drivers/gpu/host1x/syncpt.c
+   :export:
+
+KMS driver
+==========
+
+The display hardware has remained mostly backwards compatible over the various
+Tegra SoC generations, up until Tegra186 which introduces several changes that
+make it difficult to support with a parameterized driver.
+
+Display Controllers
+-------------------
+
+Tegra SoCs have two display controllers, each of which can be associated with
+zero or more outputs. Outputs can also share a single display controller, but
+only if they run with compatible display timings. Two display controllers can
+also share a single framebuffer, allowing cloned configurations even if modes
+on two outputs don't match. A display controller is modelled as a CRTC in KMS
+terms.
+
+On Tegra186, the number of display controllers has been increased to three. A
+display controller can no longer drive all of the outputs. While two of these
+controllers can drive both DSI outputs and both SOR outputs, the third cannot
+drive any DSI.
+
+Windows
+~~~~~~~
+
+A display controller controls a set of windows that can be used to composite
+multiple buffers onto the screen. While it is possible to assign arbitrary Z
+ordering to individual windows (by programming the corresponding blending
+registers), this is currently not supported by the driver. Instead, it will
+assume a fixed Z ordering of the windows (window A is the root window, that
+is, the lowest, while windows B and C are overlaid on top of window A). The
+overlay windows support multiple pixel formats and can automatically convert
+from YUV to RGB at scanout time. This makes them useful for displaying video
+content. In KMS, each window is modelled as a plane. Each display controller
+has a hardware cursor that is exposed as a cursor plane.
+
+Outputs
+-------
+
+The type and number of supported outputs varies between Tegra SoC generations.
+All generations support at least HDMI. While earlier generations supported the
+very simple RGB interfaces (one per display controller), recent generations no
+longer do and instead provide standard interfaces such as DSI and eDP/DP.
+
+Outputs are modelled as a composite encoder/connector pair.
+
+RGB/LVDS
+~~~~~~~~
+
+This interface is no longer available since Tegra124. It has been replaced by
+the more standard DSI and eDP interfaces.
+
+HDMI
+~~~~
+
+HDMI is supported on all Tegra SoCs. Starting with Tegra210, HDMI is provided
+by the versatile SOR output, which supports eDP, DP and HDMI. The SOR is able
+to support HDMI 2.0, though support for this is currently not merged.
+
+DSI
+~~~
+
+Although Tegra has supported DSI since Tegra30, the controller has changed in
+several ways in Tegra114. Since none of the publicly available development
+boards prior to Dalmore (Tegra114) have made use of DSI, only Tegra114 and
+later are supported by the drm/tegra driver.
+
+eDP/DP
+~~~~~~
+
+eDP was first introduced in Tegra124 where it was used to drive the display
+panel for notebook form factors. Tegra210 added support for full DisplayPort
+support, though this is currently not implemented in the drm/tegra driver.
+
+Userspace Interface
+===================
+
+The userspace interface provided by drm/tegra allows applications to create
+GEM buffers, access and control syncpoints as well as submit command streams
+to host1x.
+
+GEM Buffers
+-----------
+
+The ``DRM_IOCTL_TEGRA_GEM_CREATE`` IOCTL is used to create a GEM buffer object
+with Tegra-specific flags. This is useful for buffers that should be tiled, or
+that are to be scanned out upside down (useful for 3D content).
+
+After a GEM buffer object has been created, its memory can be mapped by an
+application using the mmap offset returned by the ``DRM_IOCTL_TEGRA_GEM_MMAP``
+IOCTL.
+
+Syncpoints
+----------
+
+The current value of a syncpoint can be obtained by executing the
+``DRM_IOCTL_TEGRA_SYNCPT_READ`` IOCTL. Incrementing the syncpoint is achieved
+using the ``DRM_IOCTL_TEGRA_SYNCPT_INCR`` IOCTL.
+
+Userspace can also request blocking on a syncpoint. To do so, it needs to
+execute the ``DRM_IOCTL_TEGRA_SYNCPT_WAIT`` IOCTL, specifying the value of
+the syncpoint to wait for. The kernel will release the application when the
+syncpoint reaches that value or after a specified timeout.
+
+Command Stream Submission
+-------------------------
+
+Before an application can submit command streams to host1x it needs to open a
+channel to an engine using the ``DRM_IOCTL_TEGRA_OPEN_CHANNEL`` IOCTL. Client
+IDs are used to identify the target of the channel. When a channel is no
+longer needed, it can be closed using the ``DRM_IOCTL_TEGRA_CLOSE_CHANNEL``
+IOCTL. To retrieve the syncpoint associated with a channel, an application
+can use the ``DRM_IOCTL_TEGRA_GET_SYNCPT``.
+
+After opening a channel, submitting command streams is easy. The application
+writes commands into the memory backing a GEM buffer object and passes these
+to the ``DRM_IOCTL_TEGRA_SUBMIT`` IOCTL along with various other parameters,
+such as the syncpoints or relocations used in the job submission.

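As a hedged illustration of the syncpoint IOCTLs described above, reading the current value of a syncpoint from userspace could look roughly like the sketch below. The struct, field and header names follow the tegra_drm UAPI as understood here and should be checked against include/uapi/drm/tegra_drm.h; error handling is reduced to a minimum:

    #include <stdio.h>
    #include <string.h>
    #include <xf86drm.h>        /* drmIoctl(), from libdrm */
    #include <tegra_drm.h>      /* DRM_IOCTL_TEGRA_SYNCPT_READ (path may vary) */

    static int read_syncpt(int fd, unsigned int id)
    {
            struct drm_tegra_syncpt_read args;

            memset(&args, 0, sizeof(args));
            args.id = id;

            if (drmIoctl(fd, DRM_IOCTL_TEGRA_SYNCPT_READ, &args))
                    return -1;

            /* args.value now holds the current syncpoint value. */
            printf("syncpoint %u = %u\n", id, args.value);
            return 0;
    }
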
+ 0 - 13
Documentation/gpu/todo.rst

@@ -177,19 +177,6 @@ following drivers still use ``struct_mutex``: ``msm``, ``omapdrm`` and
 
 Contact: Daniel Vetter, respective driver maintainers
 
-Switch to drm_connector_list_iter for any connector_list walking
-----------------------------------------------------------------
-
-Connectors can be hotplugged, and we now have a special list of helpers to walk
-the connector_list in a race-free fashion, without incurring deadlocks on
-mutexes and other fun stuff.
-
-Unfortunately most drivers are not converted yet. At least all those supporting
-DP MST hotplug should be converted, since for those drivers the difference
-matters. See drm_for_each_connector_iter() vs. drm_for_each_connector().
-
-Contact: Daniel Vetter
-
 Core refactorings
 =================
 

+ 13 - 10
Documentation/sync_file.txt

@@ -1,8 +1,8 @@
-			      Sync File API Guide
-			      ~~~~~~~~~~~~~~~~~~~
+===================
+Sync File API Guide
+===================
 
-				Gustavo Padovan
-			  <gustavo at padovan dot org>
+:Author: Gustavo Padovan <gustavo at padovan dot org>
 
 This document serves as a guide for device drivers writers on what the
 sync_file API is, and how drivers can support it. Sync file is the carrier of
@@ -46,16 +46,17 @@ Creating Sync Files
 
 When a driver needs to send an out-fence userspace it creates a sync_file.
 
-Interface:
+Interface::
+
 	struct sync_file *sync_file_create(struct dma_fence *fence);
 
 The caller pass the out-fence and gets back the sync_file. That is just the
 first step, next it needs to install an fd on sync_file->file. So it gets an
-fd:
+fd::
 
 	fd = get_unused_fd_flags(O_CLOEXEC);
 
-and installs it on sync_file->file:
+and installs it on sync_file->file::
 
 	fd_install(fd, sync_file->file);
 
@@ -71,7 +72,8 @@ When userspace needs to send an in-fence to the driver it passes file descriptor
 of the Sync File to the kernel. The kernel can then retrieve the fences
 from it.
 
-Interface:
+Interface::
+
 	struct dma_fence *sync_file_get_fence(int fd);
 
 
@@ -79,5 +81,6 @@ The returned reference is owned by the caller and must be disposed of
 afterwards using dma_fence_put(). In case of error, a NULL is returned instead.
 
 References:
-[1] struct sync_file in include/linux/sync_file.h
-[2] All interfaces mentioned above defined in include/linux/sync_file.h
+
+1. struct sync_file in include/linux/sync_file.h
+2. All interfaces mentioned above defined in include/linux/sync_file.h

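Putting the three steps from the guide above together, a condensed sketch of a driver exporting an out-fence as a sync_file fd might look like this. my_fence stands for whatever dma_fence the driver has just created for the job, and the error path is the usual fput()-based cleanup rather than anything mandated by the text:

    #include <linux/dma-fence.h>
    #include <linux/errno.h>
    #include <linux/fcntl.h>
    #include <linux/file.h>
    #include <linux/sync_file.h>

    static int export_out_fence(struct dma_fence *my_fence)
    {
            struct sync_file *sync_file;
            int fd;

            sync_file = sync_file_create(my_fence);
            if (!sync_file)
                    return -ENOMEM;

            fd = get_unused_fd_flags(O_CLOEXEC);
            if (fd < 0) {
                    fput(sync_file->file);  /* drops the sync_file again */
                    return fd;
            }

            fd_install(fd, sync_file->file);
            return fd;      /* typically handed back to userspace via an ioctl */
    }
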
+ 19 - 0
MAINTAINERS

@@ -4235,6 +4235,12 @@ F:	include/drm/drm*
 F:	include/uapi/drm/drm*
 F:	include/linux/vga*
 
+DRM DRIVER FOR ARM PL111 CLCD
+M:	Eric Anholt <eric@anholt.net>
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+S:	Supported
+F:	drivers/gpu/drm/pl111/
+
 DRM DRIVER FOR AST SERVER GRAPHICS CHIPS
 M:	Dave Airlie <airlied@redhat.com>
 S:	Odd Fixes
@@ -4242,6 +4248,8 @@ F:	drivers/gpu/drm/ast/
 
 DRM DRIVERS FOR BRIDGE CHIPS
 M:	Archit Taneja <architt@codeaurora.org>
+M:	Andrzej Hajda <a.hajda@samsung.com>
+R:	Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/bridge/
@@ -4498,6 +4506,17 @@ S:	Maintained
 F:	drivers/gpu/drm/sti
 F:	Documentation/devicetree/bindings/display/st,stih4xx.txt
 
+DRM DRIVERS FOR STM
+M:	Yannick Fertre <yannick.fertre@st.com>
+M:	Philippe Cornu <philippe.cornu@st.com>
+M:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
+M:	Vincent Abriou <vincent.abriou@st.com>
+L:	dri-devel@lists.freedesktop.org
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+S:	Maintained
+F:	drivers/gpu/drm/stm
+F:	Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
+
 DRM DRIVER FOR TDFX VIDEO CARDS
 S:	Orphan / Obsolete
 F:	drivers/gpu/drm/tdfx/

+ 3 - 5
drivers/dma-buf/dma-buf.c

@@ -558,8 +558,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 	if (WARN_ON(!dmabuf || !dev))
 		return ERR_PTR(-EINVAL);
 
-	attach = kzalloc(sizeof(struct dma_buf_attachment), GFP_KERNEL);
-	if (attach == NULL)
+	attach = kzalloc(sizeof(*attach), GFP_KERNEL);
+	if (!attach)
 		return ERR_PTR(-ENOMEM);
 
 	attach->dev = dev;
@@ -1122,9 +1122,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 		attach_count = 0;
 
 		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
-			seq_puts(s, "\t");
-
-			seq_printf(s, "%s\n", dev_name(attach_obj->dev));
+			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
 			attach_count++;
 		}
 

+ 5 - 0
drivers/dma-buf/dma-fence.c

@@ -402,6 +402,11 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 		}
 	}
 
+	if (!timeout) {
+		ret = 0;
+		goto out;
+	}
+
 	cb.base.func = dma_fence_default_wait_cb;
 	cb.task = current;
 	list_add(&cb.base.node, &fence->cb_list);

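The hunk above makes dma_fence_default_wait() return immediately, instead of installing a wait callback, when it is called with a zero timeout, so a zero timeout behaves as a non-blocking poll. A small, hedged illustration of what a caller gets out of that (for fences that use the default wait implementation):

    #include <linux/dma-fence.h>

    /* Returns true if the fence has already signaled, without ever sleeping. */
    static bool fence_already_signaled(struct dma_fence *fence)
    {
            return dma_fence_wait_timeout(fence, false, 0) > 0;
    }
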
+ 6 - 4
drivers/dma-buf/sync_debug.c

@@ -110,7 +110,7 @@ static void sync_print_fence(struct seq_file *s,
 		}
 	}
 
-	seq_puts(s, "\n");
+	seq_putc(s, '\n');
 }
 
 static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
@@ -132,9 +132,11 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
 static void sync_print_sync_file(struct seq_file *s,
 				  struct sync_file *sync_file)
 {
+	char buf[128];
 	int i;
 
-	seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
+	seq_printf(s, "[%p] %s: %s\n", sync_file,
+		   sync_file_get_name(sync_file, buf, sizeof(buf)),
 		   sync_status_str(dma_fence_get_status(sync_file->fence)));
 
 	if (dma_fence_is_array(sync_file->fence)) {
@@ -161,7 +163,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
 				     sync_timeline_list);
 
 		sync_print_obj(s, obj);
-		seq_puts(s, "\n");
+		seq_putc(s, '\n');
 	}
 	spin_unlock_irqrestore(&sync_timeline_list_lock, flags);
 
@@ -173,7 +175,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
 			container_of(pos, struct sync_file, sync_file_list);
 
 		sync_print_sync_file(s, sync_file);
-		seq_puts(s, "\n");
+		seq_putc(s, '\n');
 	}
 	spin_unlock_irqrestore(&sync_file_list_lock, flags);
 	return 0;

+ 34 - 18
drivers/dma-buf/sync_file.c

@@ -41,8 +41,6 @@ static struct sync_file *sync_file_alloc(void)
 	if (IS_ERR(sync_file->file))
 		goto err;
 
-	kref_init(&sync_file->kref);
-
 	init_waitqueue_head(&sync_file->wq);
 
 	INIT_LIST_HEAD(&sync_file->cb.node);
@@ -82,11 +80,6 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
 
 	sync_file->fence = dma_fence_get(fence);
 
-	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
-		 fence->ops->get_driver_name(fence),
-		 fence->ops->get_timeline_name(fence), fence->context,
-		 fence->seqno);
-
 	return sync_file;
 }
 EXPORT_SYMBOL(sync_file_create);
@@ -131,6 +124,36 @@ struct dma_fence *sync_file_get_fence(int fd)
 }
 EXPORT_SYMBOL(sync_file_get_fence);
 
+/**
+ * sync_file_get_name - get the name of the sync_file
+ * @sync_file:		sync_file to get the fence from
+ * @buf:		destination buffer to copy sync_file name into
+ * @len:		available size of destination buffer.
+ *
+ * Each sync_file may have a name assigned either by the user (when merging
+ * sync_files together) or created from the fence it contains. In the latter
+ * case construction of the name is deferred until use, and so requires
+ * sync_file_get_name().
+ *
+ * Returns: a string representing the name.
+ */
+char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
+{
+	if (sync_file->user_name[0]) {
+		strlcpy(buf, sync_file->user_name, len);
+	} else {
+		struct dma_fence *fence = sync_file->fence;
+
+		snprintf(buf, len, "%s-%s%llu-%d",
+			 fence->ops->get_driver_name(fence),
+			 fence->ops->get_timeline_name(fence),
+			 fence->context,
+			 fence->seqno);
+	}
+
+	return buf;
+}
+
 static int sync_file_set_fence(struct sync_file *sync_file,
 			       struct dma_fence **fences, int num_fences)
 {
@@ -268,7 +291,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 		goto err;
 	}
 
-	strlcpy(sync_file->name, name, sizeof(sync_file->name));
+	strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
 	return sync_file;
 
 err:
@@ -277,22 +300,15 @@ err:
 
 }
 
-static void sync_file_free(struct kref *kref)
+static int sync_file_release(struct inode *inode, struct file *file)
 {
-	struct sync_file *sync_file = container_of(kref, struct sync_file,
-						     kref);
+	struct sync_file *sync_file = file->private_data;
 
 	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
 		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
 	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
-}
-
-static int sync_file_release(struct inode *inode, struct file *file)
-{
-	struct sync_file *sync_file = file->private_data;
 
-	kref_put(&sync_file->kref, sync_file_free);
 	return 0;
 }
 
@@ -422,7 +438,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 	}
 
 no_fences:
-	strlcpy(info.name, sync_file->name, sizeof(info.name));
+	sync_file_get_name(sync_file, info.name, sizeof(info.name));
 	info.status = dma_fence_is_signaled(sync_file->fence);
 	info.num_fences = num_fences;
 

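The new sync_file_get_name() above builds the name on demand into a caller-supplied buffer, falling back to the driver/timeline/context/seqno of the contained fence when no user name was set, which is why the sync_debug.c hunk earlier in this commit switches to it. A minimal usage sketch, assuming the matching declaration lands in include/linux/sync_file.h:

    #include <linux/printk.h>
    #include <linux/sync_file.h>

    static void log_sync_file(struct sync_file *sync_file)
    {
            char buf[128];  /* same buffer size the debugfs code above uses */

            pr_info("sync_file: %s\n",
                    sync_file_get_name(sync_file, buf, sizeof(buf)));
    }
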
+ 4 - 0
drivers/gpu/drm/Kconfig

@@ -246,6 +246,8 @@ source "drivers/gpu/drm/fsl-dcu/Kconfig"
 
 source "drivers/gpu/drm/tegra/Kconfig"
 
+source "drivers/gpu/drm/stm/Kconfig"
+
 source "drivers/gpu/drm/panel/Kconfig"
 source "drivers/gpu/drm/panel/Kconfig"
 
 
 source "drivers/gpu/drm/bridge/Kconfig"
 source "drivers/gpu/drm/bridge/Kconfig"
@@ -274,6 +276,8 @@ source "drivers/gpu/drm/meson/Kconfig"
 
 source "drivers/gpu/drm/tinydrm/Kconfig"
 
+source "drivers/gpu/drm/pl111/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY

+ 5 - 1
drivers/gpu/drm/Makefile

@@ -16,7 +16,8 @@ drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
 		drm_framebuffer.o drm_connector.o drm_blend.o \
 		drm_encoder.o drm_mode_object.o drm_property.o \
 		drm_plane.o drm_color_mgmt.o drm_print.o \
-		drm_dumb_buffers.o drm_mode_config.o
+		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
+		drm_syncobj.o
 
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_DRM_VM) += drm_vm.o
@@ -34,6 +35,7 @@ drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
 		drm_simple_kms_helper.o drm_modeset_helper.o \
 		drm_scdc_helper.o
 
+drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
 drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
 drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
@@ -82,6 +84,7 @@ obj-$(CONFIG_DRM_BOCHS) += bochs/
 obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/
 obj-$(CONFIG_DRM_MSM) += msm/
 obj-$(CONFIG_DRM_TEGRA) += tegra/
+obj-$(CONFIG_DRM_STM) += stm/
 obj-$(CONFIG_DRM_STI) += sti/
 obj-$(CONFIG_DRM_IMX) += imx/
 obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
@@ -96,3 +99,4 @@ obj-y			+= hisilicon/
 obj-$(CONFIG_DRM_ZTE)	+= zte/
 obj-$(CONFIG_DRM_MXSFB)	+= mxsfb/
 obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
+obj-$(CONFIG_DRM_PL111) += pl111/

+ 12 - 4
drivers/gpu/drm/amd/amdgpu/Kconfig

@@ -5,15 +5,23 @@ config DRM_AMDGPU_SI
 	  Choose this option if you want to enable experimental support
 	  for SI asics.
 
+	  SI is already supported in radeon. Experimental support for SI
+	  in amdgpu will be disabled by default and is still provided by
+	  radeon. Use module options to override this:
+
+	  radeon.si_support=0 amdgpu.si_support=1
+
 config DRM_AMDGPU_CIK
 	bool "Enable amdgpu support for CIK parts"
 	depends on DRM_AMDGPU
 	help
-	  Choose this option if you want to enable experimental support
-	  for CIK asics.
+	  Choose this option if you want to enable support for CIK asics.
+
+	  CIK is already supported in radeon. Support for CIK in amdgpu
+	  will be disabled by default and is still provided by radeon.
+	  Use module options to override this:
 
 
-	  CIK is already supported in radeon.  CIK support in amdgpu
-	  is for experimentation and testing.
+	  radeon.cik_support=0 amdgpu.cik_support=1
 
 
 config DRM_AMDGPU_USERPTR
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
 	bool "Always enable userptr write support"

+ 11 - 4
drivers/gpu/drm/amd/amdgpu/Makefile

@@ -4,7 +4,7 @@
 
 
 FULL_AMD_PATH=$(src)/..
 
 
-ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
+ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
@@ -24,7 +24,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
+	amdgpu_queue_mgr.o
 
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -34,7 +35,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o
 
 
 # add GMC block
 amdgpu-y += \
@@ -54,7 +55,8 @@ amdgpu-y += \
 # add PSP block
 amdgpu-y += \
 	amdgpu_psp.o \
-	psp_v3_1.o
+	psp_v3_1.o \
+	psp_v10_0.o
 
 
 # add SMC block
 amdgpu-y += \
@@ -92,6 +94,11 @@ amdgpu-y += \
 	vce_v3_0.o \
 	vce_v4_0.o
 
+# add VCN block
+amdgpu-y += \
+	amdgpu_vcn.o \
+	vcn_v1_0.o
+
 # add amdkfd interfaces
 amdgpu-y += \
 	 amdgpu_amdkfd.o \

+ 118 - 47
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -36,16 +36,18 @@
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>
 
-#include <ttm/ttm_bo_api.h>
-#include <ttm/ttm_bo_driver.h>
-#include <ttm/ttm_placement.h>
-#include <ttm/ttm_module.h>
-#include <ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_module.h>
+#include <drm/ttm/ttm_execbuf_util.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
 
+#include <kgd_kfd_interface.h>
+
 #include "amd_shared.h"
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_ih.h"
@@ -62,6 +64,7 @@
 #include "amdgpu_acp.h"
 #include "amdgpu_acp.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 #include "amdgpu_vce.h"
+#include "amdgpu_vcn.h"
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
@@ -92,6 +95,7 @@ extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
+extern int amdgpu_vm_update_mode;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_no_evict;
@@ -109,6 +113,15 @@ extern int amdgpu_prim_buf_per_se;
 extern int amdgpu_pos_buf_per_se;
 extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
+extern int amdgpu_job_hang_limit;
+extern int amdgpu_lbpw;
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+extern int amdgpu_si_support;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern int amdgpu_cik_support;
+#endif
 
 #define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
@@ -305,8 +318,8 @@ struct amdgpu_gart_funcs {
 	/* set pte flags based per asic */
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
-	/* adjust mc addr in fb for APU case */
-	u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
+	/* get the pde for a given mc addr */
+	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
 	uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
@@ -554,7 +567,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 			int pages);
 			int pages);
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		     int pages, struct page **pagelist,
 		     int pages, struct page **pagelist,
@@ -602,6 +615,7 @@ struct amdgpu_mc {
 	uint32_t                srbm_soft_reset;
 	uint32_t                srbm_soft_reset;
 	struct amdgpu_mode_mc_save save;
 	struct amdgpu_mode_mc_save save;
 	bool			prt_warning;
 	bool			prt_warning;
+	uint64_t		stolen_size;
 	/* apertures */
 	/* apertures */
 	u64					shared_aperture_start;
 	u64					shared_aperture_start;
 	u64					shared_aperture_end;
 	u64					shared_aperture_end;
@@ -771,6 +785,29 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct dma_fence **f);
 		      struct dma_fence **f);
 
 
+/*
+ * Queue manager
+ */
+struct amdgpu_queue_mapper {
+	int 		hw_ip;
+	struct mutex	lock;
+	/* protected by lock */
+	struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+	struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring);
+
 /*
  * context related structures
  */
@@ -784,6 +821,7 @@ struct amdgpu_ctx_ring {
 struct amdgpu_ctx {
 	struct kref		refcount;
 	struct amdgpu_device    *adev;
+	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
@@ -822,6 +860,7 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
 	struct amdgpu_ctx_mgr	ctx_mgr;
+	u32			vram_lost_counter;
 };
 };
 
 
 /*
 /*
@@ -830,6 +869,8 @@ struct amdgpu_fpriv {
 
 
 struct amdgpu_bo_list {
 struct amdgpu_bo_list {
 	struct mutex lock;
 	struct mutex lock;
+	struct rcu_head rhead;
+	struct kref refcount;
 	struct amdgpu_bo *gds_obj;
 	struct amdgpu_bo *gds_obj;
 	struct amdgpu_bo *gws_obj;
 	struct amdgpu_bo *gws_obj;
 	struct amdgpu_bo *oa_obj;
 	struct amdgpu_bo *oa_obj;
@@ -893,20 +934,26 @@ struct amdgpu_rlc {
 	u32 *register_restore;
 	u32 *register_restore;
 };
 };
 
 
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
 	u64			hpd_eop_gpu_addr;
 	struct amdgpu_bo	*mec_fw_obj;
 	struct amdgpu_bo	*mec_fw_obj;
 	u64			mec_fw_gpu_addr;
 	u64			mec_fw_gpu_addr;
-	u32 num_pipe;
 	u32 num_mec;
 	u32 num_mec;
-	u32 num_queue;
+	u32 num_pipe_per_mec;
+	u32 num_queue_per_pipe;
 	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
 	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+	/* These are the resources for which amdgpu takes ownership */
+	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 };
 
 
 struct amdgpu_kiq {
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
 	struct amdgpu_bo	*eop_obj;
+	struct mutex		ring_mutex;
 	struct amdgpu_ring	ring;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 	struct amdgpu_irq_src	irq;
 };
 };
@@ -983,7 +1030,10 @@ struct amdgpu_gfx_config {
 struct amdgpu_cu_info {
 struct amdgpu_cu_info {
 	uint32_t number; /* total active CU number */
 	uint32_t number; /* total active CU number */
 	uint32_t ao_cu_mask;
 	uint32_t ao_cu_mask;
+	uint32_t max_waves_per_simd;
 	uint32_t wave_front_size;
 	uint32_t wave_front_size;
+	uint32_t max_scratch_slots_per_cu;
+	uint32_t lds_size;
 	uint32_t bitmap[4][4];
 	uint32_t bitmap[4][4];
 };
 };
 
 
@@ -1061,6 +1111,8 @@ struct amdgpu_gfx {
 	uint32_t                        grbm_soft_reset;
 	uint32_t                        grbm_soft_reset;
 	uint32_t                        srbm_soft_reset;
 	uint32_t                        srbm_soft_reset;
 	bool                            in_reset;
 	bool                            in_reset;
+	/* s3/s4 mask */
+	bool                            in_suspend;
 	/* NGG */
 	/* NGG */
 	struct amdgpu_ngg		ngg;
 	struct amdgpu_ngg		ngg;
 };
 };
@@ -1109,12 +1161,14 @@ struct amdgpu_cs_parser {
 
 
 	/* user fence */
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
 	struct amdgpu_bo_list_entry	uf_entry;
+
+	unsigned num_post_dep_syncobjs;
+	struct drm_syncobj **post_dep_syncobjs;
 };
 };
 
 
 #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
 #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
 #define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occured */
-#define AMDGPU_VM_DOMAIN                    (1 << 3) /* bit set means in virtual memory context */
 
 struct amdgpu_job {
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amd_sched_job    base;
@@ -1122,6 +1176,8 @@ struct amdgpu_job {
 	struct amdgpu_vm	*vm;
 	struct amdgpu_vm	*vm;
 	struct amdgpu_ring	*ring;
 	struct amdgpu_ring	*ring;
 	struct amdgpu_sync	sync;
 	struct amdgpu_sync	sync;
+	struct amdgpu_sync	dep_sync;
+	struct amdgpu_sync	sched_sync;
 	struct amdgpu_ib	*ibs;
 	struct amdgpu_ib	*ibs;
 	struct dma_fence	*fence; /* the hw fence */
 	struct dma_fence	*fence; /* the hw fence */
 	uint32_t		preamble_status;
 	uint32_t		preamble_status;
@@ -1129,7 +1185,6 @@ struct amdgpu_job {
 	void			*owner;
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
 	bool                    vm_needs_flush;
-	bool			need_pipeline_sync;
 	unsigned		vm_id;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
 	uint32_t		gds_base, gds_size;
@@ -1221,6 +1276,9 @@ struct amdgpu_firmware {
 	const struct amdgpu_psp_funcs *funcs;
 	const struct amdgpu_psp_funcs *funcs;
 	struct amdgpu_bo *rbuf;
 	struct amdgpu_bo *rbuf;
 	struct mutex mutex;
 	struct mutex mutex;
+
+	/* gpu info firmware data pointer */
+	const struct firmware *gpu_info_fw;
 };
 };
 
 
 /*
 /*
@@ -1296,7 +1354,6 @@ struct amdgpu_smumgr {
  */
  */
 struct amdgpu_allowed_register_entry {
 struct amdgpu_allowed_register_entry {
 	uint32_t reg_offset;
 	uint32_t reg_offset;
-	bool untouched;
 	bool grbm_indexed;
 	bool grbm_indexed;
 };
 };
 
 
@@ -1424,6 +1481,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
 
+#define AMDGPU_RESET_MAGIC_NUM 64
 struct amdgpu_device {
 struct amdgpu_device {
 	struct device			*dev;
 	struct device			*dev;
 	struct drm_device		*ddev;
 	struct drm_device		*ddev;
@@ -1523,7 +1581,9 @@ struct amdgpu_device {
 	atomic64_t			gtt_usage;
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
 	atomic64_t			num_bytes_moved;
 	atomic64_t			num_evictions;
 	atomic64_t			num_evictions;
+	atomic64_t			num_vram_cpu_page_faults;
 	atomic_t			gpu_reset_counter;
 	atomic_t			gpu_reset_counter;
+	atomic_t			vram_lost_counter;
 
 
 	/* data for buffer migration throttling */
 	/* data for buffer migration throttling */
 	struct {
 	struct {
@@ -1570,11 +1630,18 @@ struct amdgpu_device {
 	/* sdma */
 	/* sdma */
 	struct amdgpu_sdma		sdma;
 	struct amdgpu_sdma		sdma;
 
 
-	/* uvd */
-	struct amdgpu_uvd		uvd;
+	union {
+		struct {
+			/* uvd */
+			struct amdgpu_uvd		uvd;
+
+			/* vce */
+			struct amdgpu_vce		vce;
+		};
 
 
-	/* vce */
-	struct amdgpu_vce		vce;
+		/* vcn */
+		struct amdgpu_vcn		vcn;
+	};
 
 
 	/* firmwares */
 	/* firmwares */
 	struct amdgpu_firmware		firmware;
 	struct amdgpu_firmware		firmware;
@@ -1598,6 +1665,9 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	/* amdkfd interface */
 	struct kfd_dev          *kfd;
 	struct kfd_dev          *kfd;
 
 
+	/* delayed work_func for deferring clockgating during resume */
+	struct delayed_work     late_init_work;
+
 	struct amdgpu_virt	virt;
 	struct amdgpu_virt	virt;
 
 
 	/* link all shadow bo */
 	/* link all shadow bo */
@@ -1606,9 +1676,13 @@ struct amdgpu_device {
 	/* link all gtt */
 	/* link all gtt */
 	spinlock_t			gtt_list_lock;
 	spinlock_t			gtt_list_lock;
 	struct list_head                gtt_list;
 	struct list_head                gtt_list;
+	/* keep an lru list of rings by HW IP */
+	struct list_head		ring_lru_list;
+	spinlock_t			ring_lru_list_lock;
 
 
 	/* record hw reset is performed */
 	/* record hw reset is performed */
 	bool has_hw_reset;
 	bool has_hw_reset;
+	u8				reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
 
 };
 };
 
 
@@ -1617,7 +1691,6 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 	return container_of(bdev, struct amdgpu_device, mman.bdev);
 	return container_of(bdev, struct amdgpu_device, mman.bdev);
 }
 }
 
 
-bool amdgpu_device_is_px(struct drm_device *dev);
 int amdgpu_device_init(struct amdgpu_device *adev,
 int amdgpu_device_init(struct amdgpu_device *adev,
 		       struct drm_device *ddev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
 		       struct pci_dev *pdev,
@@ -1733,30 +1806,31 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
 	unsigned occupied, chunk1, chunk2;
 	unsigned occupied, chunk1, chunk2;
 	void *dst;
 	void *dst;
 
 
-	if (ring->count_dw < count_dw) {
+	if (unlikely(ring->count_dw < count_dw)) {
 		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
 		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
-	} else {
-		occupied = ring->wptr & ring->buf_mask;
-		dst = (void *)&ring->ring[occupied];
-		chunk1 = ring->buf_mask + 1 - occupied;
-		chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
-		chunk2 = count_dw - chunk1;
-		chunk1 <<= 2;
-		chunk2 <<= 2;
-
-		if (chunk1)
-			memcpy(dst, src, chunk1);
-
-		if (chunk2) {
-			src += chunk1;
-			dst = (void *)ring->ring;
-			memcpy(dst, src, chunk2);
-		}
-
-		ring->wptr += count_dw;
-		ring->wptr &= ring->ptr_mask;
-		ring->count_dw -= count_dw;
+		return;
+	}
+
+	occupied = ring->wptr & ring->buf_mask;
+	dst = (void *)&ring->ring[occupied];
+	chunk1 = ring->buf_mask + 1 - occupied;
+	chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+	chunk2 = count_dw - chunk1;
+	chunk1 <<= 2;
+	chunk2 <<= 2;
+
+	if (chunk1)
+		memcpy(dst, src, chunk1);
+
+	if (chunk2) {
+		src += chunk1;
+		dst = (void *)ring->ring;
+		memcpy(dst, src, chunk2);
 	}
 	}
+
+	ring->wptr += count_dw;
+	ring->wptr &= ring->ptr_mask;
+	ring->count_dw -= count_dw;
 }
 }
 
 
 static inline struct amdgpu_sdma_instance *
 static inline struct amdgpu_sdma_instance *
@@ -1792,6 +1866,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -1813,6 +1888,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
@@ -1849,9 +1925,6 @@ bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
@@ -1900,6 +1973,8 @@ static inline bool amdgpu_has_atpx(void) { return false; }
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const int amdgpu_max_kms_ioctl;
 extern const int amdgpu_max_kms_ioctl;
 
 
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+			  struct amdgpu_fpriv *fpriv);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);
@@ -1912,10 +1987,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
-int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
-				    int *max_error,
-				    struct timeval *vblank_time,
-				    unsigned flags);
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg);
 			     unsigned long arg);
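
The queue-manager interface added above (amdgpu_queue_mgr_init/fini/map plus the per-context queue_mgr member) replaces the old fixed per-IP ring lookup. Below is a minimal sketch, with a hypothetical helper name, of how a caller could resolve a user-visible (hw_ip, instance, ring) triple to a kernel ring through the per-context queue manager:

/* Illustrative only: resolve a userspace (hw_ip, instance, ring) triple via
 * the per-context queue manager declared above. */
static int example_lookup_ring(struct amdgpu_device *adev,
			       struct amdgpu_ctx *ctx,
			       int hw_ip, int instance, int user_ring,
			       struct amdgpu_ring **out_ring)
{
	int r;

	/* maps the user-visible ring index to a backing kernel ring */
	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
				 hw_ip, instance, user_ring, out_ring);
	if (r)
		return r;

	/* sanity check kept from the removed amdgpu_cs_get_ring() helper */
	if (!(*out_ring) || !(*out_ring)->adev)
		return -EINVAL;

	return 0;
}

This is the same call that amdgpu_cs_ib_fill(), amdgpu_cs_wait_ioctl() and amdgpu_cs_get_fence() switch to further down in this commit, replacing the removed amdgpu_cs_get_ring().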
 
 

+ 63 - 38
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -24,6 +24,7 @@
 #include "amd_shared.h"
 #include "amd_shared.h"
 #include <drm/drmP.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
+#include "amdgpu_gfx.h"
 #include <linux/module.h>
 #include <linux/module.h>
 
 
 const struct kfd2kgd_calls *kfd2kgd;
 const struct kfd2kgd_calls *kfd2kgd;
@@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void)
 	return ret;
 	return ret;
 }
 }
 
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
 {
-	switch (rdev->asic_type) {
+	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
 	case CHIP_KAVERI:
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
@@ -86,59 +87,83 @@ void amdgpu_amdkfd_fini(void)
 	}
 	}
 }
 }
 
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
 {
 	if (kgd2kfd)
 	if (kgd2kfd)
-		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-					rdev->pdev, kfd2kgd);
+		adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+					adev->pdev, kfd2kgd);
 }
 }
 
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 {
-	if (rdev->kfd) {
+	int i;
+	int last_valid_bit;
+	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = adev->gfx.mec.num_mec,
+			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
 		};
 
 
-		amdgpu_doorbell_get_kfd_info(rdev,
+		/* this is going to have a few of the MSBs set that we need to
+		 * clear */
+		bitmap_complement(gpu_resources.queue_bitmap,
+				  adev->gfx.mec.queue_bitmap,
+				  KGD_MAX_QUEUES);
+
+		/* remove the KIQ bit as well */
+		if (adev->gfx.kiq.ring.ready)
+			clear_bit(amdgpu_gfx_queue_to_bit(adev,
+							  adev->gfx.kiq.ring.me - 1,
+							  adev->gfx.kiq.ring.pipe,
+							  adev->gfx.kiq.ring.queue),
+				  gpu_resources.queue_bitmap);
+
+		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
+		 * nbits is not compile time constant */
+		last_valid_bit = adev->gfx.mec.num_mec
+				* adev->gfx.mec.num_pipe_per_mec
+				* adev->gfx.mec.num_queue_per_pipe;
+		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+			clear_bit(i, gpu_resources.queue_bitmap);
+
+		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 				&gpu_resources.doorbell_start_offset);
 
 
-		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 	}
 }
 }
 
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
 {
-	if (rdev->kfd) {
-		kgd2kfd->device_exit(rdev->kfd);
-		rdev->kfd = NULL;
+	if (adev->kfd) {
+		kgd2kfd->device_exit(adev->kfd);
+		adev->kfd = NULL;
 	}
 	}
 }
 }
 
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 		const void *ih_ring_entry)
 {
 {
-	if (rdev->kfd)
-		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+	if (adev->kfd)
+		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 }
 
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
 {
-	if (rdev->kfd)
-		kgd2kfd->suspend(rdev->kfd);
+	if (adev->kfd)
+		kgd2kfd->suspend(adev->kfd);
 }
 }
 
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
 {
 	int r = 0;
 	int r = 0;
 
 
-	if (rdev->kfd)
-		r = kgd2kfd->resume(rdev->kfd);
+	if (adev->kfd)
+		r = kgd2kfd->resume(adev->kfd);
 
 
 	return r;
 	return r;
 }
 }
@@ -147,7 +172,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr)
 			void **cpu_ptr)
 {
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 	int r;
 	int r;
 
 
@@ -159,10 +184,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	if ((*mem) == NULL)
 	if ((*mem) == NULL)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
 			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
 	if (r) {
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
 			"failed to allocate BO for amdkfd (%d)\n", r);
 		return r;
 		return r;
 	}
 	}
@@ -170,21 +195,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	/* map the buffer */
 	/* map the buffer */
 	r = amdgpu_bo_reserve((*mem)->bo, true);
 	r = amdgpu_bo_reserve((*mem)->bo, true);
 	if (r) {
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 		goto allocate_mem_reserve_bo_failed;
 		goto allocate_mem_reserve_bo_failed;
 	}
 	}
 
 
 	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
 	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
 				&(*mem)->gpu_addr);
 				&(*mem)->gpu_addr);
 	if (r) {
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 		goto allocate_mem_pin_bo_failed;
 		goto allocate_mem_pin_bo_failed;
 	}
 	}
 	*gpu_addr = (*mem)->gpu_addr;
 	*gpu_addr = (*mem)->gpu_addr;
 
 
 	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 	if (r) {
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 		goto allocate_mem_kmap_bo_failed;
 		goto allocate_mem_kmap_bo_failed;
 	}
 	}
@@ -220,27 +245,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 
 
 uint64_t get_vmem_size(struct kgd_dev *kgd)
 uint64_t get_vmem_size(struct kgd_dev *kgd)
 {
 {
-	struct amdgpu_device *rdev =
+	struct amdgpu_device *adev =
 		(struct amdgpu_device *)kgd;
 		(struct amdgpu_device *)kgd;
 
 
 	BUG_ON(kgd == NULL);
 	BUG_ON(kgd == NULL);
 
 
-	return rdev->mc.real_vram_size;
+	return adev->mc.real_vram_size;
 }
 }
 
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 
-	if (rdev->gfx.funcs->get_gpu_clock_counter)
-		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
+	if (adev->gfx.funcs->get_gpu_clock_counter)
+		return adev->gfx.funcs->get_gpu_clock_counter(adev);
 	return 0;
 	return 0;
 }
 }
 
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 
 	/* The sclk is in quantas of 10kHz */
 	/* The sclk is in quantas of 10kHz */
-	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
 }
 }
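
For reference, the queue carve-out performed in amdgpu_amdkfd_device_init() above can be read as the following standalone sketch (hypothetical helper name; the real code also clears the KIQ's bit via amdgpu_gfx_queue_to_bit() before handing the bitmap to kgd2kfd->device_init()):

/* Illustrative restatement of the queue split: KFD gets every MEC queue that
 * amdgpu did not claim, limited to the queues this ASIC actually implements. */
static void example_build_kfd_queue_bitmap(struct amdgpu_device *adev,
					   unsigned long *kfd_bitmap)
{
	unsigned int last_valid, i;

	/* queues owned by amdgpu are set in adev->gfx.mec.queue_bitmap */
	bitmap_complement(kfd_bitmap, adev->gfx.mec.queue_bitmap,
			  KGD_MAX_QUEUES);

	last_valid = adev->gfx.mec.num_mec *
		     adev->gfx.mec.num_pipe_per_mec *
		     adev->gfx.mec.num_queue_per_pipe;

	/* drop the bits beyond the last physically present queue */
	for (i = last_valid; i < KGD_MAX_QUEUES; ++i)
		clear_bit(i, kfd_bitmap);
}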

+ 7 - 7
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -39,15 +39,15 @@ struct kgd_mem {
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev);
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);

+ 11 - 65
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -29,6 +29,7 @@
 #include "cikd.h"
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 #include "gca/gfx_7_2_sh_mask.h"
@@ -38,8 +39,6 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 #include "cik_structs.h"
 
 
-#define CIK_PIPE_PER_MEC	(4)
-
 enum {
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
 	MAX_WATCH_ADDRESSES = 4
@@ -185,8 +184,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 				uint32_t queue_id)
 {
 {
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 }
@@ -243,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-	lock_srbm(kgd, mec, pipe, 0, 0);
-	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_VMID, 0);
-	WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-	unlock_srbm(kgd);
-
+	/* amdgpu owns the per-pipe state */
 	return 0;
 	return 0;
 }
 }
 
 
@@ -264,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t mec;
 	uint32_t pipe;
 	uint32_t pipe;
 
 
-	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
 
@@ -309,55 +299,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 	m = get_mqd(mqd);
 
 
 	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
 	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
-	acquire_queue(kgd, pipe_id, queue_id);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
 	if (is_wptr_shadow_valid)
 	if (is_wptr_shadow_valid)
-		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+		m->cp_hqd_pq_wptr = wptr_shadow;
 
 
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v7_0_mqd_commit(adev, m);
 	release_queue(kgd);
 	release_queue(kgd);
 
 
 	return 0;
 	return 0;
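
Both KFD backends now derive the MEC/pipe split from the runtime topology in adev->gfx.mec instead of a hardcoded per-ASIC constant. A minimal sketch (hypothetical helper) of the decode used by kgd_init_interrupts() above:

/* Illustrative decode of a flat compute pipe index into the one-based MEC
 * number expected by SRBM plus the pipe within that MEC, using the runtime
 * topology instead of the removed CIK_PIPE_PER_MEC constant. */
static void example_decode_pipe(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t *mec, uint32_t *pipe)
{
	*mec  = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	*pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
}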

+ 12 - 52
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -28,6 +28,7 @@
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_enum.h"
@@ -38,8 +39,6 @@
 #include "vi_structs.h"
 #include "vi_structs.h"
 #include "vid.h"
 #include "vid.h"
 
 
-#define VI_PIPE_PER_MEC	(4)
-
 struct cik_sdma_rlc_registers;
 struct cik_sdma_rlc_registers;
 
 
 /*
 /*
@@ -146,8 +145,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 				uint32_t queue_id)
 {
 {
-	uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 }
@@ -205,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
 {
+	/* amdgpu owns the per-pipe state */
 	return 0;
 	return 0;
 }
 }
 
 
@@ -214,8 +216,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t mec;
 	uint32_t pipe;
 	uint32_t pipe;
 
 
-	mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % VI_PIPE_PER_MEC);
+	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
 
@@ -251,53 +253,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 	m = get_mqd(mqd);
 
 
 	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
 	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	acquire_queue(kgd, pipe_id, queue_id);
-
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	if (valid_wptr > 0)
-		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
-
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
-	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
-	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
-	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
-	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
-
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
-	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
-	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
-	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
-	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
-	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
-	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
-	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
-	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
-
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	if (valid_wptr == 0)
+		m->cp_hqd_pq_wptr = shadow_wptr;
 
 
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v8_0_mqd_commit(adev, mqd);
 	release_queue(kgd);
 	release_queue(kgd);
 
 
 	return 0;
 	return 0;
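
Worth noting in the rewritten kgd_hqd_load() above: copy_from_user() returns the number of bytes it could not copy, so a return value of 0 means the shadow write pointer was fetched successfully, and the queue is now programmed through the shared gfx_v8_0_mqd_commit() helper instead of open-coded WREG32 calls. A condensed, illustrative restatement (assuming the file-local acquire_queue()/release_queue() helpers and an already-resolved struct vi_mqd pointer):

static int example_hqd_load(struct kgd_dev *kgd, struct amdgpu_device *adev,
			    struct vi_mqd *m, uint32_t pipe_id,
			    uint32_t queue_id, uint32_t __user *wptr)
{
	uint32_t shadow_wptr;

	/* copy_from_user() returns the number of bytes NOT copied,
	 * so 0 means the user-space shadow wptr was read successfully */
	if (copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)) == 0)
		m->cp_hqd_pq_wptr = shadow_wptr;

	acquire_queue(kgd, pipe_id, queue_id);	/* point SRBM at mec/pipe/queue */
	gfx_v8_0_mqd_commit(adev, m);		/* program the CP_HQD_* registers from the MQD */
	release_queue(kgd);

	return 0;
}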

+ 64 - 41
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c

@@ -35,33 +35,59 @@
 #define AMDGPU_BO_LIST_MAX_PRIORITY	32u
 #define AMDGPU_BO_LIST_NUM_BUCKETS	(AMDGPU_BO_LIST_MAX_PRIORITY + 1)
 
-static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
-				 struct amdgpu_bo_list **result,
+static int amdgpu_bo_list_set(struct amdgpu_device *adev,
+				     struct drm_file *filp,
+				     struct amdgpu_bo_list *list,
+				     struct drm_amdgpu_bo_list_entry *info,
+				     unsigned num_entries);
+
+static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+	unsigned i;
+	struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+						   refcount);
+
+	for (i = 0; i < list->num_entries; ++i)
+		amdgpu_bo_unref(&list->array[i].robj);
+
+	mutex_destroy(&list->lock);
+	kvfree(list->array);
+	kfree_rcu(list, rhead);
+}
+
+static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+				 struct drm_file *filp,
+				 struct drm_amdgpu_bo_list_entry *info,
+				 unsigned num_entries,
 				 int *id)
 				 int *id)
 {
 {
 	int r;
 	int r;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_bo_list *list;
 
 
-	*result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
-	if (!*result)
+	list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
+	if (!list)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
+	/* initialize bo list*/
+	mutex_init(&list->lock);
+	kref_init(&list->refcount);
+	r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
+	if (r) {
+		kfree(list);
+		return r;
+	}
+
+	/* idr alloc should be called only after initialization of bo list. */
 	mutex_lock(&fpriv->bo_list_lock);
 	mutex_lock(&fpriv->bo_list_lock);
-	r = idr_alloc(&fpriv->bo_list_handles, *result,
-		      1, 0, GFP_KERNEL);
+	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+	mutex_unlock(&fpriv->bo_list_lock);
 	if (r < 0) {
 	if (r < 0) {
-		mutex_unlock(&fpriv->bo_list_lock);
-		kfree(*result);
+		kfree(list);
 		return r;
 		return r;
 	}
 	}
 	*id = r;
 	*id = r;
 
 
-	mutex_init(&(*result)->lock);
-	(*result)->num_entries = 0;
-	(*result)->array = NULL;
-
-	mutex_lock(&(*result)->lock);
-	mutex_unlock(&fpriv->bo_list_lock);
-
 	return 0;
 	return 0;
 }
 }
 
 
@@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
 
 
 	mutex_lock(&fpriv->bo_list_lock);
 	mutex_lock(&fpriv->bo_list_lock);
 	list = idr_remove(&fpriv->bo_list_handles, id);
 	list = idr_remove(&fpriv->bo_list_handles, id);
-	if (list) {
-		/* Another user may have a reference to this list still */
-		mutex_lock(&list->lock);
-		mutex_unlock(&list->lock);
-		amdgpu_bo_list_free(list);
-	}
 	mutex_unlock(&fpriv->bo_list_lock);
 	mutex_unlock(&fpriv->bo_list_lock);
+	if (list)
+		kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 }
 
 
 static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -96,7 +118,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	int r;
 	int r;
 	unsigned long total_size = 0;
 	unsigned long total_size = 0;
 
 
-	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
+	array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL);
 	if (!array)
 	if (!array)
 		return -ENOMEM;
 		return -ENOMEM;
 	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
 	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
@@ -148,7 +170,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	for (i = 0; i < list->num_entries; ++i)
 	for (i = 0; i < list->num_entries; ++i)
 		amdgpu_bo_unref(&list->array[i].robj);
 		amdgpu_bo_unref(&list->array[i].robj);
 
 
-	drm_free_large(list->array);
+	kvfree(list->array);
 
 
 	list->gds_obj = gds_obj;
 	list->gds_obj = gds_obj;
 	list->gws_obj = gws_obj;
 	list->gws_obj = gws_obj;
@@ -163,7 +185,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 error_free:
 error_free:
 	while (i--)
 	while (i--)
 		amdgpu_bo_unref(&array[i].robj);
 		amdgpu_bo_unref(&array[i].robj);
-	drm_free_large(array);
+	kvfree(array);
 	return r;
 	return r;
 }
 }
 
 
@@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 {
 {
 	struct amdgpu_bo_list *result;
 	struct amdgpu_bo_list *result;
 
 
-	mutex_lock(&fpriv->bo_list_lock);
+	rcu_read_lock();
 	result = idr_find(&fpriv->bo_list_handles, id);
 	result = idr_find(&fpriv->bo_list_handles, id);
-	if (result)
-		mutex_lock(&result->lock);
-	mutex_unlock(&fpriv->bo_list_lock);
+
+	if (result) {
+		if (kref_get_unless_zero(&result->refcount))
+			mutex_lock(&result->lock);
+		else
+			result = NULL;
+	}
+	rcu_read_unlock();
+
 	return result;
 	return result;
 }
 }
 
 
@@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 {
 {
 	mutex_unlock(&list->lock);
 	mutex_unlock(&list->lock);
+	kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 }
 
 
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
@@ -224,7 +253,7 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
 		amdgpu_bo_unref(&list->array[i].robj);
 		amdgpu_bo_unref(&list->array[i].robj);
 
 
 	mutex_destroy(&list->lock);
 	mutex_destroy(&list->lock);
-	drm_free_large(list->array);
+	kvfree(list->array);
 	kfree(list);
 	kfree(list);
 }
 }
 
 
@@ -244,8 +273,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 
 
 	int r;
 	int r;
 
 
-	info = drm_malloc_ab(args->in.bo_number,
-			     sizeof(struct drm_amdgpu_bo_list_entry));
+	info = kvmalloc_array(args->in.bo_number,
+			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
 	if (!info)
 	if (!info)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
@@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 
 
 	switch (args->in.operation) {
 	switch (args->in.operation) {
 	case AMDGPU_BO_LIST_OP_CREATE:
 	case AMDGPU_BO_LIST_OP_CREATE:
-		r = amdgpu_bo_list_create(fpriv, &list, &handle);
+		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+					  &handle);
 		if (r)
 		if (r)
 			goto error_free;
 			goto error_free;
-
-		r = amdgpu_bo_list_set(adev, filp, list, info,
-					      args->in.bo_number);
-		amdgpu_bo_list_put(list);
-		if (r)
-			goto error_free;
-
 		break;
 		break;
 
 
 	case AMDGPU_BO_LIST_OP_DESTROY:
 	case AMDGPU_BO_LIST_OP_DESTROY:
@@ -311,11 +334,11 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 
 
 	memset(args, 0, sizeof(*args));
 	memset(args, 0, sizeof(*args));
 	args->out.list_handle = handle;
 	args->out.list_handle = handle;
-	drm_free_large(info);
+	kvfree(info);
 
 
 	return 0;
 	return 0;
 
 
 error_free:
 error_free:
-	drm_free_large(info);
+	kvfree(info);
 	return r;
 	return r;
 }
 }
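
The bo_list lifetime now follows a standard RCU + kref pattern: lookups run under rcu_read_lock() and only succeed if kref_get_unless_zero() can still take a reference, while the final kref_put() frees through kfree_rcu(). A minimal sketch of the lookup side, mirroring amdgpu_bo_list_get() above (hypothetical helper name):

static struct amdgpu_bo_list *example_bo_list_lookup(struct amdgpu_fpriv *fpriv,
						     int id)
{
	struct amdgpu_bo_list *list;

	rcu_read_lock();
	list = idr_find(&fpriv->bo_list_handles, id);
	if (list && !kref_get_unless_zero(&list->refcount))
		list = NULL;	/* raced with the final kref_put() */
	rcu_read_unlock();

	return list;
}

The real function additionally takes list->lock before returning, and the matching amdgpu_bo_list_put() drops both the lock and the reference.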

+ 174 - 133
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -27,81 +27,10 @@
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring)
-{
-	/* Right now all IPs have only one instance - multiple rings. */
-	if (ip_instance != 0) {
-		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
-		return -EINVAL;
-	}
-
-	switch (ip_type) {
-	default:
-		DRM_ERROR("unknown ip type: %d\n", ip_type);
-		return -EINVAL;
-	case AMDGPU_HW_IP_GFX:
-		if (ring < adev->gfx.num_gfx_rings) {
-			*out_ring = &adev->gfx.gfx_ring[ring];
-		} else {
-			DRM_ERROR("only %d gfx rings are supported now\n",
-				  adev->gfx.num_gfx_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		if (ring < adev->gfx.num_compute_rings) {
-			*out_ring = &adev->gfx.compute_ring[ring];
-		} else {
-			DRM_ERROR("only %d compute rings are supported now\n",
-				  adev->gfx.num_compute_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_DMA:
-		if (ring < adev->sdma.num_instances) {
-			*out_ring = &adev->sdma.instance[ring].ring;
-		} else {
-			DRM_ERROR("only %d SDMA rings are supported\n",
-				  adev->sdma.num_instances);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.ring;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		if (ring < adev->vce.num_rings){
-			*out_ring = &adev->vce.ring[ring];
-		} else {
-			DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		if (ring < adev->uvd.num_enc_rings){
-			*out_ring = &adev->uvd.ring_enc[ring];
-		} else {
-			DRM_ERROR("only %d UVD ENC rings are supported\n",
-				adev->uvd.num_enc_rings);
-			return -EINVAL;
-		}
-		break;
-	}
-
-	if (!(*out_ring && (*out_ring)->adev)) {
-		DRM_ERROR("Ring %d is not initialized on IP %d\n",
-			  ring, ip_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
 				      uint32_t *offset)
@@ -194,7 +123,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		size = p->chunks[i].length_dw;
 		size = p->chunks[i].length_dw;
 		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
 		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
 
 
-		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
+		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 		if (p->chunks[i].kdata == NULL) {
 		if (p->chunks[i].kdata == NULL) {
 			ret = -ENOMEM;
 			ret = -ENOMEM;
 			i--;
 			i--;
@@ -226,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 			break;
 
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			break;
 			break;
 
 
 		default:
 		default:
@@ -247,7 +178,7 @@ free_all_kdata:
 	i = p->nchunks - 1;
 	i = p->nchunks - 1;
 free_partial_kdata:
 free_partial_kdata:
 	for (; i >= 0; i--)
 	for (; i >= 0; i--)
-		drm_free_large(p->chunks[i].kdata);
+		kvfree(p->chunks[i].kdata);
 	kfree(p->chunks);
 	kfree(p->chunks);
 	p->chunks = NULL;
 	p->chunks = NULL;
 	p->nchunks = 0;
 	p->nchunks = 0;
@@ -505,7 +436,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			return r;
 			return r;
 
 
 		if (binding_userptr) {
 		if (binding_userptr) {
-			drm_free_large(lobj->user_pages);
+			kvfree(lobj->user_pages);
 			lobj->user_pages = NULL;
 			lobj->user_pages = NULL;
 		}
 		}
 	}
 	}
@@ -571,7 +502,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				release_pages(e->user_pages,
 				release_pages(e->user_pages,
 					      e->robj->tbo.ttm->num_pages,
 					      e->robj->tbo.ttm->num_pages,
 					      false);
 					      false);
-				drm_free_large(e->user_pages);
+				kvfree(e->user_pages);
 				e->user_pages = NULL;
 				e->user_pages = NULL;
 			}
 			}
 
 
@@ -597,12 +528,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_free_pages;
 			goto error_free_pages;
 		}
 		}
 
 
-		/* Fill the page arrays for all useptrs. */
+		/* Fill the page arrays for all userptrs. */
 		list_for_each_entry(e, &need_pages, tv.head) {
 		list_for_each_entry(e, &need_pages, tv.head) {
 			struct ttm_tt *ttm = e->robj->tbo.ttm;
 			struct ttm_tt *ttm = e->robj->tbo.ttm;
 
 
-			e->user_pages = drm_calloc_large(ttm->num_pages,
-							 sizeof(struct page*));
+			e->user_pages = kvmalloc_array(ttm->num_pages,
+							 sizeof(struct page*),
+							 GFP_KERNEL | __GFP_ZERO);
 			if (!e->user_pages) {
 			if (!e->user_pages) {
 				r = -ENOMEM;
 				r = -ENOMEM;
 				DRM_ERROR("calloc failure in %s\n", __func__);
 				DRM_ERROR("calloc failure in %s\n", __func__);
@@ -612,7 +544,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
 			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
 			if (r) {
 			if (r) {
 				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
 				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				drm_free_large(e->user_pages);
+				kvfree(e->user_pages);
 				e->user_pages = NULL;
 				e->user_pages = NULL;
 				goto error_free_pages;
 				goto error_free_pages;
 			}
 			}
@@ -708,7 +640,7 @@ error_free_pages:
 			release_pages(e->user_pages,
 			release_pages(e->user_pages,
 				      e->robj->tbo.ttm->num_pages,
 				      e->robj->tbo.ttm->num_pages,
 				      false);
 				      false);
-			drm_free_large(e->user_pages);
+			kvfree(e->user_pages);
 		}
 		}
 	}
 	}
 
 
@@ -753,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 					   &parser->validated);
 	}
 	}
+
+	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
+		drm_syncobj_put(parser->post_dep_syncobjs[i]);
+	kfree(parser->post_dep_syncobjs);
+
 	dma_fence_put(parser->fence);
 	dma_fence_put(parser->fence);
 
 
 	if (parser->ctx)
 	if (parser->ctx)
@@ -761,7 +698,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		amdgpu_bo_list_put(parser->bo_list);
 		amdgpu_bo_list_put(parser->bo_list);
 
 
 	for (i = 0; i < parser->nchunks; i++)
 	for (i = 0; i < parser->nchunks; i++)
-		drm_free_large(parser->chunks[i].kdata);
+		kvfree(parser->chunks[i].kdata);
 	kfree(parser->chunks);
 	kfree(parser->chunks);
 	if (parser->job)
 	if (parser->job)
 		amdgpu_job_free(parser->job);
 		amdgpu_job_free(parser->job);
@@ -916,9 +853,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 				return -EINVAL;
 				return -EINVAL;
 		}
 		}
 
 
-		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
-				       chunk_ib->ip_instance, chunk_ib->ring,
-				       &ring);
+		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
+					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
 		if (r)
 		if (r)
 			return r;
 			return r;
 
 
@@ -995,62 +931,150 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	return 0;
 	return 0;
 }
 }
 
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
-				  struct amdgpu_cs_parser *p)
+static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
+				       struct amdgpu_cs_chunk *chunk)
 {
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	int i, j, r;
-
-	for (i = 0; i < p->nchunks; ++i) {
-		struct drm_amdgpu_cs_chunk_dep *deps;
-		struct amdgpu_cs_chunk *chunk;
-		unsigned num_deps;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_dep *deps;
 
 
-		chunk = &p->chunks[i];
+	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-			continue;
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_ring *ring;
+		struct amdgpu_ctx *ctx;
+		struct dma_fence *fence;
 
 
-		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-		num_deps = chunk->length_dw * 4 /
-			sizeof(struct drm_amdgpu_cs_chunk_dep);
+		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+		if (ctx == NULL)
+			return -EINVAL;
 
 
-		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_ring *ring;
-			struct amdgpu_ctx *ctx;
-			struct dma_fence *fence;
+		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
+					 deps[i].ip_type,
+					 deps[i].ip_instance,
+					 deps[i].ring, &ring);
+		if (r) {
+			amdgpu_ctx_put(ctx);
+			return r;
+		}
 
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
+		fence = amdgpu_ctx_get_fence(ctx, ring,
+					     deps[i].handle);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			amdgpu_ctx_put(ctx);
+			return r;
+		} else if (fence) {
+			r = amdgpu_sync_fence(p->adev, &p->job->sync,
+					      fence);
+			dma_fence_put(fence);
+			amdgpu_ctx_put(ctx);
 			if (r)
 			if (r)
 				return r;
 				return r;
+		}
+	}
+	return 0;
+}
 
 
-			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-			if (ctx == NULL)
-				return -EINVAL;
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+						 uint32_t handle)
+{
+	int r;
+	struct dma_fence *fence;
+	r = drm_syncobj_fence_get(p->filp, handle, &fence);
+	if (r)
+		return r;
 
 
-			fence = amdgpu_ctx_get_fence(ctx, ring,
-						     deps[j].handle);
-			if (IS_ERR(fence)) {
-				r = PTR_ERR(fence);
-				amdgpu_ctx_put(ctx);
-				return r;
+	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+	dma_fence_put(fence);
 
 
-			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &p->job->sync,
-						      fence);
-				dma_fence_put(fence);
-				amdgpu_ctx_put(ctx);
-				if (r)
-					return r;
-			}
+	return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+					    struct amdgpu_cs_chunk *chunk)
+{
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+					     struct amdgpu_cs_chunk *chunk)
+{
+	unsigned num_deps;
+	int i;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	p->post_dep_syncobjs = kmalloc_array(num_deps,
+					     sizeof(struct drm_syncobj *),
+					     GFP_KERNEL);
+	p->num_post_dep_syncobjs = 0;
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_dep_syncobjs[i])
+			return -EINVAL;
+		p->num_post_dep_syncobjs++;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+				  struct amdgpu_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
+
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+			r = amdgpu_cs_process_fence_dep(p, chunk);
+			if (r)
+				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+			if (r)
+				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+			if (r)
+				return r;
 		}
 		}
 	}
 	}
 
 
 	return 0;
 	return 0;
 }
 }
 
 
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+	int i;
+
+	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
+		drm_syncobj_replace_fence(p->filp, p->post_dep_syncobjs[i],
+					  p->fence);
+	}
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 			    union drm_amdgpu_cs *cs)
 {
 {
@@ -1071,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
 	job->fence_ctx = entity->fence_context;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
+
+	amdgpu_cs_post_dependencies(p);
+
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
 	amdgpu_job_free_resources(job);
@@ -1078,13 +1105,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 
 	trace_amdgpu_cs_ioctl(job);
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 	amd_sched_entity_push_job(&job->base);
-
 	return 0;
 }

 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
@@ -1092,6 +1119,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)

 	if (!adev->accel_working)
 		return -EBUSY;
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;

 	parser.adev = adev;
 	parser.filp = filp;
@@ -1153,21 +1182,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;

-	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
-			       wait->in.ring, &ring);
-	if (r)
-		return r;
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;

 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;

+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
+				 wait->in.ip_type, wait->in.ip_instance,
+				 wait->in.ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return r;
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
@@ -1203,15 +1239,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	struct dma_fence *fence;
 	int r;

-	r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
-			       user->ring, &ring);
-	if (r)
-		return ERR_PTR(r);
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);

+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
+				 user->ip_instance, user->ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return ERR_PTR(r);
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
 	amdgpu_ctx_put(ctx);

@@ -1332,12 +1370,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_wait_fences *wait = data;
 	uint32_t fence_count = wait->in.fence_count;
 	struct drm_amdgpu_fence *fences_user;
 	struct drm_amdgpu_fence *fences;
 	int r;

+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 	/* Get the fences from userspace */
 	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
 			GFP_KERNEL);

+ 10 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

@@ -52,12 +52,20 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 		struct amd_sched_rq *rq;

 		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+
+		if (ring == &adev->gfx.kiq.ring)
+			continue;
+
 		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
 					  rq, amdgpu_sched_jobs);
 		if (r)
 			goto failed;
 	}

+	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
+	if (r)
+		goto failed;
+
 	return 0;

 failed:
@@ -86,6 +94,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 	for (i = 0; i < adev->num_rings; i++)
 		amd_sched_entity_fini(&adev->rings[i]->sched,
 				      &ctx->rings[i].entity);
+
+	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
 }

 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,

+ 371 - 67
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -54,8 +54,14 @@
 #include <linux/pci.h>
 #include <linux/firmware.h>

+MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+
+#define AMDGPU_RESUME_MS		2000
+
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
 static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
+static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev);

 static const char *amdgpu_asic_name[] = {
 	"TAHITI",
@@ -77,6 +83,7 @@ static const char *amdgpu_asic_name[] = {
 	"POLARIS11",
 	"POLARIS12",
 	"VEGA10",
+	"RAVEN",
 	"LAST",
 };

@@ -478,9 +485,8 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 
 
 /*
 /*
  * amdgpu_wb_*()
  * amdgpu_wb_*()
- * Writeback is the the method by which the the GPU updates special pages
- * in memory with the status of certain GPU events (fences, ring pointers,
- * etc.).
+ * Writeback is the method by which the GPU updates special pages in memory
+ * with the status of certain GPU events (fences, ring pointers,etc.).
  */
  */
 
 
 /**
 /**
@@ -506,7 +512,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev)
  *
  *
  * @adev: amdgpu_device pointer
  * @adev: amdgpu_device pointer
  *
  *
- * Disables Writeback and frees the Writeback memory (all asics).
+ * Initializes writeback and allocates writeback memory (all asics).
  * Used at driver startup.
  * Used at driver startup.
  * Returns 0 on success or an -error on failure.
  * Returns 0 on success or an -error on failure.
  */
  */
@@ -614,7 +620,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
  * @mc: memory controller structure holding memory informations
  * @mc: memory controller structure holding memory informations
  * @base: base address at which to put VRAM
  * @base: base address at which to put VRAM
  *
  *
- * Function will place try to place VRAM at base address provided
+ * Function will try to place VRAM at base address provided
  * as parameter (which is so far either PCI aperture address or
  * as parameter (which is so far either PCI aperture address or
  * for IGP TOM base address).
  * for IGP TOM base address).
  *
  *
@@ -636,7 +642,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
  * ones)
  * ones)
  *
  *
  * Note: IGP TOM addr should be the same as the aperture addr, we don't
  * Note: IGP TOM addr should be the same as the aperture addr, we don't
- * explicitly check for that thought.
+ * explicitly check for that though.
  *
  *
  * FIXME: when reducing VRAM size align new size on power of 2.
  * FIXME: when reducing VRAM size align new size on power of 2.
  */
  */
@@ -1067,6 +1073,10 @@ def_value:
 
 
 static void amdgpu_check_vm_size(struct amdgpu_device *adev)
 static void amdgpu_check_vm_size(struct amdgpu_device *adev)
 {
 {
+	/* no need to check the default value */
+	if (amdgpu_vm_size == -1)
+		return;
+
 	if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
 	if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
 		dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
 		dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
 			 amdgpu_vm_size);
 			 amdgpu_vm_size);
@@ -1342,6 +1352,9 @@ int amdgpu_ip_block_add(struct amdgpu_device *adev,
 	if (!ip_block_version)
 	if (!ip_block_version)
 		return -EINVAL;
 		return -EINVAL;
 
 
+	DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks,
+		  ip_block_version->funcs->name);
+
 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
 
 
 	return 0;
 	return 0;
@@ -1392,6 +1405,104 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 	}
 	}
 }
 }
 
 
+static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
+{
+	const char *chip_name;
+	char fw_name[30];
+	int err;
+	const struct gpu_info_firmware_header_v1_0 *hdr;
+
+	adev->firmware.gpu_info_fw = NULL;
+
+	switch (adev->asic_type) {
+	case CHIP_TOPAZ:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS12:
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+#ifdef CONFIG_DRM_AMDGPU_SI
+	case CHIP_VERDE:
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_OLAND:
+	case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KAVERI:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+#endif
+	default:
+		return 0;
+	case CHIP_VEGA10:
+		chip_name = "vega10";
+		break;
+	case CHIP_RAVEN:
+		chip_name = "raven";
+		break;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
+	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
+	if (err) {
+		dev_err(adev->dev,
+			"Failed to load gpu_info firmware \"%s\"\n",
+			fw_name);
+		goto out;
+	}
+	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
+	if (err) {
+		dev_err(adev->dev,
+			"Failed to validate gpu_info firmware \"%s\"\n",
+			fw_name);
+		goto out;
+	}
+
+	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
+	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
+
+	switch (hdr->version_major) {
+	case 1:
+	{
+		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
+			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
+								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
+		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
+		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
+		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
+		adev->gfx.config.max_texture_channel_caches =
+			le32_to_cpu(gpu_info_fw->gc_num_tccs);
+		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
+		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
+		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
+		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
+		adev->gfx.config.double_offchip_lds_buf =
+			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
+		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
+		adev->gfx.cu_info.max_waves_per_simd =
+			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
+		adev->gfx.cu_info.max_scratch_slots_per_cu =
+			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
+		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
+		break;
+	}
+	default:
+		dev_err(adev->dev,
+			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
+		err = -EINVAL;
+		goto out;
+	}
+out:
+	return err;
+}
+
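A quick illustration, not part of the commit: amdgpu_device_parse_gpu_info_fw() above reads little-endian fields out of the firmware blob with le32_to_cpu(). A stand-alone sketch of the same idea for a raw byte buffer, with invented contents (a single dword holding gc_num_se = 4):

#include <stdint.h>
#include <stdio.h>

/* assemble a little-endian 32-bit value regardless of host endianness */
static uint32_t read_le32(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
	/* pretend firmware image: one LE dword with the value 4 at offset 0 */
	const uint8_t blob[4] = { 0x04, 0x00, 0x00, 0x00 };

	printf("gc_num_se = %u\n", read_le32(blob));	/* prints 4 */
	return 0;
}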
 static int amdgpu_early_init(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1444,8 +1555,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 			return r;
 			return r;
 		break;
 		break;
 #endif
 #endif
-	case CHIP_VEGA10:
-		adev->family = AMDGPU_FAMILY_AI;
+	case  CHIP_VEGA10:
+	case  CHIP_RAVEN:
+		if (adev->asic_type == CHIP_RAVEN)
+			adev->family = AMDGPU_FAMILY_RV;
+		else
+			adev->family = AMDGPU_FAMILY_AI;
 
 
 		r = soc15_set_ip_blocks(adev);
 		r = soc15_set_ip_blocks(adev);
 		if (r)
 		if (r)
@@ -1456,6 +1571,10 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
+	r = amdgpu_device_parse_gpu_info_fw(adev);
+	if (r)
+		return r;
+
 	if (amdgpu_sriov_vf(adev)) {
 	if (amdgpu_sriov_vf(adev)) {
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		if (r)
 		if (r)
@@ -1464,7 +1583,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
-			DRM_ERROR("disabled ip block: %d\n", i);
+			DRM_ERROR("disabled ip block: %d <%s>\n",
+				  i, adev->ip_blocks[i].version->funcs->name);
 			adev->ip_blocks[i].status.valid = false;
 			adev->ip_blocks[i].status.valid = false;
 		} else {
 		} else {
 			if (adev->ip_blocks[i].version->funcs->early_init) {
 			if (adev->ip_blocks[i].version->funcs->early_init) {
@@ -1552,22 +1672,24 @@ static int amdgpu_init(struct amdgpu_device *adev)
 	return 0;
 	return 0;
 }
 }
 
 
-static int amdgpu_late_init(struct amdgpu_device *adev)
+static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
+{
+	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
+}
+
+static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
+{
+	return !!memcmp(adev->gart.ptr, adev->reset_magic,
+			AMDGPU_RESET_MAGIC_NUM);
+}
+
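A quick illustration, not part of the commit: the two helpers above form a simple VRAM-loss probe, a small cookie copied out of a GART-visible page before a reset and compared against it afterwards. A toy user-space rendering of the idea (the array stands in for the GART page; the size and names are invented):

#include <stdio.h>
#include <string.h>

#define RESET_MAGIC_NUM 64	/* invented size, plays the role of AMDGPU_RESET_MAGIC_NUM */

static unsigned char gart_page[RESET_MAGIC_NUM];	/* stands in for adev->gart.ptr */
static unsigned char reset_magic[RESET_MAGIC_NUM];	/* stands in for adev->reset_magic */

static void fill_reset_magic(void)
{
	memcpy(reset_magic, gart_page, RESET_MAGIC_NUM);
}

static int check_vram_lost(void)
{
	return memcmp(gart_page, reset_magic, RESET_MAGIC_NUM) != 0;
}

int main(void)
{
	memset(gart_page, 0xA5, sizeof(gart_page));	/* pretend GART-backed content */
	fill_reset_magic();
	printf("before reset: vram lost? %d\n", check_vram_lost());	/* 0 */

	memset(gart_page, 0, sizeof(gart_page));	/* simulate the page being wiped by a reset */
	printf("after reset:  vram lost? %d\n", check_vram_lost());	/* 1 */
	return 0;
}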
+static int amdgpu_late_set_cg_state(struct amdgpu_device *adev)
 {
 	int i = 0, r;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].version->funcs->late_init) {
-			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
-			if (r) {
-				DRM_ERROR("late_init of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].version->funcs->name, r);
-				return r;
-			}
-			adev->ip_blocks[i].status.late_initialized = true;
-		}
 		/* skip CG for VCE/UVD, it's handled specially */
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
 			}
 			}
 		}
 	}
+}
+
+static int amdgpu_late_init(struct amdgpu_device *adev)
+{
+	int i = 0, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_blocks[i].status.valid)
+			continue;
+		if (adev->ip_blocks[i].version->funcs->late_init) {
+			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
+			if (r) {
+				DRM_ERROR("late_init of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
+				return r;
+			}
+			adev->ip_blocks[i].status.late_initialized = true;
+		}
+	}
+
+	mod_delayed_work(system_wq, &adev->late_init_work,
+			msecs_to_jiffies(AMDGPU_RESUME_MS));
+
+	amdgpu_fill_reset_magic(adev);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1672,6 +1819,13 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 	return 0;
 	return 0;
 }
 
+static void amdgpu_late_init_func_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, late_init_work.work);
+	amdgpu_late_set_cg_state(adev);
+}
+
 int amdgpu_suspend(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1717,19 +1871,25 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
 {
 {
 	int i, r;
 	int i, r;
 
 
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
+	static enum amd_ip_block_type ip_order[] = {
+		AMD_IP_BLOCK_TYPE_GMC,
+		AMD_IP_BLOCK_TYPE_COMMON,
+		AMD_IP_BLOCK_TYPE_IH,
+	};
 
 
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
-			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+		int j;
+		struct amdgpu_ip_block *block;
 
 
-		if (r) {
-			DRM_ERROR("resume of IP block <%s> failed %d\n",
-				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
+		for (j = 0; j < adev->num_ip_blocks; j++) {
+			block = &adev->ip_blocks[j];
+
+			if (block->version->type != ip_order[i] ||
+				!block->status.valid)
+				continue;
+
+			r = block->version->funcs->hw_init(adev);
+			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
 		}
 	}

@@ -1740,33 +1900,67 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
 {
 {
 	int i, r;
 	int i, r;
 
 
+	static enum amd_ip_block_type ip_order[] = {
+		AMD_IP_BLOCK_TYPE_SMC,
+		AMD_IP_BLOCK_TYPE_DCE,
+		AMD_IP_BLOCK_TYPE_GFX,
+		AMD_IP_BLOCK_TYPE_SDMA,
+		AMD_IP_BLOCK_TYPE_VCE,
+	};
+
+	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+		int j;
+		struct amdgpu_ip_block *block;
+
+		for (j = 0; j < adev->num_ip_blocks; j++) {
+			block = &adev->ip_blocks[j];
+
+			if (block->version->type != ip_order[i] ||
+				!block->status.valid)
+				continue;
+
+			r = block->version->funcs->hw_init(adev);
+			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
+		}
+	}
+
+	return 0;
+}
+
+static int amdgpu_resume_phase1(struct amdgpu_device *adev)
+{
+	int i, r;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 			continue;
-
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
 				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
 				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
-			continue;
-
-		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
-		if (r) {
-			DRM_ERROR("resume of IP block <%s> failed %d\n",
-				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
+				adev->ip_blocks[i].version->type ==
+				AMD_IP_BLOCK_TYPE_IH) {
+			r = adev->ip_blocks[i].version->funcs->resume(adev);
+			if (r) {
+				DRM_ERROR("resume of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
+				return r;
+			}
 		}
 		}
 	}
 	}
 
 
 	return 0;
 	return 0;
 }
 }
 
 
-static int amdgpu_resume(struct amdgpu_device *adev)
+static int amdgpu_resume_phase2(struct amdgpu_device *adev)
 {
 {
 	int i, r;
 	int i, r;
 
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 			continue;
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
+			continue;
 		r = adev->ip_blocks[i].version->funcs->resume(adev);
 		r = adev->ip_blocks[i].version->funcs->resume(adev);
 		if (r) {
 		if (r) {
 			DRM_ERROR("resume of IP block <%s> failed %d\n",
 			DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -1778,6 +1972,18 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 	return 0;
 	return 0;
 }
 }
 
 
+static int amdgpu_resume(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_resume_phase1(adev);
+	if (r)
+		return r;
+	r = amdgpu_resume_phase2(adev);
+
+	return r;
+}
+
 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 {
 {
 	if (adev->is_atom_fw) {
 	if (adev->is_atom_fw) {
@@ -1860,8 +2066,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 
 	amdgpu_check_arguments(adev);
 	amdgpu_check_arguments(adev);
 
 
-	/* Registers mapping */
-	/* TODO: block userspace mapping of io register */
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->pcie_idx_lock);
@@ -1877,6 +2081,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&adev->gtt_list);
 	INIT_LIST_HEAD(&adev->gtt_list);
 	spin_lock_init(&adev->gtt_list_lock);
 	spin_lock_init(&adev->gtt_list_lock);
 
 
+	INIT_LIST_HEAD(&adev->ring_lru_list);
+	spin_lock_init(&adev->ring_lru_list_lock);
+
+	INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler);
+
+	/* Registers mapping */
+	/* TODO: block userspace mapping of io register */
 	if (adev->asic_type >= CHIP_BONAIRE) {
 	if (adev->asic_type >= CHIP_BONAIRE) {
 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
@@ -1989,6 +2200,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 
 	adev->accel_working = true;
 	adev->accel_working = true;
 
 
+	amdgpu_vm_check_compute_bug(adev);
+
 	/* Initialize the buffer migration limit. */
 	/* Initialize the buffer migration limit. */
 	if (amdgpu_moverate >= 0)
 	if (amdgpu_moverate >= 0)
 		max_MBps = amdgpu_moverate;
 		max_MBps = amdgpu_moverate;
@@ -2017,6 +2230,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 	if (r)
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 
 
+	r = amdgpu_debugfs_test_ib_ring_init(adev);
+	if (r)
+		DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r);
+
 	r = amdgpu_debugfs_firmware_init(adev);
 	r = amdgpu_debugfs_firmware_init(adev);
 	if (r)
 	if (r)
 		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
 		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
@@ -2073,7 +2290,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
 	r = amdgpu_fini(adev);
+	if (adev->firmware.gpu_info_fw) {
+		release_firmware(adev->firmware.gpu_info_fw);
+		adev->firmware.gpu_info_fw = NULL;
+	}
 	adev->accel_working = false;
 	adev->accel_working = false;
+	cancel_delayed_work_sync(&adev->late_init_work);
 	/* free i2c buses */
 	/* free i2c buses */
 	amdgpu_i2c_fini(adev);
 	amdgpu_i2c_fini(adev);
 	amdgpu_atombios_fini(adev);
 	amdgpu_atombios_fini(adev);
@@ -2458,16 +2680,15 @@ err:
  * amdgpu_sriov_gpu_reset - reset the asic
  *
  * @adev: amdgpu device pointer
- * @voluntary: if this reset is requested by guest.
- *             (true means by guest and false means by HYPERVISOR )
+ * @job: the job that triggered the hang
  *
  * Attempt the reset the GPU if it has hung (all asics).
  * for SRIOV case.
  * Returns 0 for success or an error on failure.
  */
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
 {
 {
-	int i, r = 0;
+	int i, j, r = 0;
 	int resched;
 	int resched;
 	struct amdgpu_bo *bo, *tmp;
 	struct amdgpu_bo *bo, *tmp;
 	struct amdgpu_ring *ring;
 	struct amdgpu_ring *ring;
@@ -2480,22 +2701,39 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

-	/* block scheduler */
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		ring = adev->rings[i];
+	/* we start from the ring that triggered the GPU hang */
+	j = job ? job->ring->idx : 0;

+	/* block scheduler */
+	for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
+		ring = adev->rings[i % AMDGPU_MAX_RINGS];
 		if (!ring || !ring->sched.thread)
 			continue;

 		kthread_park(ring->sched.thread);
+
+		if (job && j != i)
+			continue;
+
+		/* here give the last chance to check if job removed from mirror-list
+		 * since we already pay some time on kthread_park */
+		if (job && list_empty(&job->base.node)) {
+			kthread_unpark(ring->sched.thread);
+			goto give_up_reset;
+		}
+
+		if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit))
+			amd_sched_job_kickout(&job->base);
+
+		/* only do job_reset on the hang ring if @job not NULL */
 		amd_sched_hw_job_reset(&ring->sched);
-	}

-	/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
-	amdgpu_fence_driver_force_completion(adev);
+		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+		amdgpu_fence_driver_force_completion_ring(ring);
+	}

 	/* request to take full control of GPU before re-initialization  */
-	if (voluntary)
+	if (job)
 		amdgpu_virt_reset_gpu(adev);
 	else
 		amdgpu_virt_request_full_gpu(adev, true);
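A quick illustration, not part of the commit: the scheduler loop above still visits every ring exactly once, but starts at the ring that reported the hang and wraps around with a modulo. The iteration pattern in isolation (ring names and count invented):

#include <stdio.h>

#define NUM_RINGS 4

int main(void)
{
	const char *rings[NUM_RINGS] = { "gfx", "comp.0", "comp.1", "sdma" };
	int hang = 2;	/* pretend comp.1 reported the hang */
	int i;

	/* hung ring first, then the rest, each exactly once */
	for (i = hang; i < hang + NUM_RINGS; ++i)
		printf("handling ring %s\n", rings[i % NUM_RINGS]);

	return 0;
}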
@@ -2545,20 +2783,28 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
 	}
 	dma_fence_put(fence);

-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_ring *ring = adev->rings[i];
+	for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
+		ring = adev->rings[i % AMDGPU_MAX_RINGS];
 		if (!ring || !ring->sched.thread)
 			continue;

+		if (job && j != i) {
+			kthread_unpark(ring->sched.thread);
+			continue;
+		}
+
 		amd_sched_job_recovery(&ring->sched);
 		kthread_unpark(ring->sched.thread);
 	}

 	drm_helper_resume_force_mode(adev->ddev);
+give_up_reset:
 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
+	} else {
+		dev_info(adev->dev, "GPU reset succeeded!\n");
 	}

 	adev->gfx.in_reset = false;
@@ -2578,10 +2824,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
 {
 	int i, r;
 	int i, r;
 	int resched;
 	int resched;
-	bool need_full_reset;
-
-	if (amdgpu_sriov_vf(adev))
-		return amdgpu_sriov_gpu_reset(adev, true);
+	bool need_full_reset, vram_lost = false;
 
 
 	if (!amdgpu_check_soft_reset(adev)) {
 	if (!amdgpu_check_soft_reset(adev)) {
 		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
 		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@@ -2641,16 +2884,27 @@ retry:
 
 
 		if (!r) {
 		if (!r) {
 			dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 			dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
-			r = amdgpu_resume(adev);
+			r = amdgpu_resume_phase1(adev);
+			if (r)
+				goto out;
+			vram_lost = amdgpu_check_vram_lost(adev);
+			if (vram_lost) {
+				DRM_ERROR("VRAM is lost!\n");
+				atomic_inc(&adev->vram_lost_counter);
+			}
+			r = amdgpu_ttm_recover_gart(adev);
+			if (r)
+				goto out;
+			r = amdgpu_resume_phase2(adev);
+			if (r)
+				goto out;
+			if (vram_lost)
+				amdgpu_fill_reset_magic(adev);
 		}
 		}
 	}
 	}
+out:
 	if (!r) {
 	if (!r) {
 		amdgpu_irq_gpu_reset_resume_helper(adev);
 		amdgpu_irq_gpu_reset_resume_helper(adev);
-		if (need_full_reset && amdgpu_need_backup(adev)) {
-			r = amdgpu_ttm_recover_gart(adev);
-			if (r)
-				DRM_ERROR("gart recovery failed!!!\n");
-		}
 		r = amdgpu_ib_ring_tests(adev);
 		r = amdgpu_ib_ring_tests(adev);
 		if (r) {
 		if (r) {
 			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
 			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
@@ -2712,10 +2966,11 @@ retry:
 	drm_helper_resume_force_mode(adev->ddev);

 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
-	if (r) {
+	if (r)
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
-	}
+	else
+		dev_info(adev->dev, "GPU reset succeeded!\n");

 	return r;
 }
@@ -3499,11 +3754,60 @@ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
 	}
 	}
 }
 }
 
 
+static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	int r = 0, i;
+
+	/* hold on the scheduler */
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		if (!ring || !ring->sched.thread)
+			continue;
+		kthread_park(ring->sched.thread);
+	}
+
+	seq_printf(m, "run ib test:\n");
+	r = amdgpu_ib_ring_tests(adev);
+	if (r)
+		seq_printf(m, "ib ring tests failed (%d).\n", r);
+	else
+		seq_printf(m, "ib ring tests passed.\n");
+
+	/* go on the scheduler */
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		if (!ring || !ring->sched.thread)
+			continue;
+		kthread_unpark(ring->sched.thread);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = {
+	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib}
+};
+
+static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
+{
+	return amdgpu_debugfs_add_files(adev,
+					amdgpu_debugfs_test_ib_ring_list, 1);
+}
+
 int amdgpu_debugfs_init(struct drm_minor *minor)
 {
 	return 0;
 }
 #else
+static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
+{
+	return 0;
+}
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
 {
 	return 0;

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c

@@ -22,7 +22,7 @@
  * Authors: Alex Deucher
  * Authors: Alex Deucher
  */
  */
 
 
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_i2c.h"
 #include "amdgpu_i2c.h"

+ 73 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -39,7 +39,7 @@
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
 #include <linux/vga_switcheroo.h>
-#include "drm_crtc_helper.h"
+#include <drm/drm_crtc_helper.h>
 
 
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
 #include "amdgpu_irq.h"
@@ -65,9 +65,11 @@
  * - 3.13.0 - Add PRT support
  * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
  * - 3.15.0 - Export more gpu info for gfx9
+ * - 3.16.0 - Add reserved vmid support
+ * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	15
+#define KMS_DRIVER_MINOR	17
 #define KMS_DRIVER_PATCHLEVEL	0
 
 
 int amdgpu_vram_limit = 0;
 int amdgpu_vram_limit = 0;
@@ -92,7 +94,8 @@ int amdgpu_vm_size = -1;
 int amdgpu_vm_block_size = -1;
 int amdgpu_vm_block_size = -1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
 int amdgpu_vm_debug = 0;
-int amdgpu_vram_page_split = 1024;
+int amdgpu_vram_page_split = 512;
+int amdgpu_vm_update_mode = -1;
 int amdgpu_exp_hw_support = 0;
 int amdgpu_exp_hw_support = 0;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_sched_hw_submission = 2;
@@ -110,6 +113,8 @@ int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
 int amdgpu_cntl_sb_buf_per_se = 0;
 int amdgpu_cntl_sb_buf_per_se = 0;
 int amdgpu_param_buf_per_se = 0;
 int amdgpu_param_buf_per_se = 0;
+int amdgpu_job_hang_limit = 0;
+int amdgpu_lbpw = -1;
 
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -177,6 +182,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
 MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
 MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
 module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
 module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
 
 
+MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");
+module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
+
 MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)");
 MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)");
 module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
 module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
 
 
@@ -232,6 +240,24 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
 MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
 MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
 module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
 module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
 
 
+MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
+module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
+
+MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(lbpw, amdgpu_lbpw, int, 0444);
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+int amdgpu_si_support = 0;
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
+module_param_named(si_support, amdgpu_si_support, int, 0444);
+#endif
+
+#ifdef CONFIG_DRM_AMDGPU_CIK
+int amdgpu_cik_support = 0;
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
+module_param_named(cik_support, amdgpu_cik_support, int, 0444);
+#endif
+
 
 
 static const struct pci_device_id pciidlist[] = {
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 #ifdef  CONFIG_DRM_AMDGPU_SI
@@ -460,6 +486,9 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+	/* Raven */
+	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
+
 	{0, 0, 0}
 	{0, 0, 0}
 };
 };
 
 
@@ -491,6 +520,7 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
 static int amdgpu_pci_probe(struct pci_dev *pdev,
 static int amdgpu_pci_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *ent)
 			    const struct pci_device_id *ent)
 {
 {
+	struct drm_device *dev;
 	unsigned long flags = ent->driver_data;
 	unsigned long flags = ent->driver_data;
 	int ret;
 	int ret;
 
 
@@ -513,7 +543,29 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
-	return drm_get_pci_dev(pdev, ent, &kms_driver);
+	dev = drm_dev_alloc(&kms_driver, &pdev->dev);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		goto err_free;
+
+	dev->pdev = pdev;
+
+	pci_set_drvdata(pdev, dev);
+
+	ret = drm_dev_register(dev, ent->driver_data);
+	if (ret)
+		goto err_pci;
+
+	return 0;
+
+err_pci:
+	pci_disable_device(pdev);
+err_free:
+	drm_dev_unref(dev);
+	return ret;
 }
 }
 
 
 static void
 static void
@@ -521,7 +573,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 {
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
 
-	drm_put_dev(dev);
+	drm_dev_unregister(dev);
+	drm_dev_unref(dev);
 }
 }
 
 
 static void
 static void
@@ -715,11 +768,21 @@ static const struct file_operations amdgpu_driver_kms_fops = {
 #endif
 #endif
 };
 };
 
 
+static bool
+amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
+				 bool in_vblank_irq, int *vpos, int *hpos,
+				 ktime_t *stime, ktime_t *etime,
+				 const struct drm_display_mode *mode)
+{
+	return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
+					  stime, etime, mode);
+}
+
 static struct drm_driver kms_driver = {
 static struct drm_driver kms_driver = {
 	.driver_features =
 	.driver_features =
 	    DRIVER_USE_AGP |
 	    DRIVER_USE_AGP |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
+	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
 	.load = amdgpu_driver_load_kms,
 	.load = amdgpu_driver_load_kms,
 	.open = amdgpu_driver_open_kms,
 	.open = amdgpu_driver_open_kms,
 	.postclose = amdgpu_driver_postclose_kms,
 	.postclose = amdgpu_driver_postclose_kms,
@@ -729,8 +792,8 @@ static struct drm_driver kms_driver = {
 	.get_vblank_counter = amdgpu_get_vblank_counter_kms,
 	.get_vblank_counter = amdgpu_get_vblank_counter_kms,
 	.enable_vblank = amdgpu_enable_vblank_kms,
 	.enable_vblank = amdgpu_enable_vblank_kms,
 	.disable_vblank = amdgpu_disable_vblank_kms,
 	.disable_vblank = amdgpu_disable_vblank_kms,
-	.get_vblank_timestamp = amdgpu_get_vblank_timestamp_kms,
-	.get_scanout_position = amdgpu_get_crtc_scanoutpos,
+	.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
+	.get_scanout_position = amdgpu_get_crtc_scanout_position,
 #if defined(CONFIG_DEBUG_FS)
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = amdgpu_debugfs_init,
 	.debugfs_init = amdgpu_debugfs_init,
 #endif
 #endif
@@ -807,7 +870,7 @@ static int __init amdgpu_init(void)
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 	amdgpu_register_atpx_handler();
 	/* let modprobe override vga console setting */
 	/* let modprobe override vga console setting */
-	return drm_pci_init(driver, pdriver);
+	return pci_register_driver(pdriver);
 
 
 error_sched:
 error_sched:
 	amdgpu_fence_slab_fini();
 	amdgpu_fence_slab_fini();
@@ -822,7 +885,7 @@ error_sync:
 static void __exit amdgpu_exit(void)
 static void __exit amdgpu_exit(void)
 {
 {
 	amdgpu_amdkfd_fini();
 	amdgpu_amdkfd_fini();
-	drm_pci_exit(driver, pdriver);
+	pci_unregister_driver(pdriver);
 	amdgpu_unregister_atpx_handler();
 	amdgpu_unregister_atpx_handler();
 	amdgpu_sync_fini();
 	amdgpu_sync_fini();
 	amd_sched_fence_slab_fini();
 	amd_sched_fence_slab_fini();

+ 12 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

@@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
 	}
 	}
 }
 }
 
 
+void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring)
+{
+	if (ring)
+		amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
+}
+
 /*
 /*
  * Common fence implementation
  * Common fence implementation
  */
  */
@@ -660,11 +666,17 @@ static const struct drm_info_list amdgpu_debugfs_fence_list[] = {
 	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
 	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
 	{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
 	{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
 };
 };
+
+static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = {
+	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
+};
 #endif
 #endif
 
 
 int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 {
 {
 #if defined(CONFIG_DEBUG_FS)
 #if defined(CONFIG_DEBUG_FS)
+	if (amdgpu_sriov_vf(adev))
+		return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1);
 	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
 	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
 #else
 #else
 	return 0;
 	return 0;

+ 4 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

@@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
  *
  *
  * Unbinds the requested pages from the gart page table and
  * Unbinds the requested pages from the gart page table and
  * replaces them with the dummy page (all asics).
  * replaces them with the dummy page (all asics).
+ * Returns 0 for success, -EINVAL for failure.
  */
  */
-void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 			int pages)
 			int pages)
 {
 {
 	unsigned t;
 	unsigned t;
@@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 
 
 	if (!adev->gart.ready) {
 	if (!adev->gart.ready) {
 		WARN(1, "trying to unbind memory from uninitialized GART !\n");
 		WARN(1, "trying to unbind memory from uninitialized GART !\n");
-		return;
+		return -EINVAL;
 	}
 	}
 
 
 	t = offset / AMDGPU_GPU_PAGE_SIZE;
 	t = offset / AMDGPU_GPU_PAGE_SIZE;
@@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 	}
 	}
 	mb();
 	mb();
 	amdgpu_gart_flush_gpu_tlb(adev, 0);
 	amdgpu_gart_flush_gpu_tlb(adev, 0);
+	return 0;
 }
 }
 
 
 /**
 /**

+ 16 - 38
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

@@ -219,16 +219,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	ttm_eu_backoff_reservation(&ticket, &list);
 	ttm_eu_backoff_reservation(&ticket, &list);
 }
 }
 
 
-static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
-{
-	if (r == -EDEADLK) {
-		r = amdgpu_gpu_reset(adev);
-		if (!r)
-			r = -EAGAIN;
-	}
-	return r;
-}
-
 /*
 /*
  * GEM ioctls.
  * GEM ioctls.
  */
  */
@@ -249,20 +239,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 				      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 				      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 				      AMDGPU_GEM_CREATE_VRAM_CLEARED|
 				      AMDGPU_GEM_CREATE_VRAM_CLEARED|
 				      AMDGPU_GEM_CREATE_SHADOW |
 				      AMDGPU_GEM_CREATE_SHADOW |
-				      AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
-		r = -EINVAL;
-		goto error_unlock;
-	}
+				      AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
+		return -EINVAL;
+
 	/* reject invalid gem domains */
 	/* reject invalid gem domains */
 	if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
 	if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
 				 AMDGPU_GEM_DOMAIN_GTT |
 				 AMDGPU_GEM_DOMAIN_GTT |
 				 AMDGPU_GEM_DOMAIN_VRAM |
 				 AMDGPU_GEM_DOMAIN_VRAM |
 				 AMDGPU_GEM_DOMAIN_GDS |
 				 AMDGPU_GEM_DOMAIN_GDS |
 				 AMDGPU_GEM_DOMAIN_GWS |
 				 AMDGPU_GEM_DOMAIN_GWS |
-				 AMDGPU_GEM_DOMAIN_OA)) {
-		r = -EINVAL;
-		goto error_unlock;
-	}
+				 AMDGPU_GEM_DOMAIN_OA))
+		return -EINVAL;
 
 
 	/* create a gem object to contain this object in */
 	/* create a gem object to contain this object in */
 	if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
 	if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
@@ -274,10 +261,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			size = size << AMDGPU_GWS_SHIFT;
 			size = size << AMDGPU_GWS_SHIFT;
 		else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
 		else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
 			size = size << AMDGPU_OA_SHIFT;
 			size = size << AMDGPU_OA_SHIFT;
-		else {
-			r = -EINVAL;
-			goto error_unlock;
-		}
+		else
+			return -EINVAL;
 	}
 	}
 	size = roundup(size, PAGE_SIZE);
 	size = roundup(size, PAGE_SIZE);
 
 
@@ -286,21 +271,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 				     args->in.domain_flags,
 				     args->in.domain_flags,
 				     kernel, &gobj);
 				     kernel, &gobj);
 	if (r)
 	if (r)
-		goto error_unlock;
+		return r;
 
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
 	r = drm_gem_handle_create(filp, gobj, &handle);
 	/* drop reference from allocate - handle holds it now */
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(gobj);
 	drm_gem_object_unreference_unlocked(gobj);
 	if (r)
 	if (r)
-		goto error_unlock;
+		return r;
 
 
 	memset(args, 0, sizeof(*args));
 	memset(args, 0, sizeof(*args));
 	args->out.handle = handle;
 	args->out.handle = handle;
 	return 0;
 	return 0;
-
-error_unlock:
-	r = amdgpu_gem_handle_lockup(adev, r);
-	return r;
 }
 }
 
 
 int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
@@ -334,7 +315,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 				     AMDGPU_GEM_DOMAIN_CPU, 0,
 				     AMDGPU_GEM_DOMAIN_CPU, 0,
 				     0, &gobj);
 				     0, &gobj);
 	if (r)
 	if (r)
-		goto handle_lockup;
+		return r;
 
 
 	bo = gem_to_amdgpu_bo(gobj);
 	bo = gem_to_amdgpu_bo(gobj);
 	bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
 	bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
@@ -374,7 +355,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	/* drop reference from allocate - handle holds it now */
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(gobj);
 	drm_gem_object_unreference_unlocked(gobj);
 	if (r)
 	if (r)
-		goto handle_lockup;
+		return r;
 
 
 	args->handle = handle;
 	args->handle = handle;
 	return 0;
 	return 0;
@@ -388,9 +369,6 @@ unlock_mmap_sem:
 release_object:
 release_object:
 	drm_gem_object_unreference_unlocked(gobj);
 	drm_gem_object_unreference_unlocked(gobj);
 
 
-handle_lockup:
-	r = amdgpu_gem_handle_lockup(adev, r);
-
 	return r;
 	return r;
 }
 }
 
 
@@ -456,7 +434,6 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
 int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp)
 			      struct drm_file *filp)
 {
 {
-	struct amdgpu_device *adev = dev->dev_private;
 	union drm_amdgpu_gem_wait_idle *args = data;
 	union drm_amdgpu_gem_wait_idle *args = data;
 	struct drm_gem_object *gobj;
 	struct drm_gem_object *gobj;
 	struct amdgpu_bo *robj;
 	struct amdgpu_bo *robj;
@@ -484,7 +461,6 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 		r = ret;
 		r = ret;
 
 
 	drm_gem_object_unreference_unlocked(gobj);
 	drm_gem_object_unreference_unlocked(gobj);
-	r = amdgpu_gem_handle_lockup(adev, r);
 	return r;
 	return r;
 }
 }
 
 
@@ -593,9 +569,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	uint64_t va_flags;
 	uint64_t va_flags;
 	int r = 0;
 	int r = 0;
 
 
-	if (!adev->vm_manager.enabled)
-		return -ENOTTY;
-
 	if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
 	if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
 		dev_err(&dev->pdev->dev,
 		dev_err(&dev->pdev->dev,
 			"va_address 0x%lX is in reserved area 0x%X\n",
 			"va_address 0x%lX is in reserved area 0x%X\n",
@@ -621,6 +594,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 			args->operation);
 			args->operation);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
+	if ((args->operation == AMDGPU_VA_OP_MAP) ||
+	    (args->operation == AMDGPU_VA_OP_REPLACE)) {
+		if (amdgpu_kms_vram_lost(adev, fpriv))
+			return -ENODEV;
+	}
 
 
 	INIT_LIST_HEAD(&list);
 	INIT_LIST_HEAD(&list);
 	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
 	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&

+ 206 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

@@ -108,3 +108,209 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
 		p = next + 1;
 		p = next + 1;
 	}
 	}
 }
 }
+
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* we've run out of HW */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		if (adev->gfx.mec.num_mec > 1) {
+			/* policy: amdgpu owns the first two queues of the first MEC */
+			if (mec == 0 && queue < 2)
+				set_bit(i, adev->gfx.mec.queue_bitmap);
+		} else {
+			/* policy: amdgpu owns all queues in the first pipe */
+			if (mec == 0 && pipe == 0)
+				set_bit(i, adev->gfx.mec.queue_bitmap);
+		}
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
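A quick illustration, not part of the commit: amdgpu_gfx_compute_queue_acquire() above records the kernel driver's share of the compute queues in a flat bitmap. A stand-alone rendering of the multi-MEC branch of that policy, with an invented topology of 2 MECs, 4 pipes per MEC and 8 queues per pipe:

#include <stdio.h>

int main(void)
{
	const int num_mec = 2, pipes_per_mec = 4, queues_per_pipe = 8;
	int owned = 0, i;

	for (i = 0; i < num_mec * pipes_per_mec * queues_per_pipe; ++i) {
		int queue = i % queues_per_pipe;
		int pipe  = (i / queues_per_pipe) % pipes_per_mec;
		int mec   = (i / queues_per_pipe) / pipes_per_mec;

		/* multi-MEC branch of the policy above: the driver keeps
		 * queues 0 and 1 of every pipe on the first MEC */
		if (mec == 0 && queue < 2) {
			printf("amdgpu owns mec %d pipe %d queue %d (bit %d)\n",
			       mec, pipe, queue, i);
			owned++;
		}
	}
	printf("%d compute queues reserved for amdgpu\n", owned);
	return 0;
}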
+static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
+				  struct amdgpu_ring *ring)
+{
+	int queue_bit;
+	int mec, pipe, queue;
+
+	queue_bit = adev->gfx.mec.num_mec
+		    * adev->gfx.mec.num_pipe_per_mec
+		    * adev->gfx.mec.num_queue_per_pipe;
+
+	while (queue_bit-- >= 0) {
+		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
+
+		/* Using pipes 2/3 from MEC 2 seems to cause problems */
+		if (mec == 1 && pipe > 1)
+			continue;
+
+		ring->me = mec + 1;
+		ring->pipe = pipe;
+		ring->queue = queue;
+
+		return 0;
+	}
+
+	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
+	return -EINVAL;
+}
+
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
+			     struct amdgpu_ring *ring,
+			     struct amdgpu_irq_src *irq)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	int r = 0;
+
+	mutex_init(&kiq->ring_mutex);
+
+	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
+	if (r)
+		return r;
+
+	ring->adev = NULL;
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
+
+	r = amdgpu_gfx_kiq_acquire(adev, ring);
+	if (r)
+		return r;
+
+	ring->eop_gpu_addr = kiq->eop_gpu_addr;
+	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+	r = amdgpu_ring_init(adev, ring, 1024,
+			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
+	if (r)
+		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+
+	return r;
+}
+
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
+			      struct amdgpu_irq_src *irq)
+{
+	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
+	amdgpu_ring_fini(ring);
+}
+
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
+}
+
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+			unsigned hpd_size)
+{
+	int r;
+	u32 *hpd;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
+				    &kiq->eop_gpu_addr, (void **)&hpd);
+	if (r) {
+		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
+		return r;
+	}
+
+	memset(hpd, 0, hpd_size);
+
+	r = amdgpu_bo_reserve(kiq->eop_obj, true);
+	if (unlikely(r != 0))
+		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
+	amdgpu_bo_kunmap(kiq->eop_obj);
+	amdgpu_bo_unreserve(kiq->eop_obj);
+
+	return 0;
+}
+
+/* create MQD for each compute queue */
+int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
+				   unsigned mqd_size)
+{
+	struct amdgpu_ring *ring = NULL;
+	int r, i;
+
+	/* create MQD for KIQ */
+	ring = &adev->gfx.kiq.ring;
+	if (!ring->mqd_obj) {
+		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
+		if (r) {
+			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+			return r;
+		}
+
+		/* prepare MQD backup */
+		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
+		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
+				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+	}
+
+	/* create MQD for each KCQ */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		ring = &adev->gfx.compute_ring[i];
+		if (!ring->mqd_obj) {
+			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
+			if (r) {
+				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+				return r;
+			}
+
+			/* prepare MQD backup */
+			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
+			if (!adev->gfx.mec.mqd_backup[i])
+				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+		}
+	}
+
+	return 0;
+}
+
+void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = NULL;
+	int i;
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		ring = &adev->gfx.compute_ring[i];
+		kfree(adev->gfx.mec.mqd_backup[i]);
+		amdgpu_bo_free_kernel(&ring->mqd_obj,
+				      &ring->mqd_gpu_addr,
+				      &ring->mqd_ptr);
+	}
+
+	ring = &adev->gfx.kiq.ring;
+	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
+	amdgpu_bo_free_kernel(&ring->mqd_obj,
+			      &ring->mqd_gpu_addr,
+			      &ring->mqd_ptr);
+}

+ 60 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

@@ -30,4 +30,64 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
 void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
 		unsigned max_sh);

+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
+
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
+			     struct amdgpu_ring *ring,
+			     struct amdgpu_irq_src *irq);
+
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
+			      struct amdgpu_irq_src *irq);
+
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+			unsigned hpd_size);
+
+int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
+				   unsigned mqd_size);
+void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
+
+/**
+ * amdgpu_gfx_create_bitmask - create a bitmask
+ *
+ * @bit_width: length of the mask
+ *
+ * Create a variable-length bit mask.
+ * Returns the bitmask.
+ */
+static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
+{
+	return (u32)((1ULL << bit_width) - 1);
+}
+
+static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev,
+					  int mec, int pipe, int queue)
+{
+	int bit = 0;
+
+	bit += mec * adev->gfx.mec.num_pipe_per_mec
+		* adev->gfx.mec.num_queue_per_pipe;
+	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+	bit += queue;
+
+	return bit;
+}
+
+static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
+					   int *mec, int *pipe, int *queue)
+{
+	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
+	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
+		% adev->gfx.mec.num_pipe_per_mec;
+	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
+	       / adev->gfx.mec.num_pipe_per_mec;
+}
+
+static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
+						   int mec, int pipe, int queue)
+{
+	return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
+			adev->gfx.mec.queue_bitmap);
+}
+
 #endif
 #endif
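
The inline helpers added above define a reversible mapping between a (mec, pipe, queue) triple and a flat bit index into mec.queue_bitmap, and amdgpu_gfx_create_bitmask() builds the mask through a 64-bit shift so a width of 32 stays well defined. A standalone round-trip sketch; the per-MEC geometry (4 pipes, 8 queues per pipe) is assumed here purely for illustration:

#include <assert.h>
#include <stdio.h>

/* assumed geometry, normally read from adev->gfx.mec.* */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

static unsigned int queue_to_bit(int mec, int pipe, int queue)
{
	return mec * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE +
	       pipe * NUM_QUEUE_PER_PIPE + queue;
}

static void bit_to_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % NUM_QUEUE_PER_PIPE;
	*pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	*mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}

static unsigned int create_bitmask(unsigned int bit_width)
{
	/* same trick as amdgpu_gfx_create_bitmask(): 1ULL keeps width 32 defined */
	return (unsigned int)((1ULL << bit_width) - 1);
}

int main(void)
{
	int mec, pipe, queue;
	unsigned int bit = queue_to_bit(1, 2, 5);	/* MEC1, pipe 2, queue 5 */

	bit_to_queue(bit, &mec, &pipe, &queue);
	assert(mec == 1 && pipe == 2 && queue == 5);

	printf("bit %u -> mec %d pipe %d queue %d, mask(8)=0x%x\n",
	       bit, mec, pipe, queue, create_bitmask(8));
	return 0;
}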

+ 13 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

@@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 {
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
 	struct amdgpu_ib *ib = &ibs[0];
+	struct dma_fence *tmp = NULL;
 	bool skip_preamble, need_ctx_switch;
 	bool skip_preamble, need_ctx_switch;
 	unsigned patch_offset = ~0;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	struct amdgpu_vm *vm;
@@ -160,8 +161,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 		return r;
 	}
 	}
-	if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
+
+	if (ring->funcs->emit_pipeline_sync && job &&
+	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
+	     amdgpu_vm_need_pipeline_sync(ring, job))) {
 		amdgpu_ring_emit_pipeline_sync(ring);
 		amdgpu_ring_emit_pipeline_sync(ring);
+		dma_fence_put(tmp);
+	}
+
+	if (ring->funcs->insert_start)
+		ring->funcs->insert_start(ring);
 
 
 	if (vm) {
 	if (vm) {
 		r = amdgpu_vm_flush(ring, job);
 		r = amdgpu_vm_flush(ring, job);
@@ -188,8 +197,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 			status |= AMDGPU_HAVE_CTX_SWITCH;
 			status |= AMDGPU_HAVE_CTX_SWITCH;
 		status |= job->preamble_status;
 		status |= job->preamble_status;
 
 
-		if (vm)
-			status |= AMDGPU_VM_DOMAIN;
 		amdgpu_ring_emit_cntxcntl(ring, status);
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 	}
 
 
@@ -208,6 +215,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		need_ctx_switch = false;
 		need_ctx_switch = false;
 	}
 	}
 
 
+	if (ring->funcs->emit_tmz)
+		amdgpu_ring_emit_tmz(ring, false);
+
 	if (ring->funcs->emit_hdp_invalidate
 	if (ring->funcs->emit_hdp_invalidate
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 	    && !(adev->flags & AMD_IS_APU)
 	    && !(adev->flags & AMD_IS_APU)

+ 2 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

@@ -62,8 +62,9 @@ enum amdgpu_ih_clientid
     AMDGPU_IH_CLIENTID_MP0	    = 0x1e,
     AMDGPU_IH_CLIENTID_MP0	    = 0x1e,
     AMDGPU_IH_CLIENTID_MP1	    = 0x1f,
     AMDGPU_IH_CLIENTID_MP1	    = 0x1f,
 
 
-    AMDGPU_IH_CLIENTID_MAX
+    AMDGPU_IH_CLIENTID_MAX,
 
 
+    AMDGPU_IH_CLIENTID_VCN	    = AMDGPU_IH_CLIENTID_UVD
 };
 };
 
 
 #define AMDGPU_IH_CLIENTID_LEGACY 0
 #define AMDGPU_IH_CLIENTID_LEGACY 0

+ 2 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c

@@ -83,7 +83,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
 						  reset_work);
 						  reset_work);
 
 
-	amdgpu_gpu_reset(adev);
+	if (!amdgpu_sriov_vf(adev))
+		amdgpu_gpu_reset(adev);
 }
 }
 
 
 /* Disable *all* interrupts */
 /* Disable *all* interrupts */

+ 31 - 11
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

@@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 		  job->base.sched->name,
 		  job->base.sched->name,
 		  atomic_read(&job->ring->fence_drv.last_seq),
 		  atomic_read(&job->ring->fence_drv.last_seq),
 		  job->ring->fence_drv.sync_seq);
 		  job->ring->fence_drv.sync_seq);
-	amdgpu_gpu_reset(job->adev);
+
+	if (amdgpu_sriov_vf(job->adev))
+		amdgpu_sriov_gpu_reset(job->adev, job);
+	else
+		amdgpu_gpu_reset(job->adev);
 }
 }
 
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -57,9 +61,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
 	(*job)->num_ibs = num_ibs;
-	(*job)->need_pipeline_sync = false;
 
 
 	amdgpu_sync_create(&(*job)->sync);
 	amdgpu_sync_create(&(*job)->sync);
+	amdgpu_sync_create(&(*job)->dep_sync);
+	amdgpu_sync_create(&(*job)->sched_sync);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -98,6 +103,8 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 
 
 	dma_fence_put(job->fence);
 	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	amdgpu_sync_free(&job->sync);
+	amdgpu_sync_free(&job->dep_sync);
+	amdgpu_sync_free(&job->sched_sync);
 	kfree(job);
 	kfree(job);
 }
 }
 
 
@@ -107,6 +114,8 @@ void amdgpu_job_free(struct amdgpu_job *job)
 
 
 	dma_fence_put(job->fence);
 	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	amdgpu_sync_free(&job->sync);
+	amdgpu_sync_free(&job->dep_sync);
+	amdgpu_sync_free(&job->sched_sync);
 	kfree(job);
 	kfree(job);
 }
 }
 
 
@@ -138,11 +147,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
 	struct amdgpu_vm *vm = job->vm;
 	struct amdgpu_vm *vm = job->vm;
 
 
-	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);
+	struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync);
+	int r;
 
 
+	if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) {
+		r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
+		if (r)
+			DRM_ERROR("Error adding fence to sync (%d)\n", r);
+	}
+	if (!fence)
+		fence = amdgpu_sync_get_fence(&job->sync);
 	while (fence == NULL && vm && !job->vm_id) {
 	while (fence == NULL && vm && !job->vm_id) {
 		struct amdgpu_ring *ring = job->ring;
 		struct amdgpu_ring *ring = job->ring;
-		int r;
 
 
 		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
 		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
 				      &job->base.s_fence->finished,
 				      &job->base.s_fence->finished,
@@ -153,9 +169,6 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		fence = amdgpu_sync_get_fence(&job->sync);
 		fence = amdgpu_sync_get_fence(&job->sync);
 	}
 	}
 
 
-	if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
-		job->need_pipeline_sync = true;
-
 	return fence;
 	return fence;
 }
 }
 
 
@@ -163,6 +176,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 {
 {
 	struct dma_fence *fence = NULL;
 	struct dma_fence *fence = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_job *job;
+	struct amdgpu_fpriv *fpriv = NULL;
 	int r;
 	int r;
 
 
 	if (!sched_job) {
 	if (!sched_job) {
@@ -174,10 +188,16 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 
 
 	trace_amdgpu_sched_run_job(job);
 	trace_amdgpu_sched_run_job(job);
-	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
-	if (r)
-		DRM_ERROR("Error scheduling IBs (%d)\n", r);
-
+	if (job->vm)
+		fpriv = container_of(job->vm, struct amdgpu_fpriv, vm);
+	/* skip ib schedule when vram is lost */
+	if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv))
+		DRM_ERROR("Skip scheduling IBs!\n");
+	else {
+		r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
+		if (r)
+			DRM_ERROR("Error scheduling IBs (%d)\n", r);
+	}
 	/* if gpu reset, hw fence will be replaced here */
 	/* if gpu reset, hw fence will be replaced here */
 	dma_fence_put(job->fence);
 	dma_fence_put(job->fence);
 	job->fence = dma_fence_get(fence);
 	job->fence = dma_fence_get(fence);

+ 72 - 44
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -87,6 +87,41 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	struct amdgpu_device *adev;
 	struct amdgpu_device *adev;
 	int r, acpi_status;
 	int r, acpi_status;
 
 
+#ifdef CONFIG_DRM_AMDGPU_SI
+	if (!amdgpu_si_support) {
+		switch (flags & AMD_ASIC_MASK) {
+		case CHIP_TAHITI:
+		case CHIP_PITCAIRN:
+		case CHIP_VERDE:
+		case CHIP_OLAND:
+		case CHIP_HAINAN:
+			dev_info(dev->dev,
+				 "SI support provided by radeon.\n");
+			dev_info(dev->dev,
+				 "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
+				);
+			return -ENODEV;
+		}
+	}
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	if (!amdgpu_cik_support) {
+		switch (flags & AMD_ASIC_MASK) {
+		case CHIP_KAVERI:
+		case CHIP_BONAIRE:
+		case CHIP_HAWAII:
+		case CHIP_KABINI:
+		case CHIP_MULLINS:
+			dev_info(dev->dev,
+				 "CIK support provided by radeon.\n");
+			dev_info(dev->dev,
+				 "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
+				);
+			return -ENODEV;
+		}
+	}
+#endif
+
 	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 	if (adev == NULL) {
 	if (adev == NULL) {
 		return -ENOMEM;
 		return -ENOMEM;
@@ -235,6 +270,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct drm_amdgpu_info *info = data;
 	struct drm_amdgpu_info *info = data;
 	struct amdgpu_mode_info *minfo = &adev->mode_info;
 	struct amdgpu_mode_info *minfo = &adev->mode_info;
 	void __user *out = (void __user *)(uintptr_t)info->return_pointer;
 	void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@@ -247,6 +283,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 
 
 	if (!info->return_size || !info->return_pointer)
 	if (!info->return_size || !info->return_pointer)
 		return -EINVAL;
 		return -EINVAL;
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 
 
 	switch (info->query) {
 	switch (info->query) {
 	case AMDGPU_INFO_ACCEL_WORKING:
 	case AMDGPU_INFO_ACCEL_WORKING:
@@ -319,6 +357,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
 			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
 			ib_size_alignment = 1;
 			ib_size_alignment = 1;
 			break;
 			break;
+		case AMDGPU_HW_IP_VCN_DEC:
+			type = AMD_IP_BLOCK_TYPE_VCN;
+			ring_mask = adev->vcn.ring_dec.ready ? 1 : 0;
+			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+			ib_size_alignment = 16;
+			break;
+		case AMDGPU_HW_IP_VCN_ENC:
+			type = AMD_IP_BLOCK_TYPE_VCN;
+			for (i = 0; i < adev->vcn.num_enc_rings; i++)
+				ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i);
+			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+			ib_size_alignment = 1;
+			break;
 		default:
 		default:
 			return -EINVAL;
 			return -EINVAL;
 		}
 		}
@@ -361,6 +412,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		case AMDGPU_HW_IP_UVD_ENC:
 		case AMDGPU_HW_IP_UVD_ENC:
 			type = AMD_IP_BLOCK_TYPE_UVD;
 			type = AMD_IP_BLOCK_TYPE_UVD;
 			break;
 			break;
+		case AMDGPU_HW_IP_VCN_DEC:
+		case AMDGPU_HW_IP_VCN_ENC:
+			type = AMD_IP_BLOCK_TYPE_VCN;
+			break;
 		default:
 		default:
 			return -EINVAL;
 			return -EINVAL;
 		}
 		}
@@ -397,6 +452,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	case AMDGPU_INFO_NUM_EVICTIONS:
 	case AMDGPU_INFO_NUM_EVICTIONS:
 		ui64 = atomic64_read(&adev->num_evictions);
 		ui64 = atomic64_read(&adev->num_evictions);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+	case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS:
+		ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
+		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_VRAM_USAGE:
 	case AMDGPU_INFO_VRAM_USAGE:
 		ui64 = atomic64_read(&adev->vram_usage);
 		ui64 = atomic64_read(&adev->vram_usage);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
@@ -730,6 +788,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
 	vga_switcheroo_process_delayed_switch();
 	vga_switcheroo_process_delayed_switch();
 }
 }
 
 
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+			  struct amdgpu_fpriv *fpriv)
+{
+	return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
+}
+
 /**
 /**
  * amdgpu_driver_open_kms - drm callback for open
  * amdgpu_driver_open_kms - drm callback for open
  *
  *
@@ -757,7 +821,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		goto out_suspend;
 		goto out_suspend;
 	}
 	}
 
 
-	r = amdgpu_vm_init(adev, &fpriv->vm);
+	r = amdgpu_vm_init(adev, &fpriv->vm,
+			   AMDGPU_VM_CONTEXT_GFX);
 	if (r) {
 	if (r) {
 		kfree(fpriv);
 		kfree(fpriv);
 		goto out_suspend;
 		goto out_suspend;
@@ -782,6 +847,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 
 
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
 
+	fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	file_priv->driver_priv = fpriv;
 	file_priv->driver_priv = fpriv;
 
 
 out_suspend:
 out_suspend:
@@ -814,8 +880,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 
 	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
 	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
 
 
-	amdgpu_uvd_free_handles(adev, file_priv);
-	amdgpu_vce_free_handles(adev, file_priv);
+	if (adev->asic_type != CHIP_RAVEN) {
+		amdgpu_uvd_free_handles(adev, file_priv);
+		amdgpu_vce_free_handles(adev, file_priv);
+	}
 
 
 	amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
 	amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
 
 
@@ -945,50 +1013,10 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 	amdgpu_irq_put(adev, &adev->crtc_irq, idx);
 	amdgpu_irq_put(adev, &adev->crtc_irq, idx);
 }
 }
 
 
-/**
- * amdgpu_get_vblank_timestamp_kms - get vblank timestamp
- *
- * @dev: drm dev pointer
- * @crtc: crtc to get the timestamp for
- * @max_error: max error
- * @vblank_time: time value
- * @flags: flags passed to the driver
- *
- * Gets the timestamp on the requested crtc based on the
- * scanout position.  (all asics).
- * Returns postive status flags on success, negative error on failure.
- */
-int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
-				    int *max_error,
-				    struct timeval *vblank_time,
-				    unsigned flags)
-{
-	struct drm_crtc *crtc;
-	struct amdgpu_device *adev = dev->dev_private;
-
-	if (pipe >= dev->num_crtcs) {
-		DRM_ERROR("Invalid crtc %u\n", pipe);
-		return -EINVAL;
-	}
-
-	/* Get associated drm_crtc: */
-	crtc = &adev->mode_info.crtcs[pipe]->base;
-	if (!crtc) {
-		/* This can occur on driver load if some component fails to
-		 * initialize completely and driver is unloaded */
-		DRM_ERROR("Uninitialized crtc %d\n", pipe);
-		return -EINVAL;
-	}
-
-	/* Helper routine in DRM core does all the work: */
-	return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
-						     vblank_time, flags,
-						     &crtc->hwmode);
-}
-
 const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	/* KMS */
 	/* KMS */
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
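
amdgpu_kms_vram_lost() above is a generation-counter check: amdgpu_driver_open_kms() snapshots adev->vram_lost_counter into the new fpriv, and later ioctls compare that snapshot against the current value, so a client opened before a VRAM-losing reset gets -ENODEV (or has its IBs skipped) instead of operating on stale memory. A minimal userspace sketch of the same pattern; the names and the C11 atomic are illustrative only:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* device-global generation counter, bumped whenever VRAM contents are lost */
static atomic_uint vram_lost_counter;

struct fpriv {
	unsigned int vram_lost_snapshot;	/* taken at open time */
};

static void open_device(struct fpriv *fpriv)
{
	fpriv->vram_lost_snapshot = atomic_load(&vram_lost_counter);
}

static bool vram_lost(const struct fpriv *fpriv)
{
	/* true once the device counter has moved past our snapshot */
	return fpriv->vram_lost_snapshot != atomic_load(&vram_lost_counter);
}

int main(void)
{
	struct fpriv client;

	open_device(&client);
	printf("before reset: vram_lost=%d\n", vram_lost(&client));	/* 0 */

	atomic_fetch_add(&vram_lost_counter, 1);	/* a reset lost VRAM */
	printf("after reset:  vram_lost=%d\n", vram_lost(&client));	/* 1 */
	return 0;
}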

+ 3 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h

@@ -534,6 +534,9 @@ struct amdgpu_framebuffer {
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 
 
 /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
 /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
+#define DRM_SCANOUTPOS_VALID        (1 << 0)
+#define DRM_SCANOUTPOS_IN_VBLANK    (1 << 1)
+#define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
 #define USE_REAL_VBLANKSTART		(1 << 30)
 #define USE_REAL_VBLANKSTART		(1 << 30)
 #define GET_DISTANCE_TO_VBLANKSTART	(1 << 31)
 #define GET_DISTANCE_TO_VBLANKSTART	(1 << 31)
 
 

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

@@ -960,6 +960,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	/* hurrah the memory is not visible ! */
 	/* hurrah the memory is not visible ! */
+	atomic64_inc(&adev->num_vram_cpu_page_faults);
 	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
 	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < abo->placement.num_placement; i++) {
 	for (i = 0; i < abo->placement.num_placement; i++) {

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -72,6 +72,7 @@ static int amdgpu_pp_early_init(void *handle)
 	case CHIP_CARRIZO:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 	case CHIP_STONEY:
 	case CHIP_VEGA10:
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		adev->pp_enabled = true;
 		adev->pp_enabled = true;
 		if (amdgpu_create_pp_handle(adev))
 		if (amdgpu_create_pp_handle(adev))
 			return -EINVAL;
 			return -EINVAL;

+ 24 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -24,12 +24,13 @@
  */
  */
 
 
 #include <linux/firmware.h>
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ucode.h"
 #include "soc15_common.h"
 #include "soc15_common.h"
 #include "psp_v3_1.h"
 #include "psp_v3_1.h"
+#include "psp_v10_0.h"
 
 
 static void psp_set_funcs(struct amdgpu_device *adev);
 static void psp_set_funcs(struct amdgpu_device *adev);
 
 
@@ -61,6 +62,12 @@ static int psp_sw_init(void *handle)
 		psp->compare_sram_data = psp_v3_1_compare_sram_data;
 		psp->compare_sram_data = psp_v3_1_compare_sram_data;
 		psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
 		psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
 		break;
 		break;
+	case CHIP_RAVEN:
+		psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
+		psp->ring_init = psp_v10_0_ring_init;
+		psp->cmd_submit = psp_v10_0_cmd_submit;
+		psp->compare_sram_data = psp_v10_0_compare_sram_data;
+		break;
 	default:
 	default:
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
@@ -230,6 +237,13 @@ static int psp_asd_load(struct psp_context *psp)
 	int ret;
 	int ret;
 	struct psp_gfx_cmd_resp *cmd;
 	struct psp_gfx_cmd_resp *cmd;
 
 
+	/* If the PSP version doesn't match the ASD version, ASD loading will fail.
+	 * Add a workaround to bypass this for SR-IOV for now.
+	 * TODO: add a version check to make this common
+	 */
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!cmd)
 	if (!cmd)
 		return -ENOMEM;
 		return -ENOMEM;
@@ -542,3 +556,12 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
 	.rev = 0,
 	.rev = 0,
 	.funcs = &psp_ip_funcs,
 	.funcs = &psp_ip_funcs,
 };
 };
+
+const struct amdgpu_ip_block_version psp_v10_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_PSP,
+	.major = 10,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &psp_ip_funcs,
+};

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h

@@ -138,4 +138,6 @@ extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
 extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
 extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
 			uint32_t field_val, uint32_t mask, bool check_changed);
 			uint32_t field_val, uint32_t mask, bool check_changed);
 
 
+extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
+
 #endif
 #endif

+ 299 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

@@ -0,0 +1,299 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ring.h"
+
+static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
+				    int hw_ip)
+{
+	if (!mapper)
+		return -EINVAL;
+
+	if (hw_ip > AMDGPU_MAX_IP_NUM)
+		return -EINVAL;
+
+	mapper->hw_ip = hw_ip;
+	mutex_init(&mapper->lock);
+
+	memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
+
+	return 0;
+}
+
+static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
+					  int ring)
+{
+	return mapper->queue_map[ring];
+}
+
+static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
+			     int ring, struct amdgpu_ring *pring)
+{
+	if (WARN_ON(mapper->queue_map[ring])) {
+		DRM_ERROR("Unexpected ring re-map\n");
+		return -EINVAL;
+	}
+
+	mapper->queue_map[ring] = pring;
+
+	return 0;
+}
+
+static int amdgpu_identity_map(struct amdgpu_device *adev,
+			       struct amdgpu_queue_mapper *mapper,
+			       int ring,
+			       struct amdgpu_ring **out_ring)
+{
+	switch (mapper->hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		*out_ring = &adev->gfx.gfx_ring[ring];
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		*out_ring = &adev->gfx.compute_ring[ring];
+		break;
+	case AMDGPU_HW_IP_DMA:
+		*out_ring = &adev->sdma.instance[ring].ring;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		*out_ring = &adev->uvd.ring;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		*out_ring = &adev->vce.ring[ring];
+		break;
+	case AMDGPU_HW_IP_UVD_ENC:
+		*out_ring = &adev->uvd.ring_enc[ring];
+		break;
+	case AMDGPU_HW_IP_VCN_DEC:
+		*out_ring = &adev->vcn.ring_dec;
+		break;
+	case AMDGPU_HW_IP_VCN_ENC:
+		*out_ring = &adev->vcn.ring_enc[ring];
+		break;
+	default:
+		*out_ring = NULL;
+		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
+		return -EINVAL;
+	}
+
+	return amdgpu_update_cached_map(mapper, ring, *out_ring);
+}
+
+static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
+{
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		return AMDGPU_RING_TYPE_GFX;
+	case AMDGPU_HW_IP_COMPUTE:
+		return AMDGPU_RING_TYPE_COMPUTE;
+	case AMDGPU_HW_IP_DMA:
+		return AMDGPU_RING_TYPE_SDMA;
+	case AMDGPU_HW_IP_UVD:
+		return AMDGPU_RING_TYPE_UVD;
+	case AMDGPU_HW_IP_VCE:
+		return AMDGPU_RING_TYPE_VCE;
+	default:
+		DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
+		return -1;
+	}
+}
+
+static int amdgpu_lru_map(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mapper *mapper,
+			  int user_ring,
+			  struct amdgpu_ring **out_ring)
+{
+	int r, i, j;
+	int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
+	int ring_blacklist[AMDGPU_MAX_RINGS];
+	struct amdgpu_ring *ring;
+
+	/* 0 is a valid ring index, so initialize to -1 */
+	memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
+
+	for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
+		ring = mapper->queue_map[i];
+		if (ring)
+			ring_blacklist[j++] = ring->idx;
+	}
+
+	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
+				j, out_ring);
+	if (r)
+		return r;
+
+	return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
+}
+
+/**
+ * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ *
+ * Initialize the selected @mgr (all asics).
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr)
+{
+	int i, r;
+
+	if (!adev || !mgr)
+		return -EINVAL;
+
+	memset(mgr, 0, sizeof(*mgr));
+
+	for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
+		r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ *
+ * De-initialize the selected @mgr (all asics).
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr)
+{
+	return 0;
+}
+
+/**
+ * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ * @hw_ip: HW IP enum
+ * @instance: HW instance
+ * @ring: user ring id
+ * @out_ring: pointer to mapped amdgpu_ring
+ *
+ * Map a userspace ring id to an appropriate kernel ring. Different
+ * policies are configurable at a HW IP level.
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring)
+{
+	int r, ip_num_rings;
+	struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
+
+	if (!adev || !mgr || !out_ring)
+		return -EINVAL;
+
+	if (hw_ip >= AMDGPU_MAX_IP_NUM)
+		return -EINVAL;
+
+	if (ring >= AMDGPU_MAX_RINGS)
+		return -EINVAL;
+
+	/* Right now all IPs have only one instance - multiple rings. */
+	if (instance != 0) {
+		DRM_ERROR("invalid ip instance: %d\n", instance);
+		return -EINVAL;
+	}
+
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		ip_num_rings = adev->gfx.num_gfx_rings;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		ip_num_rings = adev->gfx.num_compute_rings;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		ip_num_rings = adev->sdma.num_instances;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		ip_num_rings = 1;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		ip_num_rings = adev->vce.num_rings;
+		break;
+	case AMDGPU_HW_IP_UVD_ENC:
+		ip_num_rings = adev->uvd.num_enc_rings;
+		break;
+	case AMDGPU_HW_IP_VCN_DEC:
+		ip_num_rings = 1;
+		break;
+	case AMDGPU_HW_IP_VCN_ENC:
+		ip_num_rings = adev->vcn.num_enc_rings;
+		break;
+	default:
+		DRM_ERROR("unknown ip type: %d\n", hw_ip);
+		return -EINVAL;
+	}
+
+	if (ring >= ip_num_rings) {
+		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
+				ring, ip_num_rings, hw_ip);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mapper->lock);
+
+	*out_ring = amdgpu_get_cached_map(mapper, ring);
+	if (*out_ring) {
+		/* cache hit */
+		r = 0;
+		goto out_unlock;
+	}
+
+	switch (mapper->hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+	case AMDGPU_HW_IP_UVD:
+	case AMDGPU_HW_IP_VCE:
+	case AMDGPU_HW_IP_UVD_ENC:
+	case AMDGPU_HW_IP_VCN_DEC:
+	case AMDGPU_HW_IP_VCN_ENC:
+		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
+		break;
+	case AMDGPU_HW_IP_DMA:
+	case AMDGPU_HW_IP_COMPUTE:
+		r = amdgpu_lru_map(adev, mapper, ring, out_ring);
+		break;
+	default:
+		*out_ring = NULL;
+		r = -EINVAL;
+		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
+	}
+
+out_unlock:
+	mutex_unlock(&mapper->lock);
+	return r;
+}
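
A caller resolves a userspace (hw_ip, instance, ring) triple through the manager once and then reuses the cached mapping: GFX, UVD, VCE, UVD_ENC and the VCN rings map 1:1, while COMPUTE and DMA go through the LRU policy. The fragment below is a hedged sketch of such a caller, not code from this patch; the wrapper name and error handling are hypothetical:

/* Hypothetical caller, for illustration only: it assumes an amdgpu_queue_mgr
 * that was already set up with amdgpu_queue_mgr_init(). */
static int example_resolve_user_ring(struct amdgpu_device *adev,
				     struct amdgpu_queue_mgr *mgr,
				     int hw_ip, int instance, int user_ring,
				     struct amdgpu_ring **out_ring)
{
	int r;

	/* the first call for this (hw_ip, ring) pair picks a ring, identity
	 * mapped or LRU depending on the IP; later calls hit the cached map */
	r = amdgpu_queue_mgr_map(adev, mgr, hw_ip, instance, user_ring, out_ring);
	if (r)
		DRM_ERROR("could not map user ring %d for ip %d (%d)\n",
			  user_ring, hw_ip, r);

	return r;
}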

+ 83 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 
 
 	if (ring->funcs->end_use)
 	if (ring->funcs->end_use)
 		ring->funcs->end_use(ring);
 		ring->funcs->end_use(ring);
+
+	amdgpu_ring_lru_touch(ring->adev, ring);
 }
 }
 
 
 /**
 /**
@@ -253,10 +255,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	}
 	}
 
 
 	ring->max_dw = max_dw;
 	ring->max_dw = max_dw;
+	INIT_LIST_HEAD(&ring->lru_list);
+	amdgpu_ring_lru_touch(adev, ring);
 
 
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 	}
 	}
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -294,6 +299,84 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	ring->adev->rings[ring->idx] = NULL;
 	ring->adev->rings[ring->idx] = NULL;
 }
 }
 
 
+static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
+					 struct amdgpu_ring *ring)
+{
+	/* list_move_tail handles the case where ring isn't part of the list */
+	list_move_tail(&ring->lru_list, &adev->ring_lru_list);
+}
+
+static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
+				       int *blacklist, int num_blacklist)
+{
+	int i;
+
+	for (i = 0; i < num_blacklist; i++) {
+		if (ring->idx == blacklist[i])
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
+ *
+ * @adev: amdgpu_device pointer
+ * @type: amdgpu_ring_type enum
+ * @blacklist: blacklisted ring ids array
+ * @num_blacklist: number of entries in @blacklist
+ * @ring: output ring
+ *
+ * Retrieve the amdgpu_ring structure for the least recently used ring of
+ * a specific IP block (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
+			int num_blacklist, struct amdgpu_ring **ring)
+{
+	struct amdgpu_ring *entry;
+
+	/* List is sorted in LRU order, find first entry corresponding
+	 * to the desired HW IP */
+	*ring = NULL;
+	spin_lock(&adev->ring_lru_list_lock);
+	list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
+		if (entry->funcs->type != type)
+			continue;
+
+		if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
+			continue;
+
+		*ring = entry;
+		amdgpu_ring_lru_touch_locked(adev, *ring);
+		break;
+	}
+	spin_unlock(&adev->ring_lru_list_lock);
+
+	if (!*ring) {
+		DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_ring_lru_touch - mark a ring as recently being used
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: ring to touch
+ *
+ * Move @ring to the tail of the lru list
+ */
+void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	spin_lock(&adev->ring_lru_list_lock);
+	amdgpu_ring_lru_touch_locked(adev, ring);
+	spin_unlock(&adev->ring_lru_list_lock);
+}
+
 /*
 /*
  * Debugfs info
  * Debugfs info
  */
  */
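
amdgpu_ring_lru_get() picks the least recently used ring of a given type while skipping rings listed in the blacklist, and both it and amdgpu_ring_commit() touch the chosen ring so the list stays in LRU order. A hedged caller sketch mirroring the pattern in amdgpu_lru_map(); the wrapper itself is hypothetical:

/* Hypothetical helper, for illustration only: pick an SDMA ring, skipping
 * rings already handed out through the blacklist. */
static int example_pick_sdma_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring **out_ring)
{
	/* 0 is a valid ring index, so unused blacklist slots are set to -1 */
	int blacklist[AMDGPU_MAX_RINGS];
	int num_blacklist = 0;

	memset(blacklist, 0xff, sizeof(blacklist));

	/* the returned ring is touched internally, and every later
	 * amdgpu_ring_commit() touches it again, keeping the list in LRU order */
	return amdgpu_ring_lru_get(adev, AMDGPU_RING_TYPE_SDMA,
				   blacklist, num_blacklist, out_ring);
}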

+ 11 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -47,7 +47,9 @@ enum amdgpu_ring_type {
 	AMDGPU_RING_TYPE_UVD,
 	AMDGPU_RING_TYPE_UVD,
 	AMDGPU_RING_TYPE_VCE,
 	AMDGPU_RING_TYPE_VCE,
 	AMDGPU_RING_TYPE_KIQ,
 	AMDGPU_RING_TYPE_KIQ,
-	AMDGPU_RING_TYPE_UVD_ENC
+	AMDGPU_RING_TYPE_UVD_ENC,
+	AMDGPU_RING_TYPE_VCN_DEC,
+	AMDGPU_RING_TYPE_VCN_ENC
 };
 };
 
 
 struct amdgpu_device;
 struct amdgpu_device;
@@ -76,6 +78,7 @@ struct amdgpu_fence_driver {
 int amdgpu_fence_driver_init(struct amdgpu_device *adev);
 int amdgpu_fence_driver_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
+void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring);
 
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 				  unsigned num_hw_submission);
 				  unsigned num_hw_submission);
@@ -130,6 +133,7 @@ struct amdgpu_ring_funcs {
 	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
 	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
 	/* insert NOP packets */
 	/* insert NOP packets */
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+	void (*insert_start)(struct amdgpu_ring *ring);
 	void (*insert_end)(struct amdgpu_ring *ring);
 	void (*insert_end)(struct amdgpu_ring *ring);
 	/* pad the indirect buffer to the necessary number of dw */
 	/* pad the indirect buffer to the necessary number of dw */
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
@@ -142,6 +146,7 @@ struct amdgpu_ring_funcs {
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 };
 };
 
 
 struct amdgpu_ring {
 struct amdgpu_ring {
@@ -149,6 +154,7 @@ struct amdgpu_ring {
 	const struct amdgpu_ring_funcs	*funcs;
 	const struct amdgpu_ring_funcs	*funcs;
 	struct amdgpu_fence_driver	fence_drv;
 	struct amdgpu_fence_driver	fence_drv;
 	struct amd_gpu_scheduler	sched;
 	struct amd_gpu_scheduler	sched;
+	struct list_head		lru_list;
 
 
 	struct amdgpu_bo	*ring_obj;
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	volatile uint32_t	*ring;
@@ -180,6 +186,7 @@ struct amdgpu_ring {
 	u64			cond_exe_gpu_addr;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 	unsigned		vm_inv_eng;
+	bool			has_compute_vm_bug;
 #if defined(CONFIG_DEBUG_FS)
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 	struct dentry *ent;
 #endif
 #endif
@@ -194,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
+			int num_blacklist, struct amdgpu_ring **ring);
+void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 {
 	int i = 0;
 	int i = 0;

+ 19 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

@@ -298,6 +298,25 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 	return NULL;
 	return NULL;
 }
 }
 
 
+int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
+{
+	struct amdgpu_sync_entry *e;
+	struct hlist_node *tmp;
+	int i, r;
+
+	hash_for_each_safe(sync->fences, i, tmp, e, node) {
+		r = dma_fence_wait(e->fence, intr);
+		if (r)
+			return r;
+
+		hash_del(&e->node);
+		dma_fence_put(e->fence);
+		kmem_cache_free(amdgpu_sync_slab, e);
+	}
+
+	return 0;
+}
+
 /**
 /**
  * amdgpu_sync_free - free the sync object
  * amdgpu_sync_free - free the sync object
  *
  *

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h

@@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
 				     struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);
 void amdgpu_sync_fini(void);

+ 23 - 14
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

@@ -29,11 +29,11 @@
  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  *    Dave Airlie
  *    Dave Airlie
  */
  */
-#include <ttm/ttm_bo_api.h>
-#include <ttm/ttm_bo_driver.h>
-#include <ttm/ttm_placement.h>
-#include <ttm/ttm_module.h>
-#include <ttm/ttm_page_alloc.h>
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_module.h>
+#include <drm/ttm/ttm_page_alloc.h>
 #include <drm/drmP.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/amdgpu_drm.h>
 #include <linux/seq_file.h>
 #include <linux/seq_file.h>
@@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
 		return r;
 		return r;
 	}
 	}
 
 
+	spin_lock(&gtt->adev->gtt_list_lock);
 	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
@@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
 	if (r) {
 	if (r) {
 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
 			  ttm->num_pages, gtt->offset);
 			  ttm->num_pages, gtt->offset);
-		return r;
+		goto error_gart_bind;
 	}
 	}
-	spin_lock(&gtt->adev->gtt_list_lock);
+
 	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
 	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+error_gart_bind:
 	spin_unlock(&gtt->adev->gtt_list_lock);
 	spin_unlock(&gtt->adev->gtt_list_lock);
-	return 0;
+	return r;
 }
 }
 
 
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
@@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int r;
 
 
 	if (gtt->userptr)
 	if (gtt->userptr)
 		amdgpu_ttm_tt_unpin_userptr(ttm);
 		amdgpu_ttm_tt_unpin_userptr(ttm);
@@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 		return 0;
 		return 0;
 
 
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
-	if (gtt->adev->gart.ready)
-		amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
-
 	spin_lock(&gtt->adev->gtt_list_lock);
 	spin_lock(&gtt->adev->gtt_list_lock);
+	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
+	if (r) {
+		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
+			  gtt->ttm.ttm.num_pages, gtt->offset);
+		goto error_unbind;
+	}
 	list_del_init(&gtt->list);
 	list_del_init(&gtt->list);
+error_unbind:
 	spin_unlock(&gtt->adev->gtt_list_lock);
 	spin_unlock(&gtt->adev->gtt_list_lock);
-
-	return 0;
+	return r;
 }
 }
 
 
 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
@@ -1115,7 +1121,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* Change the size here instead of the init above so only lpfn is affected */
 	/* Change the size here instead of the init above so only lpfn is affected */
 	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
 
-	r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
+	r = amdgpu_bo_create(adev, adev->mc.stolen_size, PAGE_SIZE, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
@@ -1462,6 +1468,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 		return -EINVAL;
 
 
+	if (*pos >= adev->mc.mc_vram_size)
+		return -ENXIO;
+
 	while (size) {
 	while (size) {
 		unsigned long flags;
 		unsigned long flags;
 		uint32_t value;
 		uint32_t value;

+ 32 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c

@@ -197,6 +197,27 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
 	}
 	}
 }
 }
 
 
+void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
+{
+	uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+	uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+	DRM_DEBUG("GPU_INFO\n");
+	amdgpu_ucode_print_common_hdr(hdr);
+
+	if (version_major == 1) {
+		const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr =
+			container_of(hdr, struct gpu_info_firmware_header_v1_0, header);
+
+		DRM_DEBUG("version_major: %u\n",
+			  le16_to_cpu(gpu_info_hdr->version_major));
+		DRM_DEBUG("version_minor: %u\n",
+			  le16_to_cpu(gpu_info_hdr->version_minor));
+	} else {
+		DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor);
+	}
+}
+
 int amdgpu_ucode_validate(const struct firmware *fw)
 int amdgpu_ucode_validate(const struct firmware *fw)
 {
 {
 	const struct common_firmware_header *hdr =
 	const struct common_firmware_header *hdr =
@@ -253,6 +274,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 			return AMDGPU_FW_LOAD_DIRECT;
 			return AMDGPU_FW_LOAD_DIRECT;
 		else
 		else
 			return AMDGPU_FW_LOAD_PSP;
 			return AMDGPU_FW_LOAD_PSP;
+	case CHIP_RAVEN:
+#if 0
+		if (!load_type)
+			return AMDGPU_FW_LOAD_DIRECT;
+		else
+			return AMDGPU_FW_LOAD_PSP;
+#else
+		return AMDGPU_FW_LOAD_DIRECT;
+#endif
 	default:
 	default:
 		DRM_ERROR("Unknow firmware load type\n");
 		DRM_ERROR("Unknow firmware load type\n");
 	}
 	}
@@ -349,7 +379,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 
 
 	err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
 	err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
 				amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 				amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
-				0, NULL, NULL, bo);
+				AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+				NULL, NULL, bo);
 	if (err) {
 	if (err) {
 		dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
 		dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
 		goto failed;
 		goto failed;

+ 28 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h

@@ -113,6 +113,32 @@ struct sdma_firmware_header_v1_1 {
 	uint32_t digest_size;
 	uint32_t digest_size;
 };
 };
 
 
+/* gpu info payload */
+struct gpu_info_firmware_v1_0 {
+	uint32_t gc_num_se;
+	uint32_t gc_num_cu_per_sh;
+	uint32_t gc_num_sh_per_se;
+	uint32_t gc_num_rb_per_se;
+	uint32_t gc_num_tccs;
+	uint32_t gc_num_gprs;
+	uint32_t gc_num_max_gs_thds;
+	uint32_t gc_gs_table_depth;
+	uint32_t gc_gsprim_buff_depth;
+	uint32_t gc_parameter_cache_depth;
+	uint32_t gc_double_offchip_lds_buffer;
+	uint32_t gc_wave_size;
+	uint32_t gc_max_waves_per_simd;
+	uint32_t gc_max_scratch_slots_per_cu;
+	uint32_t gc_lds_size;
+};
+
+/* version_major=1, version_minor=0 */
+struct gpu_info_firmware_header_v1_0 {
+	struct common_firmware_header header;
+	uint16_t version_major; /* version */
+	uint16_t version_minor; /* version */
+};
+
 /* header is fixed size */
 /* header is fixed size */
 union amdgpu_firmware_header {
 union amdgpu_firmware_header {
 	struct common_firmware_header common;
 	struct common_firmware_header common;
@@ -124,6 +150,7 @@ union amdgpu_firmware_header {
 	struct rlc_firmware_header_v2_0 rlc_v2_0;
 	struct rlc_firmware_header_v2_0 rlc_v2_0;
 	struct sdma_firmware_header_v1_0 sdma;
 	struct sdma_firmware_header_v1_0 sdma;
 	struct sdma_firmware_header_v1_1 sdma_v1_1;
 	struct sdma_firmware_header_v1_1 sdma_v1_1;
+	struct gpu_info_firmware_header_v1_0 gpu_info;
 	uint8_t raw[0x100];
 	uint8_t raw[0x100];
 };
 };
 
 
@@ -184,6 +211,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
 int amdgpu_ucode_validate(const struct firmware *fw);
 int amdgpu_ucode_validate(const struct firmware *fw);
 bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
 bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
 				uint16_t hdr_major, uint16_t hdr_minor);
 				uint16_t hdr_major, uint16_t hdr_minor);

+ 5 - 25
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c

@@ -165,35 +165,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
 	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
 				(binary_id << 8));
 				(binary_id << 8));
 
 
-	/* allocate firmware, stack and heap BO */
-
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, &adev->vce.vcpu_bo);
+	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
+				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
 	if (r) {
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
 		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
 		return r;
 		return r;
 	}
 	}
 
 
-	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
-	if (r) {
-		amdgpu_bo_unref(&adev->vce.vcpu_bo);
-		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
-		return r;
-	}
-
-	r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
-			  &adev->vce.gpu_addr);
-	amdgpu_bo_unreserve(adev->vce.vcpu_bo);
-	if (r) {
-		amdgpu_bo_unref(&adev->vce.vcpu_bo);
-		dev_err(adev->dev, "(%d) VCE bo pin failed\n", r);
-		return r;
-	}
-
-
 	ring = &adev->vce.ring[0];
 	ring = &adev->vce.ring[0];
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 	r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
 	r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
@@ -230,7 +209,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 
 
 	amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
 	amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
 
 
-	amdgpu_bo_unref(&adev->vce.vcpu_bo);
+	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
+		(void **)&adev->vce.cpu_addr);
 
 
 	for (i = 0; i < adev->vce.num_rings; i++)
 	for (i = 0; i < adev->vce.num_rings; i++)
 		amdgpu_ring_fini(&adev->vce.ring[i]);
 		amdgpu_ring_fini(&adev->vce.ring[i]);

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h

@@ -33,6 +33,8 @@
 struct amdgpu_vce {
 struct amdgpu_vce {
 	struct amdgpu_bo	*vcpu_bo;
 	struct amdgpu_bo	*vcpu_bo;
 	uint64_t		gpu_addr;
 	uint64_t		gpu_addr;
+	void			*cpu_addr;
+	void			*saved_bo;
 	unsigned		fw_version;
 	unsigned		fw_version;
 	unsigned		fb_version;
 	unsigned		fb_version;
 	atomic_t		handles[AMDGPU_MAX_VCE_HANDLES];
 	atomic_t		handles[AMDGPU_MAX_VCE_HANDLES];

+ 654 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

@@ -0,0 +1,654 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_vcn.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#include "vega10/soc15ip.h"
+#include "raven1/VCN/vcn_1_0_offset.h"
+
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+/* Firmware Names */
+#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RAVEN);
+
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	struct amd_sched_rq *rq;
+	unsigned long bo_size;
+	const char *fw_name;
+	const struct common_firmware_header *hdr;
+	unsigned version_major, version_minor, family_id;
+	int r;
+
+	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+
+	switch (adev->asic_type) {
+	case CHIP_RAVEN:
+		fw_name = FIRMWARE_RAVEN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
+	if (r) {
+		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
+			fw_name);
+		return r;
+	}
+
+	r = amdgpu_ucode_validate(adev->vcn.fw);
+	if (r) {
+		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
+			fw_name);
+		release_firmware(adev->vcn.fw);
+		adev->vcn.fw = NULL;
+		return r;
+	}
+
+	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+	DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
+		version_major, version_minor, family_id);
+
+
+	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+		  +  AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
+		  +  AMDGPU_VCN_SESSION_SIZE * 40;
+	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
+				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+		return r;
+	}
+
+	ring = &adev->vcn.ring_dec;
+	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
+				  rq, amdgpu_sched_jobs);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up VCN dec run queue.\n");
+		return r;
+	}
+
+	ring = &adev->vcn.ring_enc[0];
+	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
+				  rq, amdgpu_sched_jobs);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up VCN enc run queue.\n");
+		return r;
+	}
+
+	return 0;
+}
+
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+{
+	int i;
+
+	kfree(adev->vcn.saved_bo);
+
+	amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
+
+	amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
+
+	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
+			      &adev->vcn.gpu_addr,
+			      (void **)&adev->vcn.cpu_addr);
+
+	amdgpu_ring_fini(&adev->vcn.ring_dec);
+
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
+
+	release_firmware(adev->vcn.fw);
+
+	return 0;
+}
+
+int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+{
+	unsigned size;
+	void *ptr;
+
+	if (adev->vcn.vcpu_bo == NULL)
+		return 0;
+
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
+	ptr = adev->vcn.cpu_addr;
+
+	adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
+	if (!adev->vcn.saved_bo)
+		return -ENOMEM;
+
+	memcpy_fromio(adev->vcn.saved_bo, ptr, size);
+
+	return 0;
+}
+
+int amdgpu_vcn_resume(struct amdgpu_device *adev)
+{
+	unsigned size;
+	void *ptr;
+
+	if (adev->vcn.vcpu_bo == NULL)
+		return -EINVAL;
+
+	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
+	ptr = adev->vcn.cpu_addr;
+
+	if (adev->vcn.saved_bo != NULL) {
+		memcpy_toio(ptr, adev->vcn.saved_bo, size);
+		kfree(adev->vcn.saved_bo);
+		adev->vcn.saved_bo = NULL;
+	} else {
+		const struct common_firmware_header *hdr;
+		unsigned offset;
+
+		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
+			    le32_to_cpu(hdr->ucode_size_bytes));
+		size -= le32_to_cpu(hdr->ucode_size_bytes);
+		ptr += le32_to_cpu(hdr->ucode_size_bytes);
+		memset_io(ptr, 0, size);
+	}
+
+	return 0;
+}
+
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vcn.idle_work.work);
+	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
+
+	if (fences == 0) {
+		if (adev->pm.dpm_enabled) {
+			amdgpu_dpm_enable_uvd(adev, false);
+		} else {
+			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+		}
+	} else {
+		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+	}
+}
+
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+	if (set_clocks) {
+		if (adev->pm.dpm_enabled) {
+			amdgpu_dpm_enable_uvd(adev, true);
+		} else {
+			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+		}
+	}
+}
+
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+}
+
+int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
+	r = amdgpu_ring_alloc(ring, 3);
+	if (r) {
+		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+			  ring->idx, r);
+		return r;
+	}
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+	amdgpu_ring_write(ring, 0xDEADBEEF);
+	amdgpu_ring_commit(ring);
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < adev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n",
+			 ring->idx, i);
+	} else {
+		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+			       bool direct, struct dma_fence **fence)
+{
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
+	struct list_head head;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	struct dma_fence *f = NULL;
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t addr;
+	int i, r;
+
+	memset(&tv, 0, sizeof(tv));
+	tv.bo = &bo->tbo;
+
+	INIT_LIST_HEAD(&head);
+	list_add(&tv.head, &head);
+
+	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
+	if (r)
+		return r;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+	if (r)
+		goto err;
+
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	if (r)
+		goto err;
+
+	ib = &job->ibs[0];
+	addr = amdgpu_bo_gpu_offset(bo);
+	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
+	ib->ptr[1] = addr;
+	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
+	ib->ptr[3] = addr >> 32;
+	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
+	ib->ptr[5] = 0;
+	for (i = 6; i < 16; i += 2) {
+		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
+		ib->ptr[i+1] = 0;
+	}
+	ib->length_dw = 16;
+
+	if (direct) {
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		job->fence = dma_fence_get(f);
+		if (r)
+			goto err_free;
+
+		amdgpu_job_free(job);
+	} else {
+		r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
+				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+		if (r)
+			goto err_free;
+	}
+
+	ttm_eu_fence_buffer_objects(&ticket, &head, f);
+
+	if (fence)
+		*fence = dma_fence_get(f);
+	amdgpu_bo_unref(&bo);
+	dma_fence_put(f);
+
+	return 0;
+
+err_free:
+	amdgpu_job_free(job);
+
+err:
+	ttm_eu_backoff_reservation(&ticket, &head);
+	return r;
+}
+
+static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+			      struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
+			     AMDGPU_GEM_DOMAIN_VRAM,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+			     NULL, NULL, &bo);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (r) {
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	r = amdgpu_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		amdgpu_bo_unreserve(bo);
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	msg[0] = cpu_to_le32(0x00000028);
+	msg[1] = cpu_to_le32(0x00000038);
+	msg[2] = cpu_to_le32(0x00000001);
+	msg[3] = cpu_to_le32(0x00000000);
+	msg[4] = cpu_to_le32(handle);
+	msg[5] = cpu_to_le32(0x00000000);
+	msg[6] = cpu_to_le32(0x00000001);
+	msg[7] = cpu_to_le32(0x00000028);
+	msg[8] = cpu_to_le32(0x00000010);
+	msg[9] = cpu_to_le32(0x00000000);
+	msg[10] = cpu_to_le32(0x00000007);
+	msg[11] = cpu_to_le32(0x00000000);
+	msg[12] = cpu_to_le32(0x00000780);
+	msg[13] = cpu_to_le32(0x00000440);
+	for (i = 14; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unreserve(bo);
+
+	return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
+}
+
+static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+			       bool direct, struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
+			     AMDGPU_GEM_DOMAIN_VRAM,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+			     NULL, NULL, &bo);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (r) {
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	r = amdgpu_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		amdgpu_bo_unreserve(bo);
+		amdgpu_bo_unref(&bo);
+		return r;
+	}
+
+	msg[0] = cpu_to_le32(0x00000028);
+	msg[1] = cpu_to_le32(0x00000018);
+	msg[2] = cpu_to_le32(0x00000000);
+	msg[3] = cpu_to_le32(0x00000002);
+	msg[4] = cpu_to_le32(handle);
+	msg[5] = cpu_to_le32(0x00000000);
+	for (i = 6; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unreserve(bo);
+
+	return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
+}
+
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct dma_fence *fence;
+	long r;
+
+	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		goto error;
+	}
+
+	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		goto error;
+	}
+
+	r = dma_fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+	} else {
+		DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+		r = 0;
+	}
+
+	dma_fence_put(fence);
+
+error:
+	return r;
+}
+
+int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t rptr = amdgpu_ring_get_rptr(ring);
+	unsigned i;
+	int r;
+
+	r = amdgpu_ring_alloc(ring, 16);
+	if (r) {
+		DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
+			  ring->idx, r);
+		return r;
+	}
+	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
+	amdgpu_ring_commit(ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (amdgpu_ring_get_rptr(ring) != rptr)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < adev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n",
+			 ring->idx, i);
+	} else {
+		DRM_ERROR("amdgpu: ring %d test failed\n",
+			  ring->idx);
+		r = -ETIMEDOUT;
+	}
+
+	return r;
+}
+
+static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+			      struct dma_fence **fence)
+{
+	const unsigned ib_size_dw = 16;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	struct dma_fence *f = NULL;
+	uint64_t dummy;
+	int i, r;
+
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+	if (r)
+		return r;
+
+	ib = &job->ibs[0];
+	dummy = ib->gpu_addr + 1024;
+
+	ib->length_dw = 0;
+	ib->ptr[ib->length_dw++] = 0x00000018;
+	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
+	ib->ptr[ib->length_dw++] = handle;
+	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
+	ib->ptr[ib->length_dw++] = dummy;
+	ib->ptr[ib->length_dw++] = 0x0000000b;
+
+	ib->ptr[ib->length_dw++] = 0x00000014;
+	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+	ib->ptr[ib->length_dw++] = 0x0000001c;
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0x00000000;
+
+	ib->ptr[ib->length_dw++] = 0x00000008;
+	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
+
+	for (i = ib->length_dw; i < ib_size_dw; ++i)
+		ib->ptr[i] = 0x0;
+
+	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+	job->fence = dma_fence_get(f);
+	if (r)
+		goto err;
+
+	amdgpu_job_free(job);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+
+	return 0;
+
+err:
+	amdgpu_job_free(job);
+	return r;
+}
+
+static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+				struct dma_fence **fence)
+{
+	const unsigned ib_size_dw = 16;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	struct dma_fence *f = NULL;
+	uint64_t dummy;
+	int i, r;
+
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+	if (r)
+		return r;
+
+	ib = &job->ibs[0];
+	dummy = ib->gpu_addr + 1024;
+
+	ib->length_dw = 0;
+	ib->ptr[ib->length_dw++] = 0x00000018;
+	ib->ptr[ib->length_dw++] = 0x00000001;
+	ib->ptr[ib->length_dw++] = handle;
+	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
+	ib->ptr[ib->length_dw++] = dummy;
+	ib->ptr[ib->length_dw++] = 0x0000000b;
+
+	ib->ptr[ib->length_dw++] = 0x00000014;
+	ib->ptr[ib->length_dw++] = 0x00000002;
+	ib->ptr[ib->length_dw++] = 0x0000001c;
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0x00000000;
+
+	ib->ptr[ib->length_dw++] = 0x00000008;
+	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
+
+	for (i = ib->length_dw; i < ib_size_dw; ++i)
+		ib->ptr[i] = 0x0;
+
+	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+	job->fence = dma_fence_get(f);
+	if (r)
+		goto err;
+
+	amdgpu_job_free(job);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+
+	return 0;
+
+err:
+	amdgpu_job_free(job);
+	return r;
+}
+
+int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct dma_fence *fence = NULL;
+	long r;
+
+	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		goto error;
+	}
+
+	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		goto error;
+	}
+
+	r = dma_fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+	} else {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
+	}
+error:
+	dma_fence_put(fence);
+	return r;
+}

+ 77 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

@@ -0,0 +1,77 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VCN_H__
+#define __AMDGPU_VCN_H__
+
+#define AMDGPU_VCN_STACK_SIZE		(200*1024)
+#define AMDGPU_VCN_HEAP_SIZE		(256*1024)
+#define AMDGPU_VCN_SESSION_SIZE		(50*1024)
+#define AMDGPU_VCN_FIRMWARE_OFFSET	256
+#define AMDGPU_VCN_MAX_ENC_RINGS	3
+
+#define VCN_DEC_CMD_FENCE		0x00000000
+#define VCN_DEC_CMD_TRAP		0x00000001
+#define VCN_DEC_CMD_WRITE_REG		0x00000004
+#define VCN_DEC_CMD_REG_READ_COND_WAIT	0x00000006
+#define VCN_DEC_CMD_PACKET_START	0x0000000a
+#define VCN_DEC_CMD_PACKET_END		0x0000000b
+
+#define VCN_ENC_CMD_NO_OP		0x00000000
+#define VCN_ENC_CMD_END 		0x00000001
+#define VCN_ENC_CMD_IB			0x00000002
+#define VCN_ENC_CMD_FENCE		0x00000003
+#define VCN_ENC_CMD_TRAP		0x00000004
+#define VCN_ENC_CMD_REG_WRITE		0x0000000b
+#define VCN_ENC_CMD_REG_WAIT		0x0000000c
+
+struct amdgpu_vcn {
+	struct amdgpu_bo	*vcpu_bo;
+	void			*cpu_addr;
+	uint64_t		gpu_addr;
+	unsigned		fw_version;
+	void			*saved_bo;
+	struct delayed_work	idle_work;
+	const struct firmware	*fw;	/* VCN firmware */
+	struct amdgpu_ring	ring_dec;
+	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+	struct amdgpu_irq_src	irq;
+	struct amd_sched_entity entity_dec;
+	struct amd_sched_entity entity_enc;
+	unsigned		num_enc_rings;
+};
+
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev);
+int amdgpu_vcn_resume(struct amdgpu_device *adev);
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
+
+int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+#endif

+ 14 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

@@ -22,6 +22,7 @@
  */
 
 #include "amdgpu.h"
+#define MAX_KIQ_REG_WAIT	100000
 
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
 {
@@ -105,8 +106,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 	/* enable virtual display */
 	adev->mode_info.num_crtc = 1;
 	adev->enable_virtual_display = true;
+	adev->cg_flags = 0;
+	adev->pg_flags = 0;
 
-	mutex_init(&adev->virt.lock_kiq);
 	mutex_init(&adev->virt.lock_reset);
 }
 
@@ -120,17 +122,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 
 	BUG_ON(!ring->funcs->emit_rreg);
 
-	mutex_lock(&adev->virt.lock_kiq);
+	mutex_lock(&kiq->ring_mutex);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_rreg(ring, reg);
 	amdgpu_fence_emit(ring, &f);
 	amdgpu_ring_commit(ring);
-	mutex_unlock(&adev->virt.lock_kiq);
+	mutex_unlock(&kiq->ring_mutex);
 
-	r = dma_fence_wait(f, false);
-	if (r)
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
 	dma_fence_put(f);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return ~0;
+	}
 
 	val = adev->wb.wb[adev->virt.reg_val_offs];
 
@@ -146,15 +150,15 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 
 	BUG_ON(!ring->funcs->emit_wreg);
 
-	mutex_lock(&adev->virt.lock_kiq);
+	mutex_lock(&kiq->ring_mutex);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_wreg(ring, reg, v);
 	amdgpu_fence_emit(ring, &f);
 	amdgpu_ring_commit(ring);
-	mutex_unlock(&adev->virt.lock_kiq);
+	mutex_unlock(&kiq->ring_mutex);
 
-	r = dma_fence_wait(f, false);
-	if (r)
+	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
+	if (r < 1)
 		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
 	dma_fence_put(f);
 }

+ 1 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

@@ -52,7 +52,6 @@ struct amdgpu_virt {
 	uint64_t			csa_vmid0_addr;
 	bool chained_ib_support;
 	uint32_t			reg_val_offs;
-	struct mutex			lock_kiq;
 	struct mutex                    lock_reset;
 	struct amdgpu_irq_src		ack_irq;
 	struct amdgpu_irq_src		rcv_irq;
@@ -97,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
 

+ 476 - 157
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -79,6 +79,12 @@ struct amdgpu_pte_update_params {
 		     uint64_t flags);
 		     uint64_t flags);
 	/* indicate update pt or its shadow */
 	/* indicate update pt or its shadow */
 	bool shadow;
 	bool shadow;
+	/* The next two are used during VM update by CPU
+	 *  DMA addresses to use for mapping
+	 *  Kernel pointer of PD/PT BO that needs to be updated
+	 */
+	dma_addr_t *pages_addr;
+	void *kptr;
 };
 };
 
 
 /* Helper to disable partial resident texture feature from a fence callback */
 /* Helper to disable partial resident texture feature from a fence callback */
@@ -275,12 +281,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		adev->vm_manager.block_size;
 		adev->vm_manager.block_size;
 	unsigned pt_idx, from, to;
 	unsigned pt_idx, from, to;
 	int r;
 	int r;
+	u64 flags;
 
 
 	if (!parent->entries) {
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
 
 
-		parent->entries = drm_calloc_large(num_entries,
-						   sizeof(struct amdgpu_vm_pt));
+		parent->entries = kvmalloc_array(num_entries,
+						   sizeof(struct amdgpu_vm_pt),
+						   GFP_KERNEL | __GFP_ZERO);
 		if (!parent->entries)
 		if (!parent->entries)
 			return -ENOMEM;
 			return -ENOMEM;
 		memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
 		memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
@@ -299,6 +307,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	saddr = saddr & ((1 << shift) - 1);
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
 
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	if (vm->use_cpu_for_update)
+		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	else
+		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+				AMDGPU_GEM_CREATE_SHADOW);
+
 	/* walk over the address space and allocate the page tables */
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 		struct reservation_object *resv = vm->root.bo->tbo.resv;
 		struct reservation_object *resv = vm->root.bo->tbo.resv;
@@ -310,10 +326,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 					     amdgpu_vm_bo_size(adev, level),
 					     amdgpu_vm_bo_size(adev, level),
 					     AMDGPU_GPU_PAGE_SIZE, true,
 					     AMDGPU_GPU_PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
 					     AMDGPU_GEM_DOMAIN_VRAM,
-					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-					     AMDGPU_GEM_CREATE_SHADOW |
-					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
+					     flags,
 					     NULL, resv, &pt);
 					     NULL, resv, &pt);
 			if (r)
 			if (r)
 				return r;
 				return r;
@@ -391,6 +404,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
 		atomic_read(&adev->gpu_reset_counter);
 		atomic_read(&adev->gpu_reset_counter);
 }
 }
 
 
+static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
+{
+	return !!vm->reserved_vmid[vmhub];
+}
+
+/* idr_mgr->lock must be held */
+static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
+					       struct amdgpu_ring *ring,
+					       struct amdgpu_sync *sync,
+					       struct dma_fence *fence,
+					       struct amdgpu_job *job)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	uint64_t fence_context = adev->fence_context + ring->idx;
+	struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct dma_fence *updates = sync->last_vm_update;
+	int r = 0;
+	struct dma_fence *flushed, *tmp;
+	bool needs_flush = false;
+
+	flushed  = id->flushed_updates;
+	if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
+	    (atomic64_read(&id->owner) != vm->client_id) ||
+	    (job->vm_pd_addr != id->pd_gpu_addr) ||
+	    (updates && (!flushed || updates->context != flushed->context ||
+			dma_fence_is_later(updates, flushed))) ||
+	    (!id->last_flush || (id->last_flush->context != fence_context &&
+				 !dma_fence_is_signaled(id->last_flush)))) {
+		needs_flush = true;
+		/* to prevent one context being starved by another context */
+		id->pd_gpu_addr = 0;
+		tmp = amdgpu_sync_peek_fence(&id->active, ring);
+		if (tmp) {
+			r = amdgpu_sync_fence(adev, sync, tmp);
+			return r;
+		}
+	}
+
+	/* Good we can use this VMID. Remember this submission as
+	* user of the VMID.
+	*/
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+	if (r)
+		goto out;
+
+	if (updates && (!flushed || updates->context != flushed->context ||
+			dma_fence_is_later(updates, flushed))) {
+		dma_fence_put(id->flushed_updates);
+		id->flushed_updates = dma_fence_get(updates);
+	}
+	id->pd_gpu_addr = job->vm_pd_addr;
+	atomic64_set(&id->owner, vm->client_id);
+	job->vm_needs_flush = needs_flush;
+	if (needs_flush) {
+		dma_fence_put(id->last_flush);
+		id->last_flush = NULL;
+	}
+	job->vm_id = id - id_mgr->ids;
+	trace_amdgpu_vm_grab_id(vm, ring, job);
+out:
+	return r;
+}
+
 /**
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
  *
@@ -415,12 +493,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	unsigned i;
 	unsigned i;
 	int r = 0;
 	int r = 0;
 
 
+	mutex_lock(&id_mgr->lock);
+	if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
+		r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
+		mutex_unlock(&id_mgr->lock);
+		return r;
+	}
 	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
 	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-	if (!fences)
+	if (!fences) {
+		mutex_unlock(&id_mgr->lock);
 		return -ENOMEM;
 		return -ENOMEM;
-
-	mutex_lock(&id_mgr->lock);
-
+	}
 	/* Check if we have an idle VMID */
 	/* Check if we have an idle VMID */
 	i = 0;
 	i = 0;
 	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
 	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
@@ -521,7 +604,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	id->pd_gpu_addr = job->vm_pd_addr;
 	id->pd_gpu_addr = job->vm_pd_addr;
 	dma_fence_put(id->flushed_updates);
 	dma_fence_put(id->flushed_updates);
 	id->flushed_updates = dma_fence_get(updates);
 	id->flushed_updates = dma_fence_get(updates);
-	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 	atomic64_set(&id->owner, vm->client_id);
 	atomic64_set(&id->owner, vm->client_id);
 
 
 needs_flush:
 needs_flush:
@@ -540,40 +622,118 @@ error:
 	return r;
 	return r;
 }
 }
 
 
-static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
+static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
+					  struct amdgpu_vm *vm,
+					  unsigned vmhub)
+{
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+	mutex_lock(&id_mgr->lock);
+	if (vm->reserved_vmid[vmhub]) {
+		list_add(&vm->reserved_vmid[vmhub]->list,
+			&id_mgr->ids_lru);
+		vm->reserved_vmid[vmhub] = NULL;
+		atomic_dec(&id_mgr->reserved_vmid_num);
+	}
+	mutex_unlock(&id_mgr->lock);
+}
+
+static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
+					 struct amdgpu_vm *vm,
+					 unsigned vmhub)
+{
+	struct amdgpu_vm_id_manager *id_mgr;
+	struct amdgpu_vm_id *idle;
+	int r = 0;
+
+	id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	mutex_lock(&id_mgr->lock);
+	if (vm->reserved_vmid[vmhub])
+		goto unlock;
+	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
+	    AMDGPU_VM_MAX_RESERVED_VMID) {
+		DRM_ERROR("Over limitation of reserved vmid\n");
+		atomic_dec(&id_mgr->reserved_vmid_num);
+		r = -EINVAL;
+		goto unlock;
+	}
+	/* Select the first entry VMID */
+	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
+	list_del_init(&idle->list);
+	vm->reserved_vmid[vmhub] = idle;
+	mutex_unlock(&id_mgr->lock);
+
+	return 0;
+unlock:
+	mutex_unlock(&id_mgr->lock);
+	return r;
+}
+
+/**
+ * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
+ *
+ * @adev: amdgpu_device pointer
+ */
+void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
 {
 {
-	struct amdgpu_device *adev = ring->adev;
 	const struct amdgpu_ip_block *ip_block;
 	const struct amdgpu_ip_block *ip_block;
+	bool has_compute_vm_bug;
+	struct amdgpu_ring *ring;
+	int i;
 
 
-	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
-		/* only compute rings */
-		return false;
+	has_compute_vm_bug = false;
 
 
 	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
-	if (!ip_block)
-		return false;
+	if (ip_block) {
+		/* Compute has a VM bug for GFX version < 7.
+		   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
+		if (ip_block->version->major <= 7)
+			has_compute_vm_bug = true;
+		else if (ip_block->version->major == 8)
+			if (adev->gfx.mec_fw_version < 673)
+				has_compute_vm_bug = true;
+	}
 
 
-	if (ip_block->version->major <= 7) {
-		/* gfx7 has no workaround */
-		return true;
-	} else if (ip_block->version->major == 8) {
-		if (adev->gfx.mec_fw_version >= 673)
-			/* gfx8 is fixed in MEC firmware 673 */
-			return false;
+	for (i = 0; i < adev->num_rings; i++) {
+		ring = adev->rings[i];
+		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+			/* only compute rings */
+			ring->has_compute_vm_bug = has_compute_vm_bug;
 		else
 		else
-			return true;
+			ring->has_compute_vm_bug = false;
 	}
 	}
-	return false;
 }
 }
 
 
-static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
+bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
+				  struct amdgpu_job *job)
 {
 {
-	u64 addr = mc_addr;
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vm_id *id;
+	bool gds_switch_needed;
+	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
+
+	if (job->vm_id == 0)
+		return false;
+	id = &id_mgr->ids[job->vm_id];
+	gds_switch_needed = ring->funcs->emit_gds_switch && (
+		id->gds_base != job->gds_base ||
+		id->gds_size != job->gds_size ||
+		id->gws_base != job->gws_base ||
+		id->gws_size != job->gws_size ||
+		id->oa_base != job->oa_base ||
+		id->oa_size != job->oa_size);
 
 
-	if (adev->gart.gart_funcs->adjust_mc_addr)
-		addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr);
+	if (amdgpu_vm_had_gpu_reset(adev, id))
+		return true;
 
 
-	return addr;
+	return vm_flush_needed || gds_switch_needed;
+}
+
+static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
+{
+	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
 }
 }
 
 
 /**
 /**
@@ -598,8 +758,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		id->gws_size != job->gws_size ||
 		id->gws_size != job->gws_size ||
 		id->oa_base != job->oa_base ||
 		id->oa_base != job->oa_base ||
 		id->oa_size != job->oa_size);
 		id->oa_size != job->oa_size);
-	bool vm_flush_needed = job->vm_needs_flush ||
-		amdgpu_vm_ring_has_compute_vm_bug(ring);
+	bool vm_flush_needed = job->vm_needs_flush;
 	unsigned patch_offset = 0;
 	unsigned patch_offset = 0;
 	int r;
 	int r;
 
 
@@ -614,15 +773,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 	if (ring->funcs->init_cond_exec)
 	if (ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
 
-	if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
-		amdgpu_ring_emit_pipeline_sync(ring);
-
 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
-		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
 		struct dma_fence *fence;
 		struct dma_fence *fence;
 
 
-		trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr);
-		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
+		trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
 
 
 		r = amdgpu_fence_emit(ring, &fence);
 		r = amdgpu_fence_emit(ring, &fence);
 		if (r)
 		if (r)
@@ -631,6 +786,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		mutex_lock(&id_mgr->lock);
 		mutex_lock(&id_mgr->lock);
 		dma_fence_put(id->last_flush);
 		dma_fence_put(id->last_flush);
 		id->last_flush = fence;
 		id->last_flush = fence;
+		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 		mutex_unlock(&id_mgr->lock);
 		mutex_unlock(&id_mgr->lock);
 	}
 	}
 
 
@@ -805,6 +961,53 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 	return result;
 	return result;
 }
 }
 
 
+/**
+ * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @pe: kmap addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Write count number of PT/PD entries directly.
+ */
+static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+				   uint64_t pe, uint64_t addr,
+				   unsigned count, uint32_t incr,
+				   uint64_t flags)
+{
+	unsigned int i;
+	uint64_t value;
+
+	for (i = 0; i < count; i++) {
+		value = params->pages_addr ?
+			amdgpu_vm_map_gart(params->pages_addr, addr) :
+			addr;
+		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
+					i, value, flags);
+		addr += incr;
+	}
+
+	/* Flush HDP */
+	mb();
+	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
+}
+
+static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+	struct amdgpu_sync sync;
+	int r;
+
+	amdgpu_sync_create(&sync);
+	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+	r = amdgpu_sync_wait(&sync, true);
+	amdgpu_sync_free(&sync);
+
+	return r;
+}
+
 /*
 /*
  * amdgpu_vm_update_level - update a single level in the hierarchy
  * amdgpu_vm_update_level - update a single level in the hierarchy
  *
  *
@@ -821,11 +1024,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				  unsigned level)
 				  unsigned level)
 {
 {
 	struct amdgpu_bo *shadow;
 	struct amdgpu_bo *shadow;
-	struct amdgpu_ring *ring;
-	uint64_t pd_addr, shadow_addr;
+	struct amdgpu_ring *ring = NULL;
+	uint64_t pd_addr, shadow_addr = 0;
 	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
 	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
-	unsigned count = 0, pt_idx, ndw;
+	unsigned count = 0, pt_idx, ndw = 0;
 	struct amdgpu_job *job;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
 	struct dma_fence *fence = NULL;
@@ -834,34 +1037,54 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 
 	if (!parent->entries)
 	if (!parent->entries)
 		return 0;
 		return 0;
-	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 
-	/* padding, etc. */
-	ndw = 64;
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+	shadow = parent->bo->shadow;
 
 
-	/* assume the worst case */
-	ndw += parent->last_entry_used * 6;
+	WARN_ON(vm->use_cpu_for_update && shadow);
+	if (vm->use_cpu_for_update && !shadow) {
+		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
+		if (r)
+			return r;
+		r = amdgpu_vm_bo_wait(adev, parent->bo);
+		if (unlikely(r)) {
+			amdgpu_bo_kunmap(parent->bo);
+			return r;
+		}
+		params.func = amdgpu_vm_cpu_set_ptes;
+	} else {
+		if (shadow) {
+			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
+			if (r)
+				return r;
+		}
+		ring = container_of(vm->entity.sched, struct amdgpu_ring,
+				    sched);
 
 
-	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		/* padding, etc. */
+		ndw = 64;
 
 
-	shadow = parent->bo->shadow;
-	if (shadow) {
-		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
+		/* assume the worst case */
+		ndw += parent->last_entry_used * 6;
+
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+
+		if (shadow) {
+			shadow_addr = amdgpu_bo_gpu_offset(shadow);
+			ndw *= 2;
+		} else {
+			shadow_addr = 0;
+		}
+
+		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 		if (r)
 		if (r)
 			return r;
 			return r;
-		shadow_addr = amdgpu_bo_gpu_offset(shadow);
-		ndw *= 2;
-	} else {
-		shadow_addr = 0;
-	}
 
 
-	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-	if (r)
-		return r;
+		params.ib = &job->ibs[0];
+		params.func = amdgpu_vm_do_set_ptes;
+	}
 
 
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	params.ib = &job->ibs[0];
 
 
 	/* walk over the address space and update the directory */
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
@@ -881,6 +1104,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		}
 		}
 
 
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_bo_gpu_offset(bo);
+		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		if (parent->entries[pt_idx].addr == pt)
 		if (parent->entries[pt_idx].addr == pt)
 			continue;
 			continue;
 
 
@@ -892,19 +1116,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
 
 			if (count) {
 			if (count) {
-				uint64_t pt_addr =
-					amdgpu_vm_adjust_mc_addr(adev, last_pt);
-
 				if (shadow)
 				if (shadow)
-					amdgpu_vm_do_set_ptes(&params,
-							      last_shadow,
-							      pt_addr, count,
-							      incr,
-							      AMDGPU_PTE_VALID);
-
-				amdgpu_vm_do_set_ptes(&params, last_pde,
-						      pt_addr, count, incr,
-						      AMDGPU_PTE_VALID);
+					params.func(&params,
+						    last_shadow,
+						    last_pt, count,
+						    incr,
+						    AMDGPU_PTE_VALID);
+
+				params.func(&params, last_pde,
+					    last_pt, count, incr,
+					    AMDGPU_PTE_VALID);
 			}
 			}
 
 
 			count = 1;
 			count = 1;
@@ -917,17 +1138,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 	}
 
 
 	if (count) {
 	if (count) {
-		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
-
 		if (vm->root.bo->shadow)
 		if (vm->root.bo->shadow)
-			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
-					      count, incr, AMDGPU_PTE_VALID);
+			params.func(&params, last_shadow, last_pt,
+				    count, incr, AMDGPU_PTE_VALID);
 
 
-		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
-				      count, incr, AMDGPU_PTE_VALID);
+		params.func(&params, last_pde, last_pt,
+			    count, incr, AMDGPU_PTE_VALID);
 	}
 	}
 
 
-	if (params.ib->length_dw == 0) {
+	if (params.func == amdgpu_vm_cpu_set_ptes)
+		amdgpu_bo_kunmap(parent->bo);
+	else if (params.ib->length_dw == 0) {
 		amdgpu_job_free(job);
 		amdgpu_job_free(job);
 	} else {
 	} else {
 		amdgpu_ring_pad_ib(ring, params.ib);
 		amdgpu_ring_pad_ib(ring, params.ib);
@@ -970,6 +1191,32 @@ error_free:
 	return r;
 	return r;
 }
 }
 
 
+/*
+ * amdgpu_vm_invalidate_level - mark all PD levels as invalid
+ *
+ * @parent: parent PD
+ *
+ * Mark all PD levels as invalid after an error.
+ */
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+{
+	unsigned pt_idx;
+
+	/*
+	 * Recurse into the subdirectories. This recursion is harmless because
+	 * we only have a maximum of 5 layers.
+	 */
+	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+
+		if (!entry->bo)
+			continue;
+
+		entry->addr = ~0ULL;
+		amdgpu_vm_invalidate_level(entry);
+	}
+}
+
 /*
 /*
  * amdgpu_vm_update_directories - make sure that all directories are valid
  * amdgpu_vm_update_directories - make sure that all directories are valid
  *
  *
@@ -982,7 +1229,13 @@ error_free:
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 				 struct amdgpu_vm *vm)
 {
 {
-	return amdgpu_vm_update_level(adev, vm, &vm->root, 0);
+	int r;
+
+	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
+	if (r)
+		amdgpu_vm_invalidate_level(&vm->root);
+
+	return r;
 }
 }
 
 
 /**
 /**
@@ -1022,58 +1275,37 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
  * @flags: mapping flags
  * @flags: mapping flags
  *
  *
  * Update the page tables in the range @start - @end.
  * Update the page tables in the range @start - @end.
+ * Returns 0 for success, -EINVAL for failure.
  */
  */
-static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
+static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 				  uint64_t start, uint64_t end,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint64_t flags)
 				  uint64_t dst, uint64_t flags)
 {
 {
 	struct amdgpu_device *adev = params->adev;
 	struct amdgpu_device *adev = params->adev;
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
 
 
-	uint64_t cur_pe_start, cur_nptes, cur_dst;
-	uint64_t addr; /* next GPU address to be updated */
+	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	struct amdgpu_bo *pt;
-	unsigned nptes; /* next number of ptes to be updated */
-	uint64_t next_pe_start;
-
-	/* initialize the variables */
-	addr = start;
-	pt = amdgpu_vm_get_pt(params, addr);
-	if (!pt) {
-		pr_err("PT not found, aborting update_ptes\n");
-		return;
-	}
-
-	if (params->shadow) {
-		if (!pt->shadow)
-			return;
-		pt = pt->shadow;
-	}
-	if ((addr & ~mask) == (end & ~mask))
-		nptes = end - addr;
-	else
-		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
-	cur_pe_start = amdgpu_bo_gpu_offset(pt);
-	cur_pe_start += (addr & mask) * 8;
-	cur_nptes = nptes;
-	cur_dst = dst;
+	unsigned nptes;
+	int r;
+	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
 
 
-	/* for next ptb*/
-	addr += nptes;
-	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 
 
 	/* walk over the address space and update the page tables */
 	/* walk over the address space and update the page tables */
-	while (addr < end) {
+	for (addr = start; addr < end; addr += nptes) {
 		pt = amdgpu_vm_get_pt(params, addr);
 		pt = amdgpu_vm_get_pt(params, addr);
 		if (!pt) {
 		if (!pt) {
 			pr_err("PT not found, aborting update_ptes\n");
 			pr_err("PT not found, aborting update_ptes\n");
-			return;
+			return -EINVAL;
 		}
 		}
 
 
 		if (params->shadow) {
 		if (params->shadow) {
+			if (WARN_ONCE(use_cpu_update,
+				"CPU VM update doesn't suuport shadow pages"))
+				return 0;
+
 			if (!pt->shadow)
 			if (!pt->shadow)
-				return;
+				return 0;
 			pt = pt->shadow;
 			pt = pt->shadow;
 		}
 		}
 
 
@@ -1082,32 +1314,25 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		else
 		else
 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
 
-		next_pe_start = amdgpu_bo_gpu_offset(pt);
-		next_pe_start += (addr & mask) * 8;
+		if (use_cpu_update) {
+			r = amdgpu_bo_kmap(pt, (void *)&pe_start);
+			if (r)
+				return r;
+		} else
+			pe_start = amdgpu_bo_gpu_offset(pt);
 
 
-		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
-		    ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
-			/* The next ptb is consecutive to current ptb.
-			 * Don't call the update function now.
-			 * Will update two ptbs together in future.
-			*/
-			cur_nptes += nptes;
-		} else {
-			params->func(params, cur_pe_start, cur_dst, cur_nptes,
-				     AMDGPU_GPU_PAGE_SIZE, flags);
+		pe_start += (addr & mask) * 8;
 
 
-			cur_pe_start = next_pe_start;
-			cur_nptes = nptes;
-			cur_dst = dst;
-		}
+		params->func(params, pe_start, dst, nptes,
+			     AMDGPU_GPU_PAGE_SIZE, flags);
 
 
-		/* for next ptb*/
-		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+
+		if (use_cpu_update)
+			amdgpu_bo_kunmap(pt);
 	}
 	}
 
 
-	params->func(params, cur_pe_start, cur_dst, cur_nptes,
-		     AMDGPU_GPU_PAGE_SIZE, flags);
+	return 0;
 }
 }
 
 
 /*
 /*
@@ -1119,11 +1344,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
  * @end: last PTE to handle
  * @end: last PTE to handle
  * @dst: addr those PTEs should point to
  * @dst: addr those PTEs should point to
  * @flags: hw mapping flags
  * @flags: hw mapping flags
+ * Returns 0 for success, -EINVAL for failure.
  */
  */
-static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
+static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 				uint64_t dst, uint64_t flags)
 {
 {
+	int r;
+
 	/**
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1152,28 +1380,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 
 
 	/* system pages are non continuously */
 	/* system pages are non continuously */
 	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
 	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end)) {
-
-		amdgpu_vm_update_ptes(params, start, end, dst, flags);
-		return;
-	}
+	    (frag_start >= frag_end))
+		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
 
 	/* handle the 4K area at the beginning */
 	/* handle the 4K area at the beginning */
 	if (start != frag_start) {
 	if (start != frag_start) {
-		amdgpu_vm_update_ptes(params, start, frag_start,
-				      dst, flags);
+		r = amdgpu_vm_update_ptes(params, start, frag_start,
+					  dst, flags);
+		if (r)
+			return r;
 		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 	}
 	}
 
 
 	/* handle the area in the middle */
 	/* handle the area in the middle */
-	amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-			      flags | frag_flags);
+	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
+				  flags | frag_flags);
+	if (r)
+		return r;
 
 
 	/* handle the 4K area at the end */
 	/* handle the 4K area at the end */
 	if (frag_end != end) {
 	if (frag_end != end) {
 		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
 		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
 	}
 	}
+	return r;
 }
 }
 
 
 /**
 /**
@@ -1215,6 +1445,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	params.vm = vm;
 	params.vm = vm;
 	params.src = src;
 	params.src = src;
 
 
+	if (vm->use_cpu_for_update) {
+		/* params.src is used as flag to indicate system Memory */
+		if (pages_addr)
+			params.src = ~0;
+
+		/* Wait for PT BOs to be free. PTs share the same resv. object
+		 * as the root PD BO
+		 */
+		r = amdgpu_vm_bo_wait(adev, vm->root.bo);
+		if (unlikely(r))
+			return r;
+
+		params.func = amdgpu_vm_cpu_set_ptes;
+		params.pages_addr = pages_addr;
+		params.shadow = false;
+		return amdgpu_vm_frag_ptes(&params, start, last + 1,
+					   addr, flags);
+	}
+
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 
 	/* sync to everything on unmapping */
 	/* sync to everything on unmapping */
@@ -1294,9 +1543,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_free;
 		goto error_free;
 
 
 	params.shadow = true;
 	params.shadow = true;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 	params.shadow = false;
 	params.shadow = false;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 
 
 	amdgpu_ring_pad_ib(ring, params.ib);
 	amdgpu_ring_pad_ib(ring, params.ib);
 	WARN_ON(params.ib->length_dw > ndw);
 	WARN_ON(params.ib->length_dw > ndw);
@@ -2137,20 +2390,25 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
  *
  *
  * @adev: amdgpu_device pointer
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  * @vm: requested vm
+ * @vm_context: Indicates if it is a GFX or Compute context
  *
  *
  * Init @vm fields.
  * Init @vm fields.
  */
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		   int vm_context)
 {
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
 	unsigned ring_instance;
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct amdgpu_ring *ring;
 	struct amd_sched_rq *rq;
 	struct amd_sched_rq *rq;
-	int r;
+	int r, i;
+	u64 flags;
 
 
 	vm->va = RB_ROOT;
 	vm->va = RB_ROOT;
 	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
 	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->cleared);
@@ -2167,14 +2425,29 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
+	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
+		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+	else
+		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+						AMDGPU_VM_USE_CPU_FOR_GFX);
+	DRM_DEBUG_DRIVER("VM update mode is %s\n",
+			 vm->use_cpu_for_update ? "CPU" : "SDMA");
+	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_dir_update = NULL;
 	vm->last_dir_update = NULL;
 
 
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	if (vm->use_cpu_for_update)
+		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	else
+		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+				AMDGPU_GEM_CREATE_SHADOW);
+
 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-			     AMDGPU_GEM_CREATE_SHADOW |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
+			     flags,
 			     NULL, NULL, &vm->root.bo);
 			     NULL, NULL, &vm->root.bo);
 	if (r)
 	if (r)
 		goto error_free_sched_entity;
 		goto error_free_sched_entity;
@@ -2219,7 +2492,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
 		for (i = 0; i <= level->last_entry_used; i++)
 		for (i = 0; i <= level->last_entry_used; i++)
 			amdgpu_vm_free_levels(&level->entries[i]);
 			amdgpu_vm_free_levels(&level->entries[i]);
 
 
-	drm_free_large(level->entries);
+	kvfree(level->entries);
 }
 }
 
 
 /**
 /**
@@ -2235,6 +2508,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	int i;
 
 
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 
@@ -2258,6 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 
 	amdgpu_vm_free_levels(&vm->root);
 	amdgpu_vm_free_levels(&vm->root);
 	dma_fence_put(vm->last_dir_update);
 	dma_fence_put(vm->last_dir_update);
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+		amdgpu_vm_free_reserved_vmid(adev, vm, i);
 }
 }
 
 
 /**
 /**
@@ -2277,6 +2553,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 
 
 		mutex_init(&id_mgr->lock);
 		mutex_init(&id_mgr->lock);
 		INIT_LIST_HEAD(&id_mgr->ids_lru);
 		INIT_LIST_HEAD(&id_mgr->ids_lru);
+		atomic_set(&id_mgr->reserved_vmid_num, 0);
 
 
 		/* skip over VMID 0, since it is the system VM */
 		/* skip over VMID 0, since it is the system VM */
 		for (j = 1; j < id_mgr->num_ids; ++j) {
 		for (j = 1; j < id_mgr->num_ids; ++j) {
@@ -2295,6 +2572,23 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
+
+	/* Unless overridden by the user, Compute VM tables are updated by the
+	 * CPU only on large BAR systems by default.
+	 */
+#ifdef CONFIG_X86_64
+	if (amdgpu_vm_update_mode == -1) {
+		if (amdgpu_vm_is_large_bar(adev))
+			adev->vm_manager.vm_update_mode =
+				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
+		else
+			adev->vm_manager.vm_update_mode = 0;
+	} else
+		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
+#else
+	adev->vm_manager.vm_update_mode = 0;
+#endif
+
 }
 }
 
 
 /**
 /**
@@ -2322,3 +2616,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 		}
 		}
 	}
 	}
 }
 }
+
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	union drm_amdgpu_vm *args = data;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	int r;
+
+	switch (args->in.op) {
+	case AMDGPU_VM_OP_RESERVE_VMID:
+		/* currently, we only need to reserve a vmid from the gfxhub */
+		r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
+						  AMDGPU_GFXHUB);
+		if (r)
+			return r;
+		break;
+	case AMDGPU_VM_OP_UNRESERVE_VMID:
+		amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}

+ 28 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -84,6 +84,16 @@ struct amdgpu_bo_list_entry;
 
 
 /* hardcode that limit for now */
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE			(8 << 20)
 #define AMDGPU_VA_RESERVED_SIZE			(8 << 20)
+/* max vmids dedicated for process */
+#define AMDGPU_VM_MAX_RESERVED_VMID	1
+
+#define AMDGPU_VM_CONTEXT_GFX 0
+#define AMDGPU_VM_CONTEXT_COMPUTE 1
+
+/* See vm_update_mode */
+#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
+#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
+
 
 
 struct amdgpu_vm_pt {
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	struct amdgpu_bo	*bo;
@@ -123,8 +133,13 @@ struct amdgpu_vm {
 
 
 	/* client id */
 	/* client id */
 	u64                     client_id;
 	u64                     client_id;
+	/* dedicated to vm */
+	struct amdgpu_vm_id	*reserved_vmid[AMDGPU_MAX_VMHUBS];
 	/* each VM will map on CSA */
 	/* each VM will map on CSA */
 	struct amdgpu_bo_va *csa_bo_va;
 	struct amdgpu_bo_va *csa_bo_va;
+
+	/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
+	bool                    use_cpu_for_update;
 };
 };
 
 
 struct amdgpu_vm_id {
 struct amdgpu_vm_id {
@@ -152,6 +167,7 @@ struct amdgpu_vm_id_manager {
 	unsigned		num_ids;
 	unsigned		num_ids;
 	struct list_head	ids_lru;
 	struct list_head	ids_lru;
 	struct amdgpu_vm_id	ids[AMDGPU_NUM_VM];
 	struct amdgpu_vm_id	ids[AMDGPU_NUM_VM];
+	atomic_t		reserved_vmid_num;
 };
 };
 
 
 struct amdgpu_vm_manager {
 struct amdgpu_vm_manager {
@@ -168,8 +184,6 @@ struct amdgpu_vm_manager {
 	uint32_t				block_size;
 	uint32_t				block_size;
 	/* vram base address for page table entry  */
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
 	u64					vram_base_offset;
-	/* is vm enabled? */
-	bool					enabled;
 	/* vm pte handling */
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
 	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
 	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
 	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
@@ -181,11 +195,18 @@ struct amdgpu_vm_manager {
 	/* partial resident texture handling */
 	/* partial resident texture handling */
 	spinlock_t				prt_lock;
 	spinlock_t				prt_lock;
 	atomic_t				num_prt_users;
 	atomic_t				num_prt_users;
+
+	/* controls how VM page tables are updated for Graphics and Compute.
+	 * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU
+	 * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
+	 */
+	int					vm_update_mode;
 };
 };
 
 
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		   int vm_context);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct list_head *validated,
@@ -239,5 +260,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
 		      struct amdgpu_bo_va *bo_va);
 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
+				  struct amdgpu_job *job);
+void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
 
 #endif
 #endif

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/ci_dpm.c

@@ -22,7 +22,7 @@
  */
  */
 
 
 #include <linux/firmware.h>
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ucode.h"

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/ci_smc.c

@@ -23,7 +23,7 @@
  */
  */
 
 
 #include <linux/firmware.h>
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "cikd.h"
 #include "cikd.h"
 #include "ppsmc.h"
 #include "ppsmc.h"

+ 61 - 62
drivers/gpu/drm/amd/amdgpu/cik.c

@@ -24,7 +24,7 @@
 #include <linux/firmware.h>
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/module.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_ih.h"
@@ -964,62 +964,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
 }
 }
 
 
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
-	{mmGRBM_STATUS, false},
-	{mmGB_ADDR_CONFIG, false},
-	{mmMC_ARB_RAMCFG, false},
-	{mmGB_TILE_MODE0, false},
-	{mmGB_TILE_MODE1, false},
-	{mmGB_TILE_MODE2, false},
-	{mmGB_TILE_MODE3, false},
-	{mmGB_TILE_MODE4, false},
-	{mmGB_TILE_MODE5, false},
-	{mmGB_TILE_MODE6, false},
-	{mmGB_TILE_MODE7, false},
-	{mmGB_TILE_MODE8, false},
-	{mmGB_TILE_MODE9, false},
-	{mmGB_TILE_MODE10, false},
-	{mmGB_TILE_MODE11, false},
-	{mmGB_TILE_MODE12, false},
-	{mmGB_TILE_MODE13, false},
-	{mmGB_TILE_MODE14, false},
-	{mmGB_TILE_MODE15, false},
-	{mmGB_TILE_MODE16, false},
-	{mmGB_TILE_MODE17, false},
-	{mmGB_TILE_MODE18, false},
-	{mmGB_TILE_MODE19, false},
-	{mmGB_TILE_MODE20, false},
-	{mmGB_TILE_MODE21, false},
-	{mmGB_TILE_MODE22, false},
-	{mmGB_TILE_MODE23, false},
-	{mmGB_TILE_MODE24, false},
-	{mmGB_TILE_MODE25, false},
-	{mmGB_TILE_MODE26, false},
-	{mmGB_TILE_MODE27, false},
-	{mmGB_TILE_MODE28, false},
-	{mmGB_TILE_MODE29, false},
-	{mmGB_TILE_MODE30, false},
-	{mmGB_TILE_MODE31, false},
-	{mmGB_MACROTILE_MODE0, false},
-	{mmGB_MACROTILE_MODE1, false},
-	{mmGB_MACROTILE_MODE2, false},
-	{mmGB_MACROTILE_MODE3, false},
-	{mmGB_MACROTILE_MODE4, false},
-	{mmGB_MACROTILE_MODE5, false},
-	{mmGB_MACROTILE_MODE6, false},
-	{mmGB_MACROTILE_MODE7, false},
-	{mmGB_MACROTILE_MODE8, false},
-	{mmGB_MACROTILE_MODE9, false},
-	{mmGB_MACROTILE_MODE10, false},
-	{mmGB_MACROTILE_MODE11, false},
-	{mmGB_MACROTILE_MODE12, false},
-	{mmGB_MACROTILE_MODE13, false},
-	{mmGB_MACROTILE_MODE14, false},
-	{mmGB_MACROTILE_MODE15, false},
-	{mmCC_RB_BACKEND_DISABLE, false, true},
-	{mmGC_USER_RB_BACKEND_DISABLE, false, true},
-	{mmGB_BACKEND_MAP, false, false},
-	{mmPA_SC_RASTER_CONFIG, false, true},
-	{mmPA_SC_RASTER_CONFIG_1, false, true},
+	{mmGRBM_STATUS},
+	{mmGB_ADDR_CONFIG},
+	{mmMC_ARB_RAMCFG},
+	{mmGB_TILE_MODE0},
+	{mmGB_TILE_MODE1},
+	{mmGB_TILE_MODE2},
+	{mmGB_TILE_MODE3},
+	{mmGB_TILE_MODE4},
+	{mmGB_TILE_MODE5},
+	{mmGB_TILE_MODE6},
+	{mmGB_TILE_MODE7},
+	{mmGB_TILE_MODE8},
+	{mmGB_TILE_MODE9},
+	{mmGB_TILE_MODE10},
+	{mmGB_TILE_MODE11},
+	{mmGB_TILE_MODE12},
+	{mmGB_TILE_MODE13},
+	{mmGB_TILE_MODE14},
+	{mmGB_TILE_MODE15},
+	{mmGB_TILE_MODE16},
+	{mmGB_TILE_MODE17},
+	{mmGB_TILE_MODE18},
+	{mmGB_TILE_MODE19},
+	{mmGB_TILE_MODE20},
+	{mmGB_TILE_MODE21},
+	{mmGB_TILE_MODE22},
+	{mmGB_TILE_MODE23},
+	{mmGB_TILE_MODE24},
+	{mmGB_TILE_MODE25},
+	{mmGB_TILE_MODE26},
+	{mmGB_TILE_MODE27},
+	{mmGB_TILE_MODE28},
+	{mmGB_TILE_MODE29},
+	{mmGB_TILE_MODE30},
+	{mmGB_TILE_MODE31},
+	{mmGB_MACROTILE_MODE0},
+	{mmGB_MACROTILE_MODE1},
+	{mmGB_MACROTILE_MODE2},
+	{mmGB_MACROTILE_MODE3},
+	{mmGB_MACROTILE_MODE4},
+	{mmGB_MACROTILE_MODE5},
+	{mmGB_MACROTILE_MODE6},
+	{mmGB_MACROTILE_MODE7},
+	{mmGB_MACROTILE_MODE8},
+	{mmGB_MACROTILE_MODE9},
+	{mmGB_MACROTILE_MODE10},
+	{mmGB_MACROTILE_MODE11},
+	{mmGB_MACROTILE_MODE12},
+	{mmGB_MACROTILE_MODE13},
+	{mmGB_MACROTILE_MODE14},
+	{mmGB_MACROTILE_MODE15},
+	{mmCC_RB_BACKEND_DISABLE, true},
+	{mmGC_USER_RB_BACKEND_DISABLE, true},
+	{mmGB_BACKEND_MAP, false},
+	{mmPA_SC_RASTER_CONFIG, true},
+	{mmPA_SC_RASTER_CONFIG_1, true},
 };
 
 static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
@@ -1050,11 +1050,10 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
 		if (reg_offset != cik_allowed_read_registers[i].reg_offset)
 			continue;
 
-		if (!cik_allowed_read_registers[i].untouched)
-			*value = cik_allowed_read_registers[i].grbm_indexed ?
-				 cik_read_indexed_register(adev, se_num,
-							   sh_num, reg_offset) :
-				 RREG32(reg_offset);
+		*value = cik_allowed_read_registers[i].grbm_indexed ?
+			 cik_read_indexed_register(adev, se_num,
+						   sh_num, reg_offset) :
+			 RREG32(reg_offset);
 		return 0;
 	}
 	return -EINVAL;

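The cik.c change above works because C zero-initializes any aggregate members a partial initializer leaves out: once the unused untouched flag is dropped, an entry such as {mmGRBM_STATUS} implicitly gets grbm_indexed == false, and only the GRBM-indexed registers need to spell out true. A small standalone sketch of the same pattern follows; the struct name and register values are illustrative, not taken from the driver.

/* Partial aggregate initializers zero the omitted members, so "plain"
 * registers need nothing beyond their offset.  Values are made-up examples.
 */
#include <stdbool.h>
#include <stdio.h>

struct allowed_reg {
	unsigned int reg_offset;
	bool grbm_indexed;              /* false unless explicitly set */
};

static const struct allowed_reg table[] = {
	{ 0x2004 },                     /* grbm_indexed defaults to false */
	{ 0x263d, true },               /* read through the GRBM index    */
};

int main(void)
{
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		printf("reg 0x%04x  grbm_indexed=%d\n",
		       table[i].reg_offset, table[i].grbm_indexed);
	return 0;
}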
+ 1 - 1
drivers/gpu/drm/amd/amdgpu/cik_ih.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "cikd.h"

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/cz_ih.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "vid.h"

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"

+ 551 - 49
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
@@ -118,14 +118,27 @@ static const struct {
 static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev,
 				     u32 block_offset, u32 reg)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_endpt_rreg ----no impl!!!!\n");
-	return 0;
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+	WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+	r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+	return r;
 }
 
 static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
 				      u32 block_offset, u32 reg, u32 v)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_endpt_wreg ----no impl!!!!\n");
+	unsigned long flags;
+
+	spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+	WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset,
+		reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK);
+	WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
 }
 
 static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
@@ -501,21 +514,16 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
 
 static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
 {
-	int num_crtc = 0;
-
 	switch (adev->asic_type) {
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
 	case CHIP_VERDE:
-		num_crtc = 6;
-		break;
+		return 6;
 	case CHIP_OLAND:
-		num_crtc = 2;
-		break;
+		return 2;
 	default:
-		num_crtc = 0;
+		return 0;
 	}
-	return num_crtc;
 }
 
 void dce_v6_0_disable_dce(struct amdgpu_device *adev)
@@ -1225,17 +1233,17 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
 		dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads);
 	}
 }
-/*
+
 static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev)
 {
 	int i;
-	u32 offset, tmp;
+	u32 tmp;
 
 	for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
-		offset = adev->mode_info.audio.pin[i].offset;
-		tmp = RREG32_AUDIO_ENDPT(offset,
-				      AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
-		if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1)
+		tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset,
+				ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+		if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT,
+					PORT_CONNECTIVITY))
 			adev->mode_info.audio.pin[i].connected = false;
 		else
 			adev->mode_info.audio.pin[i].connected = true;
@@ -1257,45 +1265,206 @@ static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *ade
 	return NULL;
 }
 
-static void dce_v6_0_afmt_audio_select_pin(struct drm_encoder *encoder)
+static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
 {
 	struct amdgpu_device *adev = encoder->dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-	u32 offset;
 
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	offset = dig->afmt->offset;
-
-	WREG32(AFMT_AUDIO_SRC_CONTROL + offset,
-	       AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id));
-
+	WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset,
+	       REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT,
+		             dig->afmt->pin->id));
 }
 
 static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
 						struct drm_display_mode *mode)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_write_latency_fields---no imp!!!!!\n");
+	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	int interlace = 0;
+	u32 tmp;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		interlace = 1;
+
+	if (connector->latency_present[interlace]) {
+		tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				VIDEO_LIPSYNC, connector->video_latency[interlace]);
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				AUDIO_LIPSYNC, connector->audio_latency[interlace]);
+	} else {
+		tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				VIDEO_LIPSYNC, 0);
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				AUDIO_LIPSYNC, 0);
+	}
+	WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+			   ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
 }
 
 
 static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_write_speaker_allocation---no imp!!!!!\n");
+	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	u8 *sadb = NULL;
+	int sad_count;
+	u32 tmp;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		sad_count = 0;
+	}
+
+	/* program the speaker allocation */
+	tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+			ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+	tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+			HDMI_CONNECTION, 0);
+	tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+			DP_CONNECTION, 0);
+
+	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				DP_CONNECTION, 1);
+	else
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				HDMI_CONNECTION, 1);
+
+	if (sad_count)
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				SPEAKER_ALLOCATION, sadb[0]);
+	else
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				SPEAKER_ALLOCATION, 5); /* stereo */
+
+	WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+			ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+	kfree(sadb);
 }
 
 
 static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_write_sad_regs---no imp!!!!!\n");
+	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	struct cea_sad *sads;
+	int i, sad_count;
+
+	static const u16 eld_reg_to_type[][2] = {
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+	};
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+	if (sad_count <= 0) {
+		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+		u32 tmp = 0;
+		u8 stereo_freqs = 0;
+		int max_channels = -1;
+		int j;
+
+		for (j = 0; j < sad_count; j++) {
+			struct cea_sad *sad = &sads[j];
+
+			if (sad->format == eld_reg_to_type[i][1]) {
+				if (sad->channels > max_channels) {
+					tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+							MAX_CHANNELS, sad->channels);
+					tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+							DESCRIPTOR_BYTE_2, sad->byte2);
+					tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+							SUPPORTED_FREQUENCIES, sad->freq);
+					max_channels = sad->channels;
+				}
+
+				if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+					stereo_freqs |= sad->freq;
+				else
+					break;
+			}
+		}
+
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+				SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
+		WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+	}
+
+	kfree(sads);
 
 
 }
-*/
+
 static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
 				  struct amdgpu_audio_pin *pin,
 				  bool enable)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_enable---no imp!!!!!\n");
+	if (!pin)
+		return;
+
+	WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+			enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
 }
 
 
 static const u32 pin_offsets[7] =
@@ -1311,42 +1480,372 @@ static const u32 pin_offsets[7] =
 
 
 static int dce_v6_0_audio_init(struct amdgpu_device *adev)
 {
+	int i;
+
+	if (!amdgpu_audio)
+		return 0;
+
+	adev->mode_info.audio.enabled = true;
+
+	switch (adev->asic_type) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+	default:
+		adev->mode_info.audio.num_pins = 6;
+		break;
+	case CHIP_OLAND:
+		adev->mode_info.audio.num_pins = 2;
+		break;
+	}
+
+	for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+		adev->mode_info.audio.pin[i].channels = -1;
+		adev->mode_info.audio.pin[i].rate = -1;
+		adev->mode_info.audio.pin[i].bits_per_sample = -1;
+		adev->mode_info.audio.pin[i].status_bits = 0;
+		adev->mode_info.audio.pin[i].category_code = 0;
+		adev->mode_info.audio.pin[i].connected = false;
+		adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+		adev->mode_info.audio.pin[i].id = i;
+		dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+	}
+
 	return 0;
 }
 
 
 static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
 {
+	int i;
 
 
+	if (!amdgpu_audio)
+		return;
+
+	if (!adev->mode_info.audio.enabled)
+		return;
+
+	for (i = 0; i < adev->mode_info.audio.num_pins; i++)
+		dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+
+	adev->mode_info.audio.enabled = false;
 }
 
 
-/*
-static void dce_v6_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder)
 {
-	DRM_INFO("xxxx: dce_v6_0_afmt_update_ACR---no imp!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1);
+	WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
 }
-*/
-/*
- * build a HDMI Video Info Frame
- */
-/*
-static void dce_v6_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
-					       void *buffer, size_t size)
+
+static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder,
+				   uint32_t clock, int bpc)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE,
+			bpc > 8 ? 0 : 1);
+	WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
+	WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
+	tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
+	WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
+	WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
+	tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
+	WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
+	WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
+	tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
+	WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder,
+					       struct drm_display_mode *mode)
 {
-	DRM_INFO("xxxx: dce_v6_0_afmt_update_avi_infoframe---no imp!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct hdmi_avi_infoframe frame;
+	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+	uint8_t *payload = buffer + 3;
+	uint8_t *header = buffer;
+	ssize_t err;
+	u32 tmp;
+
+	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
+	if (err < 0) {
+		DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+		return;
+	}
+
+	err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+	if (err < 0) {
+		DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+		return;
+	}
+
+	WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
+	       payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24));
+	WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
+	       payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24));
+	WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
+	       payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24));
+	WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
+	       payload[0xC] | (payload[0xD] << 8) | (header[1] << 24));
+
+	tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+	/* anything other than 0 */
+	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1,
+			HDMI_AUDIO_INFO_LINE, 2);
+	WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
 }
 
 
 static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_set_dto---no imp!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+	u32 tmp;
+
+	/*
+	 * Two dtos: generally use dto0 for hdmi, dto1 for dp.
+	 * Express [24MHz / target pixel clock] as an exact rational
+	 * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
+	 * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
+	 */
+	tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
+	tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+			DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+				DCCG_AUDIO_DTO_SEL, 0);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+				DCCG_AUDIO_DTO_SEL, 1);
+	}
+	WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000);
+		WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000);
+		WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock);
+	}
 }
 }
-*/
-/*
- * update the info frames with the data from the current display mode
- */
+
+static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+	WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
+	WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+	WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+	WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff);
+	WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
+	WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1);
+	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+	WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmHDMI_GC + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 1 : 0);
+	WREG32(mmHDMI_GC + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
+		WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
+		WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+		WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+	} else {
+		tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0);
+		WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0);
+		WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+	}
+}
+
+static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+		WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1);
+		WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+		WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp);
+	} else {
+		WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0);
+	}
+}
+
 static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
 				  struct drm_display_mode *mode)
 {
-	DRM_INFO("xxxx: dce_v6_0_afmt_setmode ----no impl !!!!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+	int bpc = 8;
+
+	if (!dig || !dig->afmt)
+		return;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	if (!dig->afmt->enabled)
+		return;
+
+	dig->afmt->pin = dce_v6_0_audio_get_pin(adev);
+	if (!dig->afmt->pin)
+		return;
+
+	if (encoder->crtc) {
+		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+		bpc = amdgpu_crtc->bpc;
+	}
+
+	/* disable audio before setting up hw */
+	dce_v6_0_audio_enable(adev, dig->afmt->pin, false);
+
+	dce_v6_0_audio_set_mute(encoder, true);
+	dce_v6_0_audio_write_speaker_allocation(encoder);
+	dce_v6_0_audio_write_sad_regs(encoder);
+	dce_v6_0_audio_write_latency_fields(encoder, mode);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		dce_v6_0_audio_set_dto(encoder, mode->clock);
+		dce_v6_0_audio_set_vbi_packet(encoder);
+		dce_v6_0_audio_set_acr(encoder, mode->clock, bpc);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10);
+	}
+	dce_v6_0_audio_set_packet(encoder);
+	dce_v6_0_audio_select_pin(encoder);
+	dce_v6_0_audio_set_avi_infoframe(encoder, mode);
+	dce_v6_0_audio_set_mute(encoder, false);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		dce_v6_0_audio_hdmi_enable(encoder, 1);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		dce_v6_0_audio_dp_enable(encoder, 1);
+	}
+
+	/* enable audio after setting up hw */
+	dce_v6_0_audio_enable(adev, dig->afmt->pin, true);
 }
 
 static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
@@ -1362,6 +1861,7 @@ static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 	/* Silent, r600_hdmi_enable will raise WARN for us */
 	if (enable && dig->afmt->enabled)
 		return;
+
 	if (!enable && !dig->afmt->enabled)
 		return;
 
@@ -2756,6 +3256,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
 {
 
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
 
 	amdgpu_encoder->pixel_clock = adjusted_mode->clock;
 
@@ -2765,7 +3266,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
 	/* set scaler clears this on some chips */
 	dce_v6_0_set_interleave(encoder->crtc, mode);
 
-	if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
+	if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) {
 		dce_v6_0_afmt_enable(encoder, true);
 		dce_v6_0_afmt_setmode(encoder, adjusted_mode);
 	}
@@ -2827,11 +3328,12 @@ static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
 
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig;
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
 
 	amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
 
 	if (amdgpu_atombios_encoder_is_digital(encoder)) {
-		if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
+		if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em))
 			dce_v6_0_afmt_enable(encoder, false);
 		dig = amdgpu_encoder->enc_priv;
 		dig->dig_encoder = -1;

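One detail of the dce_v6_0.c rework above worth calling out: dce_v6_0_audio_set_dto() expresses 24 MHz divided by the pixel clock as an exact fraction by programming 24000 (kHz) into DCCG_AUDIO_DTOx_PHASE and the pixel clock (also in kHz) into DCCG_AUDIO_DTOx_MODULE. The standalone sketch below only illustrates that numerator/denominator relationship; the pixel-clock value is an example, not something taken from the patch.

/* Illustrative: the phase/module pair programmed by dce_v6_0_audio_set_dto()
 * encodes 24 MHz / pixel clock as an exact rational number.
 */
#include <stdio.h>

int main(void)
{
	unsigned int phase  = 24000;    /* 24 MHz reference clock, in kHz        */
	unsigned int module = 148500;   /* example pixel clock (1080p60), in kHz */

	printf("audio DTO ratio = %u/%u = %.6f\n",
	       phase, module, (double)phase / module);
	return 0;
}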
+ 1 - 1
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/dce_virtual.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"

+ 702 - 913
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

@@ -393,8 +393,11 @@ out:
 
 static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 {
-	const u32 num_tile_mode_states = 32;
-	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	u32 reg_offset, split_equal_to_row_size, *tilemode;
+
+	memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array));
+	tilemode = adev->gfx.config.tile_mode_array;
 
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
@@ -410,887 +413,680 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 	}
 
 	if (adev->asic_type == CHIP_VERDE) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 4:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 5:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 7:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED));
-				break;
-			case 9:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 10:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 14:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 15:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 16:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 17:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 18:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 19:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 20:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 21:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 22:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 23:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 24:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 25:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 26:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 27:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 28:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 29:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 30:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			default:
-				continue;
-			}
-			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
-		}
-	} else if (adev->asic_type == CHIP_OLAND ||
-	    adev->asic_type == CHIP_HAINAN) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 4:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 5:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 7:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED));
-				break;
-			case 9:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 10:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 14:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 15:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 16:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 17:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 18:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 19:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 20:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 21:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 22:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 23:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 24:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 25:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 26:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 27:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 28:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 29:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 30:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			default:
-				continue;
-			}
-			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
-		}
+		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[1] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[2] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[3] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[4] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[5] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[6] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[7] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[8] =   ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+		tilemode[9] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[10] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[11] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[12] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[13] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[14] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[15] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[16] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[17] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[18] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[19] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[20] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[21] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[22] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[23] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[24] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[25] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[26] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[27] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[28] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[29] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[30] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+	} else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) {
+		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[1] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[2] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[3] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[4] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2);
+		tilemode[5] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[6] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[7] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[8] =   ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+		tilemode[9] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2);
+		tilemode[10] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[11] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[12] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[13] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2);
+		tilemode[14] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[15] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[16] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[17] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[18] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+				PIPE_CONFIG(ADDR_SURF_P2);
+		tilemode[19] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[20] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[21] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[22] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[23] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[24] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[25] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[26] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[27] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[28] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[29] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[30] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P2) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
 	} else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 4:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16));
-				break;
-			case 5:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 7:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED));
-				break;
-			case 9:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16));
-				break;
-			case 10:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16));
-				break;
-			case 14:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 15:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 16:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 17:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 18:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16));
-				break;
-			case 19:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 20:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 21:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 22:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 23:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 24:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 25:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 26:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 27:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 28:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 29:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 30:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			default:
-				continue;
-			}
-			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
-		}
-	} else{
-
+		tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[1] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[2] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[3] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[4] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+		tilemode[5] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[6] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[7] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[8] =   ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+		tilemode[9] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+		tilemode[10] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[11] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[12] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[13] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+		tilemode[14] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[15] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[16] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[17] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[18] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+		tilemode[19] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[20] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK) |
+				TILE_SPLIT(split_equal_to_row_size);
+		tilemode[21] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[22] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[23] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[24] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[25] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[26] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[27] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[28] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[29] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[30] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+				ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+				PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+				BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK);
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+	} else {
 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
 	}
-
 }
 
 static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
@@ -1318,11 +1114,6 @@ static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
 	WREG32(mmGRBM_GFX_INDEX, data);
 }
 
-static u32 gfx_v6_0_create_bitmask(u32 bit_width)
-{
-	return (u32)(((u64)1 << bit_width) - 1);
-}
-
 static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -1332,8 +1123,8 @@ static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 
 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
 
-	mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_backends_per_se/
-					adev->gfx.config.max_sh_per_se);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se/
+					 adev->gfx.config.max_sh_per_se);
 
 	return ~data & mask;
 }
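Note: the per-IP helper gfx_v6_0_create_bitmask() removed above (and its gfx_v7_0 twin removed further down) is folded into one shared amdgpu_gfx_create_bitmask(). A minimal sketch of what that shared helper amounts to, based only on the removed bodies; its exact home (presumably among the amdgpu_gfx helpers this series adds) is an assumption:

	/*
	 * Sketch, not patch content: equivalent of the per-IP helpers this
	 * patch deletes.  Builds a mask with the low bit_width bits set.
	 */
	static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
	{
		/* bit_width = 4 -> 0xf, bit_width = 32 -> 0xffffffff */
		return (u32)((1ULL << bit_width) - 1);
	}

Doing the shift in 64 bits keeps bit_width == 32 well defined; both removed copies already did that, just with different spellings of the cast.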
@@ -1399,11 +1190,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
 			raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK;
 
-			if (!se_mask[idx]) {
+			if (!se_mask[idx])
 				raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT;
-			} else {
+			else
 				raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT;
-			}
 		}
 
 		pkr0_mask &= rb_mask;
@@ -1411,11 +1201,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
 			raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK;
 
-			if (!pkr0_mask) {
+			if (!pkr0_mask)
 				raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT;
-			} else {
+			else
 				raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT;
-			}
 		}
 
 		if (rb_per_se >= 2) {
@@ -1427,13 +1216,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 			if (!rb0_mask || !rb1_mask) {
 				raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK;
 
-				if (!rb0_mask) {
+				if (!rb0_mask)
 					raster_config_se |=
 						RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT;
-				} else {
+				else
 					raster_config_se |=
 						RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT;
-				}
 			}
 
 			if (rb_per_se > 2) {
@@ -1444,13 +1232,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 				if (!rb0_mask || !rb1_mask) {
 					raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK;
 
-					if (!rb0_mask) {
+					if (!rb0_mask)
 						raster_config_se |=
 							RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT;
-					} else {
+					else
 						raster_config_se |=
 							RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT;
-					}
 				}
 			}
 		}
@@ -1479,8 +1266,9 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v6_0_get_rb_active_bitmap(adev);
-			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
-					rb_bitmap_width_per_sh);
+			active_rbs |= data <<
+				((i * adev->gfx.config.max_sh_per_se + j) *
+				 rb_bitmap_width_per_sh);
 		}
 	}
 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
@@ -1494,13 +1282,12 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
 	gfx_v6_0_raster_config(adev, &raster_config);
 
 	if (!adev->gfx.config.backend_enable_mask ||
-			adev->gfx.config.num_rbs >= num_rb_pipes) {
+	     adev->gfx.config.num_rbs >= num_rb_pipes)
 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
-	} else {
+	else
 		gfx_v6_0_write_harvested_raster_configs(adev, raster_config,
 							adev->gfx.config.backend_enable_mask,
 							num_rb_pipes);
-	}
 
 	/* cache the values for userspace */
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
@@ -1517,11 +1304,6 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
-/*
-static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev)
-{
-}
-*/
 
 static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
 						 u32 bitmap)
@@ -1544,7 +1326,7 @@ static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev)
 	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
 
-	mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
 }
 
@@ -1688,7 +1470,8 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
 	WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
-	mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
+	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
+	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
 
 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
 	adev->gfx.config.mem_max_burst_length_bytes = 256;
@@ -3719,6 +3502,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
 	unsigned disable_masks[4 * 2];
+	u32 ao_cu_num;
+
+	if (adev->flags & AMD_IS_APU)
+		ao_cu_num = 2;
+	else
+		ao_cu_num = adev->gfx.config.max_cu_per_sh;
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
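Note: the ao_cu_num added here feeds the CU-counting loop changed in the next hunk. A compressed illustration of the combined effect (illustrative only; the mask <<= 1 walk is assumed from the surrounding function, which is outside the hunks shown):

	u32 ao_cu_num = (adev->flags & AMD_IS_APU) ?
			2 : adev->gfx.config.max_cu_per_sh;
	u32 counter = 0, ao_bitmap = 0, mask = 1;
	unsigned int k;

	for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
		if (bitmap & mask) {
			/* only the first ao_cu_num active CUs count as always-on */
			if (counter < ao_cu_num)
				ao_bitmap |= mask;
			counter++;
		}
		mask <<= 1;
	}

In effect APUs keep the old cap of two always-on CUs per SH, while dGPUs now treat every enabled CU in the SH as always-on.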
@@ -3737,9 +3526,9 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
 			bitmap = gfx_v6_0_get_cu_enabled(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < 16; k++) {
+			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
 				if (bitmap & mask) {
-					if (counter < 2)
+					if (counter < ao_cu_num)
 						ao_bitmap |= mask;
 					counter ++;
 				}
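Note on the gfx_v6_0.c rework above: every ASIC branch visible here (the P4_8x16 branch, OLAND/HAINAN, TAHITI/PITCAIRN) now fills a local tilemode[] table and then programs GB_TILE_MODE0..n in one short loop, instead of recomputing gb_tile_moden inside a switch on reg_offset during the write loop. A stripped-down sketch of that pattern, with entry values elided (macros and num_tile_mode_states as used in the hunk):

	u32 tilemode[32] = { 0 };	/* large enough for the 31 entries set above */
	u32 reg_offset;

	tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
		      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
		      PIPE_CONFIG(ADDR_SURF_P2) |
		      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
		      NUM_BANKS(ADDR_SURF_16_BANK);
	/* ... tilemode[1] through tilemode[30], per ASIC ... */

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);

This keeps each ASIC's table in one contiguous block and leaves a single, uniform register-write loop per branch.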

+ 358 - 294
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

@@ -21,12 +21,13 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_gfx.h"
 #include "cikd.h"
 #include "cik.h"
+#include "cik_structs.h"
 #include "atom.h"
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
@@ -48,7 +49,7 @@
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS     1
-#define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE      2048
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1606,19 +1607,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
 	WREG32(mmGRBM_GFX_INDEX, data);
 	WREG32(mmGRBM_GFX_INDEX, data);
 }
 }
 
 
-/**
- * gfx_v7_0_create_bitmask - create a bitmask
- *
- * @bit_width: length of the mask
- *
- * create a variable length bit mask (CIK).
- * Returns the bitmask.
- */
-static u32 gfx_v7_0_create_bitmask(u32 bit_width)
-{
-	return (u32)((1ULL << bit_width) - 1);
-}
-
 /**
  * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
  *
@@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
 
-	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
-				       adev->gfx.config.max_sh_per_se);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+					 adev->gfx.config.max_sh_per_se);
 
 	return (~data) & mask;
 }
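
The per-IP gfx_v7_0_create_bitmask() deleted above is replaced by the shared amdgpu_gfx_create_bitmask(); a minimal sketch of that helper (same body as the deleted copy shown in this hunk) and of how the caller applies it, in plain C with illustrative parameter names:

#include <stdint.h>

/* variable-length bitmask: the 64-bit shift avoids overflow at bit_width == 32 */
static uint32_t create_bitmask(uint32_t bit_width)
{
	return (uint32_t)((1ULL << bit_width) - 1);
}

/* usage shaped like gfx_v7_0_get_rb_active_bitmap(): keep only the RB bits
 * present on this shader engine, then invert the "disabled" field */
static uint32_t rb_active_bitmap(uint32_t backend_disable_field,
				 uint32_t max_backends_per_se,
				 uint32_t max_sh_per_se)
{
	uint32_t mask = create_bitmask(max_backends_per_se / max_sh_per_se);

	return (~backend_disable_field) & mask;
}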
@@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 /**
  * gmc_v7_0_init_compute_vmid - gart enable
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  * Initialize compute vmid sh_mem registers
  *
@@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 	}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
-	/*
-	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
-	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
-	 * Nonetheless, we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
 
+	/* allocate space for ALL pipes (even the ones we don't own) */
+	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
+		* GFX7_MEC_HPD_SIZE * 2;
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 	}
 
 	/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+	memset(hpd, 0, mec_hpd_size);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
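
A short sketch of the sizing change in gfx_v7_0_mec_init() above: the EOP buffer is now dimensioned for every pipe of every MEC rather than for the single pipe the driver used to claim. The values below are illustrative (2 MECs, as on Kaveri), not read from real hardware.

#include <stddef.h>
#include <stdio.h>

#define GFX7_MEC_HPD_SIZE 2048

int main(void)
{
	unsigned int num_mec = 2;		/* illustrative: Kaveri-style config */
	unsigned int num_pipe_per_mec = 4;

	size_t mec_hpd_size = (size_t)num_mec * num_pipe_per_mec *
			      GFX7_MEC_HPD_SIZE * 2;

	printf("EOP buffer: %zu bytes for %u pipes\n",
	       mec_hpd_size, num_mec * num_pipe_per_mec);
	return 0;
}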
@@ -2917,275 +2902,296 @@ struct hqd_registers
 	u32 cp_mqd_control;
 	u32 cp_mqd_control;
 };
 };
 
 
-struct bonaire_mqd
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
+				       int mec, int pipe)
 {
 {
-	u32 header;
-	u32 dispatch_initiator;
-	u32 dimensions[3];
-	u32 start_idx[3];
-	u32 num_threads[3];
-	u32 pipeline_stat_enable;
-	u32 perf_counter_enable;
-	u32 pgm[2];
-	u32 tba[2];
-	u32 tma[2];
-	u32 pgm_rsrc[2];
-	u32 vmid;
-	u32 resource_limits;
-	u32 static_thread_mgmt01[2];
-	u32 tmp_ring_size;
-	u32 static_thread_mgmt23[2];
-	u32 restart[3];
-	u32 thread_trace_enable;
-	u32 reserved1;
-	u32 user_data[16];
-	u32 vgtcs_invoke_count[2];
-	struct hqd_registers queue_state;
-	u32 dequeue_cntr;
-	u32 interrupt_queue[64];
-};
-
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
 	u64 eop_gpu_addr;
 	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct bonaire_mqd *mqd;
-	struct amdgpu_ring *ring;
-
-	/* fix up chicken bits */
-	tmp = RREG32(mmCP_CPF_DEBUG);
-	tmp |= (1 << 23);
-	WREG32(mmCP_CPF_DEBUG, tmp);
+	u32 tmp;
+	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
+			    * GFX7_MEC_HPD_SIZE * 2;
 
 
-	/* init the pipes */
 	mutex_lock(&adev->srbm_mutex);
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
-		int me = (i < 4) ? 1 : 2;
-		int pipe = (i < 4) ? i : (i - 4);
+	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
 
 
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+	cik_srbm_select(adev, mec + 1, pipe, 0, 0);
 
 
-		cik_srbm_select(adev, me, pipe, 0, 0);
+	/* write the EOP addr */
+	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
 
-		/* write the EOP addr */
-		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+	/* set the VMID assigned */
+	WREG32(mmCP_HPD_EOP_VMID, 0);
 
 
-		/* set the VMID assigned */
-		WREG32(mmCP_HPD_EOP_VMID, 0);
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+	WREG32(mmCP_HPD_EOP_CONTROL, tmp);
 
 
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
-		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
-		tmp |= order_base_2(MEC_HPD_SIZE / 8);
-		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
-	}
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
 	mutex_unlock(&adev->srbm_mutex);
+}
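
The per-pipe initializer above addresses its slice of that shared EOP buffer with a MEC-major offset; a one-function sketch (illustrative, not driver code):

#include <stddef.h>

#define GFX7_MEC_HPD_SIZE 2048

/* byte offset of a pipe's EOP slot inside the buffer sized in mec_init();
 * the hardware numbers MECs from me=1, hence the mec + 1 in the SRBM select */
static size_t eop_offset(unsigned int mec, unsigned int pipe,
			 unsigned int num_pipe_per_mec)
{
	return (size_t)(mec * num_pipe_per_mec + pipe) * GFX7_MEC_HPD_SIZE * 2;
}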
 
 
-	/* init the queues.  Just two for now. */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
+static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+	int i;
 
 
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct bonaire_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
-					     &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
+	/* disable the queue if it's active */
+	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
 		}
 		}
 
 
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
+		if (i == adev->usec_timeout)
+			return -ETIMEDOUT;
 
 
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct bonaire_mqd));
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+		WREG32(mmCP_HQD_PQ_RPTR, 0);
+		WREG32(mmCP_HQD_PQ_WPTR, 0);
+	}
 
 
-		mqd = (struct bonaire_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->static_thread_mgmt01[0] = 0xffffffff;
-		mqd->static_thread_mgmt01[1] = 0xffffffff;
-		mqd->static_thread_mgmt23[0] = 0xffffffff;
-		mqd->static_thread_mgmt23[1] = 0xffffffff;
+	return 0;
+}
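
gfx_v7_0_mqd_deactivate() above is the classic poll-with-timeout pattern around CP_HQD_ACTIVE; a generic sketch with read_active() and delay_us() standing in for RREG32() and udelay():

#include <errno.h>
#include <stdbool.h>

static int poll_until_inactive(bool (*read_active)(void),
			       void (*delay_us)(unsigned int),
			       unsigned int timeout_us)
{
	unsigned int i;

	for (i = 0; i < timeout_us; i++) {
		if (!read_active())
			return 0;		/* queue drained */
		delay_us(1);
	}
	return -ETIMEDOUT;			/* same error the hunk returns */
}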
 
 
-		mutex_lock(&adev->srbm_mutex);
-		cik_srbm_select(adev, ring->me,
-				ring->pipe,
-				ring->queue, 0);
+static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
+			     struct cik_mqd *mqd,
+			     uint64_t mqd_gpu_addr,
+			     struct amdgpu_ring *ring)
+{
+	u64 hqd_gpu_addr;
+	u64 wb_gpu_addr;
 
 
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct cik_mqd));
 
 
-		/* enable doorbell? */
-		mqd->queue_state.cp_hqd_pq_doorbell_control =
-			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell)
-			mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		else
-			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
-
-		/* disable the queue if it's active */
-		mqd->queue_state.cp_hqd_dequeue_request = 0;
-		mqd->queue_state.cp_hqd_pq_rptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr= 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		}
+	mqd->header = 0xC0310800;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 
 
-		/* set the pointer to the MQD */
-		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
-		/* set MQD vmid to 0 */
-		mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-		mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
-		WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
-					CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
-
-		mqd->queue_state.cp_hqd_pq_control |=
-			order_base_2(ring->ring_size / 8);
-		mqd->queue_state.cp_hqd_pq_control |=
-			(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
+	/* enable doorbell? */
+	mqd->cp_hqd_pq_doorbell_control =
+		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+	if (ring->use_doorbell)
+		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+	else
+		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+
+	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */

+	hqd_gpu_addr = ring->gpu_addr >> 8;
+	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+	mqd->cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
+				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
+
+	mqd->cp_hqd_pq_control |=
+		order_base_2(ring->ring_size / 8);
+	mqd->cp_hqd_pq_control |=
+		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
 #ifdef __BIG_ENDIAN
-		mqd->queue_state.cp_hqd_pq_control |=
-			2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
+	mqd->cp_hqd_pq_control |=
+		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
 #endif
 #endif
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
+	mqd->cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
 				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
 				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
 				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
 				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
-		mqd->queue_state.cp_hqd_pq_control |=
-			CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
-			CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
-		WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			mqd->queue_state.cp_hqd_pq_doorbell_control =
-				RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				(ring->doorbell_index <<
-				 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+	mqd->cp_hqd_pq_control |=
+		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
+		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
 
 
-		} else {
-			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set the wb address whether it's enabled or not */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* enable the doorbell if requested */
+	if (ring->use_doorbell) {
+		mqd->cp_hqd_pq_doorbell_control =
+			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+		mqd->cp_hqd_pq_doorbell_control &=
+			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
+		mqd->cp_hqd_pq_doorbell_control |=
+			(ring->doorbell_index <<
+			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
+		mqd->cp_hqd_pq_doorbell_control |=
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->cp_hqd_pq_doorbell_control &=
+			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
+					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+
+	} else {
+		mqd->cp_hqd_pq_doorbell_control = 0;
+	}
+
+	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	ring->wptr = 0;
+	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+	/* set the vmid for the queue */
+	mqd->cp_hqd_vmid = 0;
+
+	/* defaults */
+	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
+	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
+	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
+	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
+	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
+	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
+	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
+	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
+	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
+	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
+
+	/* activate the queue */
+	mqd->cp_hqd_active = 1;
+}
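
The QUEUE_SIZE programming in gfx_v7_0_mqd_init() above encodes the ring size as a power-of-two exponent via order_base_2(ring_size / 8); a self-contained reimplementation of that helper (illustrative, assumes n >= 1):

#include <stdint.h>

/* order_base_2(n): smallest k such that 2^k >= n */
static unsigned int order_base_2(uint32_t n)
{
	unsigned int k = 0;

	while ((1u << k) < n)
		k++;
	return k;
}

/* value written into the CP_HQD_PQ_CONTROL QUEUE_SIZE field for a ring of
 * ring_size bytes, as in the hunk above */
static uint32_t pq_queue_size_field(uint32_t ring_size)
{
	return order_base_2(ring_size / 8);
}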
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+{
+	uint32_t tmp;
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
+
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;
+
+	/* disable wptr polling */
+	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	/* activate the HQD */
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	return 0;
+}
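
gfx_v7_0_mqd_commit() above leans on the cik_mqd layout: the fields from cp_mqd_base_addr_lo onward mirror the mmCP_MQD_BASE_ADDR..mmCP_HQD_ACTIVE register block, one u32 per register, so the whole state can be pushed with an indexed loop. A structural sketch (the struct, register numbering and wreg() callback are illustrative):

#include <stdint.h>

struct mqd_image {			/* one u32 per HQD register, in register order */
	uint32_t mqd_base_addr_lo;	/* mirrors the first register of the block */
	uint32_t mqd_base_addr_hi;
	uint32_t hqd_active;		/* mirrors the last register of the block */
};

static void commit_mqd(void (*wreg)(uint32_t reg, uint32_t val),
		       uint32_t first_reg, const struct mqd_image *mqd,
		       unsigned int nregs)	/* nregs <= number of fields above */
{
	const uint32_t *image = &mqd->mqd_base_addr_lo;
	unsigned int i;

	for (i = 0; i < nregs; i++)
		wreg(first_reg + i, image[i]);	/* register offset == field offset */
}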
+
+static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
+{
+	int r;
+	u64 mqd_gpu_addr;
+	struct cik_mqd *mqd;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	if (ring->mqd_obj == NULL) {
+		r = amdgpu_bo_create(adev,
+				sizeof(struct cik_mqd),
+				PAGE_SIZE, true,
+				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				&ring->mqd_obj);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+			return r;
 		}
 		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
+	}
+
+	r = amdgpu_bo_reserve(ring->mqd_obj, false);
+	if (unlikely(r != 0))
+		goto out;
 
 
-		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+			&mqd_gpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+	if (r) {
+		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+		goto out_unreserve;
+	}
 
 
-		/* set the vmid for the queue */
-		mqd->queue_state.cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+	mutex_lock(&adev->srbm_mutex);
+	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
 
-		/* activate the queue */
-		mqd->queue_state.cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
+	gfx_v7_0_mqd_deactivate(adev);
+	gfx_v7_0_mqd_commit(adev, mqd);
 
 
-		cik_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
+	cik_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 
 
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
+	amdgpu_bo_kunmap(ring->mqd_obj);
+out_unreserve:
+	amdgpu_bo_unreserve(ring->mqd_obj);
+out:
+	return 0;
+}
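
gfx_v7_0_compute_queue_init() above uses the usual reserve/pin/kmap sequence with goto-label unwinding; a generic sketch of that shape (struct bo and the bo_*() prototypes are placeholders for the amdgpu_bo_* helpers, not real API):

#include <stdint.h>

struct bo;
int bo_reserve(struct bo *bo);
void bo_unreserve(struct bo *bo);
int bo_pin(struct bo *bo, uint64_t *gpu_addr);
int bo_kmap(struct bo *bo, void **cpu_ptr);
void bo_kunmap(struct bo *bo);

static int setup_mqd_bo(struct bo *bo, void **cpu_ptr, uint64_t *gpu_addr)
{
	int r;

	r = bo_reserve(bo);
	if (r)
		goto out;

	r = bo_pin(bo, gpu_addr);
	if (r)
		goto out_unreserve;

	r = bo_kmap(bo, cpu_ptr);
	if (r)
		goto out_unreserve;

	/* ... initialize the MQD through *cpu_ptr ... */

	bo_kunmap(bo);
out_unreserve:
	bo_unreserve(bo);
out:
	return r;
}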
 
 
-		ring->ready = true;
+/**
+ * gfx_v7_0_cp_compute_resume - setup the compute queue registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+	int r, i, j;
+	u32 tmp;
+	struct amdgpu_ring *ring;
+
+	/* fix up chicken bits */
+	tmp = RREG32(mmCP_CPF_DEBUG);
+	tmp |= (1 << 23);
+	WREG32(mmCP_CPF_DEBUG, tmp);
+
+	/* init all pipes (even the ones we don't own) */
+	for (i = 0; i < adev->gfx.mec.num_mec; i++)
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
+			gfx_v7_0_compute_pipe_init(adev, i, j);
+
+	/* init the queues */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		r = gfx_v7_0_compute_queue_init(adev, i);
+		if (r) {
+			gfx_v7_0_cp_compute_fini(adev);
+			return r;
+		}
 	}
 	}
 
 
 	gfx_v7_0_cp_compute_enable(adev, true);
 	gfx_v7_0_cp_compute_enable(adev, true);
 
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 		ring = &adev->gfx.compute_ring[i];
-
+		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 		if (r)
 			ring->ready = false;
 			ring->ready = false;
@@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		gfx_v7_0_update_rlc(adev, tmp);
 		gfx_v7_0_update_rlc(adev, tmp);
 
 
 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
 	} else {
 	} else {
 		gfx_v7_0_enable_gui_idle_interrupt(adev, false);
 		gfx_v7_0_enable_gui_idle_interrupt(adev, false);
 
 
@@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		RREG32(mmCB_CGTT_SCLK_CTRL);
 		RREG32(mmCB_CGTT_SCLK_CTRL);
 
 
 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
-	}
-
-	if (orig != data)
-		WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 
 
+		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+	}
 }
 }
 
 
 static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
@@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 
-	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
 	return (~data) & mask;
 }
@@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
 	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
@@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
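
A small sketch of the per-ring numbering done in gfx_v7_0_compute_ring_init() above: the doorbell follows the flat ring_id, while the EOP interrupt source is keyed on the (me, pipe) pair. The two base constants are illustrative, not the real doorbell or IRQ numbers.

#include <stdio.h>

#define DOORBELL_MEC_RING0		0x10	/* illustrative base */
#define CP_IRQ_COMPUTE_MEC1_PIPE0_EOP	0	/* illustrative base */

int main(void)
{
	int mec = 0, pipe = 2, queue = 5, ring_id = 3;
	int num_pipe_per_mec = 4;

	int me = mec + 1;			/* mec0 is me1 */
	int doorbell = DOORBELL_MEC_RING0 + ring_id;
	int irq_type = CP_IRQ_COMPUTE_MEC1_PIPE0_EOP +
		       (me - 1) * num_pipe_per_mec + pipe;

	printf("comp_%d.%d.%d doorbell=%d irq=%d\n",
	       me, pipe, queue, doorbell, irq_type);
	return 0;
}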
+
 static int gfx_v7_0_sw_init(void *handle)
 static int gfx_v7_0_sw_init(void *handle)
 {
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r;
+	int i, j, k, r, ring_id;
+
+	switch (adev->asic_type) {
+	case CHIP_KAVERI:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
 
 
 	/* EOP Event */
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle)
 			return r;
 			return r;
 	}
 	}
 
 
-	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v7_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
 		}
 		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
 	}
 	}
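
The replacement loop above allocates rings "horizontally": per MEC, the queue index is the outer loop and the pipe the inner one, so consecutive ring ids land on different pipes. A runnable sketch of that iteration order (is_queue_enabled() stands in for amdgpu_gfx_is_mec_queue_enabled(); the ownership rule inside it is made up):

#include <stdbool.h>
#include <stdio.h>

static bool is_queue_enabled(int mec, int pipe, int queue)
{
	/* pretend this instance owns the first two queues of every MEC0 pipe */
	return mec == 0 && pipe < 4 && queue < 2;
}

int main(void)
{
	int num_mec = 2, num_queue_per_pipe = 8, num_pipe_per_mec = 4;
	int ring_id = 0;

	for (int i = 0; i < num_mec; ++i)
		for (int j = 0; j < num_queue_per_pipe; j++)
			for (int k = 0; k < num_pipe_per_mec; k++) {
				if (!is_queue_enabled(i, k, j))
					continue;
				printf("ring %d -> mec %d pipe %d queue %d\n",
				       ring_id++, i, k, j);
			}
	return 0;
}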
 
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	/* reserve GDS, GWS and OA resource for gfx */
@@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 	u32 mec_int_cntl, mec_int_cntl_reg;
 	u32 mec_int_cntl, mec_int_cntl_reg;
 
 
 	/*
 	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
 	 * pipes' interrupts are set by amdkfd.
 	 * pipes' interrupts are set by amdkfd.
 	 */
 	 */
 
 
@@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 		case 0:
 		case 0:
 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
 			break;
 			break;
+		case 1:
+			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+			break;
+		case 2:
+			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+			break;
+		case 3:
+			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
+			break;
 		default:
 		default:
 			DRM_DEBUG("invalid pipe %d\n", pipe);
 			DRM_DEBUG("invalid pipe %d\n", pipe);
 			return;
 			return;
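
The interrupt-state hunk above extends the pipe switch to all four MEC1 pipes; an equivalent table-driven sketch of the same selection (register values are made-up placeholders, only the shape is the point):

#include <stdint.h>

static const uint32_t me1_pipe_int_cntl[4] = {
	0x3040, 0x3041, 0x3042, 0x3043,	/* placeholders for mmCP_ME1_PIPEn_INT_CNTL */
};

static uint32_t pipe_int_cntl_reg(unsigned int pipe)
{
	return pipe < 4 ? me1_pipe_int_cntl[pipe] : 0;	/* 0 == invalid pipe */
}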
@@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
 	unsigned disable_masks[4 * 2];
 	unsigned disable_masks[4 * 2];
+	u32 ao_cu_num;
+
+	if (adev->flags & AMD_IS_APU)
+		ao_cu_num = 2;
+	else
+		ao_cu_num = adev->gfx.config.max_cu_per_sh;
 
 
 	memset(cu_info, 0, sizeof(*cu_info));
 	memset(cu_info, 0, sizeof(*cu_info));
 
 
@@ -5354,9 +5418,9 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 			cu_info->bitmap[i][j] = bitmap;
 
 
-			for (k = 0; k < 16; k ++) {
+			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
 				if (bitmap & mask) {
-					if (counter < 2)
+					if (counter < ao_cu_num)
 						ao_bitmap |= mask;
 						ao_bitmap |= mask;
 					counter ++;
 					counter ++;
 				}
 				}

+ 5 - 0
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h

@@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
 
+struct amdgpu_device;
+struct cik_mqd;
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
+
 #endif

+ 507 - 784
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -21,7 +21,7 @@
  *
  *
  */
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "vi.h"
@@ -52,7 +52,7 @@
 #include "smu/smu_7_1_3_d.h"
 #include "smu/smu_7_1_3_d.h"
 
 
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_NUM_GFX_RINGS     1
-#define GFX8_NUM_COMPUTE_RINGS 8
+#define GFX8_MEC_HPD_SIZE 2048
 
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -657,10 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
-static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
-static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
-static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
-static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
+static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
+static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 
 
 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
 {
@@ -859,7 +857,8 @@ err1:
 }
 }
 
 
 
 
-static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
+static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
+{
 	release_firmware(adev->gfx.pfp_fw);
 	release_firmware(adev->gfx.pfp_fw);
 	adev->gfx.pfp_fw = NULL;
 	adev->gfx.pfp_fw = NULL;
 	release_firmware(adev->gfx.me_fw);
 	release_firmware(adev->gfx.me_fw);
@@ -941,12 +940,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 
 
-	/* chain ib ucode isn't formal released, just disable it by far
-	 * TODO: when ucod ready we should use ucode version to judge if
-	 * chain-ib support or not.
-	 */
-	adev->virt.chained_ib_support = false;
-
 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
@@ -960,6 +953,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
 
+	/*
+	 * Support for MCBP/Virtualization in combination with chained IBs is
+	 * formal released on feature version #46
+	 */
+	if (adev->gfx.ce_feature_version >= 46 &&
+	    adev->gfx.pfp_feature_version >= 46) {
+		adev->virt.chained_ib_support = true;
+		DRM_INFO("Chained IB support enabled!\n");
+	} else
+		adev->virt.chained_ib_support = false;
+
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 	if (err)
 	if (err)
@@ -1373,64 +1377,22 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 	}
 	}
 }
 }
 
 
-static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring,
-				  struct amdgpu_irq_src *irq)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-	int r = 0;
-
-	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
-	if (r)
-		return r;
-
-	ring->adev = NULL;
-	ring->ring_obj = NULL;
-	ring->use_doorbell = true;
-	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
-	if (adev->gfx.mec2_fw) {
-		ring->me = 2;
-		ring->pipe = 0;
-	} else {
-		ring->me = 1;
-		ring->pipe = 1;
-	}
-
-	ring->queue = 0;
-	ring->eop_gpu_addr = kiq->eop_gpu_addr;
-	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
-	r = amdgpu_ring_init(adev, ring, 1024,
-			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
-	if (r)
-		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
-
-	return r;
-}
-static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
-				   struct amdgpu_irq_src *irq)
-{
-	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
-	amdgpu_ring_fini(ring);
-}
-
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 {
 	int r;
 	int r;
 	u32 *hpd;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
 
-	/*
-	 * we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
 
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -1459,7 +1421,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 		return r;
 		return r;
 	}
 	}
 
 
-	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
+	memset(hpd, 0, mec_hpd_size);
 
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -1467,38 +1429,6 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 	return 0;
 	return 0;
 }
 }
 
 
-static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
-}
-
-static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
-{
-	int r;
-	u32 *hpd;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
-				    &kiq->eop_gpu_addr, (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
-		return r;
-	}
-
-	memset(hpd, 0, MEC_HPD_SIZE);
-
-	r = amdgpu_bo_reserve(kiq->eop_obj, true);
-	if (unlikely(r != 0))
-		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
-	amdgpu_bo_kunmap(kiq->eop_obj);
-	amdgpu_bo_unreserve(kiq->eop_obj);
-
-	return 0;
-}
-
 static const u32 vgpr_init_compute_shader[] =
 static const u32 vgpr_init_compute_shader[] =
 {
 {
 	0x7e000209, 0x7e020208,
 	0x7e000209, 0x7e020208,
@@ -1907,46 +1837,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.max_tile_pipes = 2;
 		adev->gfx.config.max_tile_pipes = 2;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 2;
 		adev->gfx.config.max_backends_per_se = 2;
-
-		switch (adev->pdev->revision) {
-		case 0xc4:
-		case 0x84:
-		case 0xc8:
-		case 0xcc:
-		case 0xe1:
-		case 0xe3:
-			/* B10 */
-			adev->gfx.config.max_cu_per_sh = 8;
-			break;
-		case 0xc5:
-		case 0x81:
-		case 0x85:
-		case 0xc9:
-		case 0xcd:
-		case 0xe2:
-		case 0xe4:
-			/* B8 */
-			adev->gfx.config.max_cu_per_sh = 6;
-			break;
-		case 0xc6:
-		case 0xca:
-		case 0xce:
-		case 0x88:
-		case 0xe6:
-			/* B6 */
-			adev->gfx.config.max_cu_per_sh = 6;
-			break;
-		case 0xc7:
-		case 0x87:
-		case 0xcb:
-		case 0xe5:
-		case 0x89:
-		default:
-			/* B4 */
-			adev->gfx.config.max_cu_per_sh = 4;
-			break;
-		}
-
+		adev->gfx.config.max_cu_per_sh = 8;
 		adev->gfx.config.max_texture_channel_caches = 2;
 		adev->gfx.config.max_texture_channel_caches = 2;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gs_threads = 32;
 		adev->gfx.config.max_gs_threads = 32;
@@ -1963,35 +1854,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.max_tile_pipes = 2;
 		adev->gfx.config.max_tile_pipes = 2;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 1;
-
-		switch (adev->pdev->revision) {
-		case 0x80:
-		case 0x81:
-		case 0xc0:
-		case 0xc1:
-		case 0xc2:
-		case 0xc4:
-		case 0xc8:
-		case 0xc9:
-		case 0xd6:
-		case 0xda:
-		case 0xe9:
-		case 0xea:
-			adev->gfx.config.max_cu_per_sh = 3;
-			break;
-		case 0x83:
-		case 0xd0:
-		case 0xd1:
-		case 0xd2:
-		case 0xd4:
-		case 0xdb:
-		case 0xe1:
-		case 0xe2:
-		default:
-			adev->gfx.config.max_cu_per_sh = 2;
-			break;
-		}
-
+		adev->gfx.config.max_cu_per_sh = 3;
 		adev->gfx.config.max_texture_channel_caches = 2;
 		adev->gfx.config.max_texture_channel_caches = 2;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gs_threads = 16;
 		adev->gfx.config.max_gs_threads = 16;
@@ -2083,13 +1946,67 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 	return 0;
 	return 0;
 }
 }
 
 
+static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX8_MEC_HPD_SIZE);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v8_0_sw_init(void *handle)
 static int gfx_v8_0_sw_init(void *handle)
 {
 {
-	int i, r;
+	int i, j, k, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
+	switch (adev->asic_type) {
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_POLARIS10:
+	case CHIP_CARRIZO:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_TOPAZ:
+	case CHIP_STONEY:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
 	/* KIQ event */
 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
 	if (r)
 	if (r)
@@ -2151,49 +2068,41 @@ static int gfx_v8_0_sw_init(void *handle)
 			return r;
 			return r;
 	}
 	}
 
 
-	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		unsigned irq_type;
 
 
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v8_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
 		}
 		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-				     irq_type);
-		if (r)
-			return r;
 	}
 	}
 
 
-	if (amdgpu_sriov_vf(adev)) {
-		r = gfx_v8_0_kiq_init(adev);
-		if (r) {
-			DRM_ERROR("Failed to init KIQ BOs!\n");
-			return r;
-		}
+	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
+	if (r) {
+		DRM_ERROR("Failed to init KIQ BOs!\n");
+		return r;
+	}
 
 
-		kiq = &adev->gfx.kiq;
-		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
-		if (r)
-			return r;
+	kiq = &adev->gfx.kiq;
+	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+	if (r)
+		return r;
 
 
-		/* create MQD for all compute queues as wel as KIQ for SRIOV case */
-		r = gfx_v8_0_compute_mqd_sw_init(adev);
-		if (r)
-			return r;
-	}
+	/* create MQD for all compute queues as well as KIQ for SRIOV case */
+	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd));
+	if (r)
+		return r;
 
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -2237,11 +2146,9 @@ static int gfx_v8_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
 
-	if (amdgpu_sriov_vf(adev)) {
-		gfx_v8_0_compute_mqd_sw_fini(adev);
-		gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
-		gfx_v8_0_kiq_fini(adev);
-	}
+	amdgpu_gfx_compute_mqd_sw_fini(adev);
+	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	amdgpu_gfx_kiq_fini(adev);
 
 
 	gfx_v8_0_mec_fini(adev);
 	gfx_v8_0_mec_fini(adev);
 	gfx_v8_0_rlc_fini(adev);
 	gfx_v8_0_rlc_fini(adev);
@@ -3594,11 +3501,6 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
 	WREG32(mmGRBM_GFX_INDEX, data);
 	WREG32(mmGRBM_GFX_INDEX, data);
 }
 }
 
 
-static u32 gfx_v8_0_create_bitmask(u32 bit_width)
-{
-	return (u32)((1ULL << bit_width) - 1);
-}
-
 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 {
 	u32 data, mask;
 	u32 data, mask;
@@ -3608,8 +3510,8 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 
 
 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
 
 
-	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
-				       adev->gfx.config.max_sh_per_se);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+					 adev->gfx.config.max_sh_per_se);
 
 
 	return (~data) & mask;
 	return (~data) & mask;
 }
 }
@@ -3823,7 +3725,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 /**
 /**
  * gfx_v8_0_init_compute_vmid - gart enable
  * gfx_v8_0_init_compute_vmid - gart enable
  *
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  *
  * Initialize compute vmid sh_mem registers
  * Initialize compute vmid sh_mem registers
  *
  *
@@ -4481,6 +4383,39 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 
 
 	return 0;
 	return 0;
 }
 }
+static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	u32 tmp;
+	/* no gfx doorbells on iceland */
+	if (adev->asic_type == CHIP_TOPAZ)
+		return;
+
+	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
+
+	if (ring->use_doorbell) {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				DOORBELL_OFFSET, ring->doorbell_index);
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+						DOORBELL_HIT, 0);
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+					    DOORBELL_EN, 1);
+	} else {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
+	}
+
+	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
+
+	if (adev->flags & AMD_IS_APU)
+		return;
+
+	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+					DOORBELL_RANGE_LOWER,
+					AMDGPU_DOORBELL_GFX_RING0);
+	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
+		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
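
gfx_v8_0_set_cpg_door_bell() above is built entirely from the REG_SET_FIELD() read-modify-write pattern; a minimal model of that pattern applied to a doorbell-control register (the mask/shift values are illustrative, not the VI register layout):

#include <stdint.h>

#define DOORBELL_OFFSET_MASK	0x0ffffffcu	/* illustrative */
#define DOORBELL_OFFSET_SHIFT	2
#define DOORBELL_EN_MASK	0x40000000u
#define DOORBELL_EN_SHIFT	30

static uint32_t set_field(uint32_t reg, uint32_t mask, unsigned int shift,
			  uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

static uint32_t doorbell_control(uint32_t reg, int use_doorbell,
				 uint32_t doorbell_index)
{
	if (use_doorbell) {
		reg = set_field(reg, DOORBELL_OFFSET_MASK,
				DOORBELL_OFFSET_SHIFT, doorbell_index);
		reg = set_field(reg, DOORBELL_EN_MASK, DOORBELL_EN_SHIFT, 1);
	} else {
		reg = set_field(reg, DOORBELL_EN_MASK, DOORBELL_EN_SHIFT, 0);
	}
	return reg;
}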
 
 
 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 {
 {
@@ -4528,34 +4463,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 	WREG32(mmCP_RB0_BASE, rb_addr);
 	WREG32(mmCP_RB0_BASE, rb_addr);
 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
 
 
-	/* no gfx doorbells on iceland */
-	if (adev->asic_type != CHIP_TOPAZ) {
-		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
-		if (ring->use_doorbell) {
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_OFFSET, ring->doorbell_index);
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_HIT, 0);
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_EN, 1);
-		} else {
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_EN, 0);
-		}
-		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
-
-		if (adev->asic_type == CHIP_TONGA) {
-			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
-					    DOORBELL_RANGE_LOWER,
-					    AMDGPU_DOORBELL_GFX_RING0);
-			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
-
-			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
-			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
-		}
-
-	}
-
+	gfx_v8_0_set_cpg_door_bell(adev, ring);
 	/* start the ring */
 	/* start the ring */
 	amdgpu_ring_clear_ring(ring);
 	amdgpu_ring_clear_ring(ring);
 	gfx_v8_0_cp_gfx_start(adev);
 	gfx_v8_0_cp_gfx_start(adev);
@@ -4628,29 +4536,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	return 0;
 	return 0;
 }
 }
 
 
-static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
-{
-	int i, r;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, false);
-			if (unlikely(r != 0))
-				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
-
-			amdgpu_bo_unpin(ring->mqd_obj);
-			amdgpu_bo_unreserve(ring->mqd_obj);
-
-			amdgpu_bo_unref(&ring->mqd_obj);
-			ring->mqd_obj = NULL;
-			ring->mqd_ptr = NULL;
-			ring->mqd_gpu_addr = 0;
-		}
-	}
-}
-
 /* KIQ functions */
 /* KIQ functions */
 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 {
 {
@@ -4666,45 +4551,161 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
 }
 }
 
 
-static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
+static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
 {
 {
-	amdgpu_ring_alloc(ring, 8);
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	uint32_t scratch, tmp = 0;
+	uint64_t queue_mask = 0;
+	int r, i;
+
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
+		}
+
+		queue_mask |= (1ull << i);
+	}
+
+	r = amdgpu_gfx_scratch_get(adev, &scratch);
+	if (r) {
+		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+
+	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		amdgpu_gfx_scratch_free(adev, scratch);
+		return r;
+	}
 	/* set resources */
 	/* set resources */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(ring, 0);	/* queue mask hi */
-	amdgpu_ring_write(ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(ring, 0);	/* oac mask */
-	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
-	amdgpu_ring_commit(ring);
-	udelay(50);
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+
+		/* map queues */
+		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+		amdgpu_ring_write(kiq_ring,
+				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+		amdgpu_ring_write(kiq_ring,
+				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
+				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
+		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+	}
+	/* write to scratch for completion */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
+	amdgpu_ring_commit(kiq_ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i >= adev->usec_timeout) {
+		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	amdgpu_gfx_scratch_free(adev, scratch);
+
+	return r;
 }
 }
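
Two ideas in gfx_v8_0_kiq_kcq_enable() above are worth isolating: packing the owned-queue bitmap into the 64-bit SET_RESOURCES queue mask, and the scratch-register handshake (seed 0xCAFEDEAD, wait for 0xDEADBEEF) that confirms the KIQ consumed the submission. A sketch with placeholder accessors (queue_enabled[], rreg(), delay_us()):

#include <stdbool.h>
#include <stdint.h>

static uint64_t build_queue_mask(const bool *queue_enabled, unsigned int nqueues)
{
	uint64_t mask = 0;
	unsigned int i;

	for (i = 0; i < nqueues && i < 64; i++)	/* the packet carries 64 mask bits */
		if (queue_enabled[i])
			mask |= 1ull << i;
	return mask;
}

static bool wait_for_scratch(uint32_t (*rreg)(void),
			     void (*delay_us)(unsigned int),
			     unsigned int timeout_us)
{
	unsigned int i;

	for (i = 0; i < timeout_us; i++) {
		if (rreg() == 0xDEADBEEF)
			return true;		/* KIQ wrote the completion token */
		delay_us(1);
	}
	return false;
}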
 
 
-static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
-				   struct amdgpu_ring *ring)
+static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
 {
 {
-	struct amdgpu_device *adev = kiq_ring->adev;
-	uint64_t mqd_addr, wptr_addr;
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	uint32_t scratch, tmp = 0;
+	int r, i;
 
 
-	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	amdgpu_ring_alloc(kiq_ring, 8);
+	r = amdgpu_gfx_scratch_get(adev, &scratch);
+	if (r) {
+		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
 
 
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-	amdgpu_ring_write(kiq_ring, 0x21010000);
-	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
-			(ring->queue << 26) |
-			(ring->pipe << 29) |
-			((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		amdgpu_gfx_scratch_free(adev, scratch);
+		return r;
+	}
+	/* unmap queues */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	/* write to scratch for completion */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
 	amdgpu_ring_commit(kiq_ring);
 	amdgpu_ring_commit(kiq_ring);
-	udelay(50);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i >= adev->usec_timeout) {
+		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	amdgpu_gfx_scratch_free(adev, scratch);
+
+	return r;
+}
+
+static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
+{
+	int i, r = 0;
+
+	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
+		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
+				break;
+			udelay(1);
+		}
+		if (i == adev->usec_timeout)
+			r = -ETIMEDOUT;
+	}
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+	WREG32(mmCP_HQD_PQ_RPTR, 0);
+	WREG32(mmCP_HQD_PQ_WPTR, 0);
+
+	return r;
 }

 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
@@ -4714,6 +4715,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
 	uint32_t tmp;

+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct vi_mqd));
+
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
@@ -4729,7 +4733,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

 	mqd->cp_hqd_eop_control = tmp;

@@ -4741,11 +4745,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)

 	mqd->cp_hqd_pq_doorbell_control = tmp;

-	/* disable the queue if it's active */
-	mqd->cp_hqd_dequeue_request = 0;
-	mqd->cp_hqd_pq_rptr = 0;
-	mqd->cp_hqd_pq_wptr = 0;
-
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
@@ -4815,149 +4814,170 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;

+	/* set MTYPE */
+	tmp = RREG32(mmCP_HQD_IB_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
+	mqd->cp_hqd_ib_control = tmp;
+
+	tmp = RREG32(mmCP_HQD_IQ_TIMER);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
+	mqd->cp_hqd_iq_timer = tmp;
+
+	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
+	mqd->cp_hqd_ctx_save_control = tmp;
+
+	/* defaults */
+	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
+	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
+	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
+	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
+	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
+	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
+	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
+	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
+	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
+	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
+	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;

 	return 0;
 }

-static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
+			struct vi_mqd *mqd)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct vi_mqd *mqd = ring->mqd_ptr;
-	int j;
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
+
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;

 	/* disable wptr polling */
 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

-	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
-
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
-
-	/* enable doorbell? */
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

-	/* disable the queue if it's active */
-	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
-				break;
-			udelay(1);
-		}
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (adev->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
 	}

-	/* set the pointer to the MQD */
-	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

-	/* set MQD vmid to 0 */
-	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
+	/* activate the HQD */
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

-	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+	return 0;
+}

-	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+	int r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	struct vi_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

-	/* set the wb address whether it's enabled or not */
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-				mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-				mqd->cp_hqd_pq_rptr_report_addr_hi);
+	gfx_v8_0_kiq_setting(ring);

-	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
+	if (adev->gfx.in_reset) { /* for GPU_RESET case */
+		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

-	/* enable the doorbell if requested */
-	if (ring->use_doorbell) {
-		if ((adev->asic_type == CHIP_CARRIZO) ||
-				(adev->asic_type == CHIP_FIJI) ||
-				(adev->asic_type == CHIP_STONEY)) {
-			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
-						AMDGPU_DOORBELL_KIQ << 2);
-			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-						AMDGPU_DOORBELL_MEC_RING7 << 2);
+		/* reset ring buffer */
+		ring->wptr = 0;
+		amdgpu_ring_clear_ring(ring);
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		r = gfx_v8_0_deactivate_hqd(adev, 1);
+		if (r) {
+			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
+			goto out_unlock;
 		}
-	}
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-
-	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-
-	/* set the vmid for the queue */
-	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+		gfx_v8_0_mqd_commit(adev, mqd);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+	} else {
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_mqd_init(ring);
+		r = gfx_v8_0_deactivate_hqd(adev, 1);
+		if (r) {
+			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
+			goto out_unlock;
+		}
+		gfx_v8_0_mqd_commit(adev, mqd);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);

-	/* activate the queue */
-	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+	}

-	if (ring->use_doorbell)
-		WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+	return r;

-	return 0;
+out_unlock:
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	return r;
 }

-static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct vi_mqd *mqd = ring->mqd_ptr;
-	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
-	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
-
-	if (is_kiq) {
-		gfx_v8_0_kiq_setting(&kiq->ring);
-	} else {
-		mqd_idx = ring - &adev->gfx.compute_ring[0];
-	}
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];

-	if (!adev->gfx.in_reset) {
-		memset((void *)mqd, 0, sizeof(*mqd));
+	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
-		if (is_kiq)
-			gfx_v8_0_kiq_init_register(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);

 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
-	} else { /* for GPU_RESET case */
+	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
-
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
-
-		if (is_kiq) {
-		    mutex_lock(&adev->srbm_mutex);
-		    vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-		    gfx_v8_0_kiq_init_register(ring);
-		    vi_srbm_select(adev, 0, 0, 0, 0);
-		    mutex_unlock(&adev->srbm_mutex);
-		}
+	} else {
+		amdgpu_ring_clear_ring(ring);
 	}
-
-	if (is_kiq)
-		gfx_v8_0_kiq_enable(ring);
-	else
-		gfx_v8_0_map_queue_enable(&kiq->ring, ring);
-
 	return 0;
 }

+static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
+{
+	if (adev->asic_type > CHIP_TONGA) {
+		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
+		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
+	}
+	/* enable doorbells */
+	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+}
+
 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = NULL;
@@ -4981,13 +5001,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 	if (r)
 		goto done;

-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
-		goto done;
-	}
-
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];

@@ -4996,272 +5009,41 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 			goto done;
 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
 		if (!r) {
-			r = gfx_v8_0_kiq_init_queue(ring);
+			r = gfx_v8_0_kcq_init_queue(ring);
 			amdgpu_bo_kunmap(ring->mqd_obj);
 			ring->mqd_ptr = NULL;
 		}
 		amdgpu_bo_unreserve(ring->mqd_obj);
 		if (r)
 			goto done;
-
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
 	}

-done:
-	return r;
-}
+	gfx_v8_0_set_mec_doorbell_range(adev);

-static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
-	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct vi_mqd *mqd;
-
-	/* init the queues.  */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct vi_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-					     NULL, &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
-		}
-
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct vi_mqd));
-
-		mqd = (struct vi_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->compute_pipelinestat_enable = 0x00000001;
-		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
-		mqd->compute_misc_reserved = 0x00000003;
-
-		mutex_lock(&adev->srbm_mutex);
-		vi_srbm_select(adev, ring->me,
-			       ring->pipe,
-			       ring->queue, 0);
-
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		eop_gpu_addr >>= 8;
-
-		/* write the EOP addr */
-		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
-		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-
-		/* set the VMID assigned */
-		WREG32(mmCP_HQD_VMID, 0);
-
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
-
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
-
-		mqd->cp_hqd_eop_base_addr_lo =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR);
-		mqd->cp_hqd_eop_base_addr_hi =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
-
-		/* enable doorbell? */
-		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell) {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-		} else {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
-		mqd->cp_hqd_pq_doorbell_control = tmp;
-
-		/* disable the queue if it's active */
-		mqd->cp_hqd_dequeue_request = 0;
-		mqd->cp_hqd_pq_rptr = 0;
-		mqd->cp_hqd_pq_wptr= 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-		}
-
-		/* set the pointer to the MQD */
-		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
-		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-		/* set MQD vmid to 0 */
-		tmp = RREG32(mmCP_MQD_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-		WREG32(mmCP_MQD_CONTROL, tmp);
-		mqd->cp_mqd_control = tmp;
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
-		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
-				    (order_base_2(ring->ring_size / 4) - 1));
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
-		mqd->cp_hqd_pq_control = tmp;
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->cp_hqd_pq_rptr_report_addr_lo);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			if ((adev->asic_type == CHIP_CARRIZO) ||
-			    (adev->asic_type == CHIP_FIJI) ||
-			    (adev->asic_type == CHIP_STONEY) ||
-			    (adev->asic_type == CHIP_POLARIS11) ||
-			    (adev->asic_type == CHIP_POLARIS10) ||
-			    (adev->asic_type == CHIP_POLARIS12)) {
-				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
-				       AMDGPU_DOORBELL_KIQ << 2);
-				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-				       AMDGPU_DOORBELL_MEC_RING7 << 2);
-			}
-			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					    DOORBELL_OFFSET, ring->doorbell_index);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
-			mqd->cp_hqd_pq_doorbell_control = tmp;
-
-		} else {
-			mqd->cp_hqd_pq_doorbell_control = 0;
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->cp_hqd_pq_doorbell_control);
-
-		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
-
-		/* set the vmid for the queue */
-		mqd->cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
-		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
-		mqd->cp_hqd_persistent_state = tmp;
-		if (adev->asic_type == CHIP_STONEY ||
-			adev->asic_type == CHIP_POLARIS11 ||
-			adev->asic_type == CHIP_POLARIS10 ||
-			adev->asic_type == CHIP_POLARIS12) {
-			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
-			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
-			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
-		}
-
-		/* activate the queue */
-		mqd->cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
-		vi_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
-
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
-	}
+	r = gfx_v8_0_kiq_kcq_enable(adev);
+	if (r)
+		goto done;

-	if (use_doorbell) {
-		tmp = RREG32(mmCP_PQ_STATUS);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-		WREG32(mmCP_PQ_STATUS, tmp);
+	/* Test KIQ */
+	ring = &adev->gfx.kiq.ring;
+	ring->ready = true;
+	r = amdgpu_ring_test_ring(ring);
+	if (r) {
+		ring->ready = false;
+		goto done;
 	}

-	gfx_v8_0_cp_compute_enable(adev, true);
-
+	/* Test KCQs */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
+		ring = &adev->gfx.compute_ring[i];
 		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
 	}

-	return 0;
+done:
+	return r;
 }

 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
@@ -5314,10 +5096,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
 	if (r)
 		return r;

-	if (amdgpu_sriov_vf(adev))
-		r = gfx_v8_0_kiq_resume(adev);
-	else
-		r = gfx_v8_0_cp_compute_resume(adev);
+	r = gfx_v8_0_kiq_resume(adev);
 	if (r)
 		return r;

@@ -5359,9 +5138,9 @@ static int gfx_v8_0_hw_fini(void *handle)
 		pr_debug("For SRIOV client, shouldn't do anything.\n");
 		return 0;
 	}
+	gfx_v8_0_kiq_kcq_disable(adev);
 	gfx_v8_0_cp_enable(adev, false);
 	gfx_v8_0_rlc_stop(adev);
-	gfx_v8_0_cp_compute_fini(adev);

 	amdgpu_set_powergating_state(adev,
 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
@@ -5372,15 +5151,18 @@ static int gfx_v8_0_hw_fini(void *handle)
 static int gfx_v8_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
+	adev->gfx.in_suspend = true;
 	return gfx_v8_0_hw_fini(adev);
 }

 static int gfx_v8_0_resume(void *handle)
 {
+	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-	return gfx_v8_0_hw_init(adev);
+	r = gfx_v8_0_hw_init(adev);
+	adev->gfx.in_suspend = false;
+	return r;
 }

 static bool gfx_v8_0_is_idle(void *handle)
@@ -5469,25 +5251,6 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
 	}
 }

-static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring)
-{
-	int i;
-
-	mutex_lock(&adev->srbm_mutex);
-	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
-		for (i = 0; i < adev->usec_timeout; i++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
-				break;
-			udelay(1);
-		}
-	}
-	vi_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-}
-
 static int gfx_v8_0_pre_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -5517,7 +5280,11 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

-			gfx_v8_0_inactive_hqd(adev, ring);
+			mutex_lock(&adev->srbm_mutex);
+			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+			gfx_v8_0_deactivate_hqd(adev, 2);
+			vi_srbm_select(adev, 0, 0, 0, 0);
+			mutex_unlock(&adev->srbm_mutex);
 		}
 		/* Disable MEC parsing/prefetching */
 		gfx_v8_0_cp_compute_enable(adev, false);
@@ -5588,18 +5355,6 @@ static int gfx_v8_0_soft_reset(void *handle)
 	return 0;
 }

-static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
-			      struct amdgpu_ring *ring)
-{
-	mutex_lock(&adev->srbm_mutex);
-	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
-	WREG32(mmCP_HQD_PQ_RPTR, 0);
-	WREG32(mmCP_HQD_PQ_WPTR, 0);
-	vi_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-}
-
 static int gfx_v8_0_post_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -5625,9 +5380,13 @@ static int gfx_v8_0_post_soft_reset(void *handle)
 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

-			gfx_v8_0_init_hqd(adev, ring);
+			mutex_lock(&adev->srbm_mutex);
+			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+			gfx_v8_0_deactivate_hqd(adev, 2);
+			vi_srbm_select(adev, 0, 0, 0, 0);
+			mutex_unlock(&adev->srbm_mutex);
 		}
-		gfx_v8_0_cp_compute_resume(adev);
+		gfx_v8_0_kiq_resume(adev);
 	}
 	gfx_v8_0_rlc_start(adev);

@@ -5773,7 +5532,7 @@ static int gfx_v8_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
@@ -6265,6 +6024,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
 		if (temp != data)
 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+		/* enable interrupts again for PG */
+		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
 	}

 	gfx_v8_0_wait_for_rlc_serdes(adev);
@@ -6568,9 +6329,13 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,

 	control |= ib->length_dw | (vm_id << 24);

-	if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
+	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);

+		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+			gfx_v8_0_ring_emit_de_meta(ring);
+	}
+
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
@@ -6753,8 +6518,7 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	uint32_t dw2 = 0;

 	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v8_0_ring_emit_ce_meta_init(ring,
-			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
+		gfx_v8_0_ring_emit_ce_meta(ring);

 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -6780,10 +6544,6 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
 	amdgpu_ring_write(ring, dw2);
 	amdgpu_ring_write(ring, 0);
-
-	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v8_0_ring_emit_de_meta_init(ring,
-			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
 }

 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
@@ -6813,7 +6573,6 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
 }

-
 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -6851,15 +6610,27 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 						     int me, int pipe,
 						     enum amdgpu_interrupt_state state)
 {
+	u32 mec_int_cntl, mec_int_cntl_reg;
+
 	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
 	 * pipes' interrupts are set by amdkfd.
 	 */

 	if (me == 1) {
 		switch (pipe) {
 		case 0:
+			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
+			break;
+		case 1:
+			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+			break;
+		case 2:
+			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+			break;
+		case 3:
+			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
 			break;
 		default:
 			DRM_DEBUG("invalid pipe %d\n", pipe);
@@ -6870,8 +6641,20 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 		return;
 	}

-	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
-		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+	switch (state) {
+	case AMDGPU_IRQ_STATE_DISABLE:
+		mec_int_cntl = RREG32(mec_int_cntl_reg);
+		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+		WREG32(mec_int_cntl_reg, mec_int_cntl);
+		break;
+	case AMDGPU_IRQ_STATE_ENABLE:
+		mec_int_cntl = RREG32(mec_int_cntl_reg);
+		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+		WREG32(mec_int_cntl_reg, mec_int_cntl);
+		break;
+	default:
+		break;
+	}
 }

 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
@@ -6992,8 +6775,6 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
 {
 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

-	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
-
 	switch (type) {
 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
 		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
@@ -7023,8 +6804,6 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
 	u8 me_id, pipe_id, queue_id;
 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

-	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
-
 	me_id = (entry->ring_id & 0x0c) >> 2;
 	pipe_id = (entry->ring_id & 0x03) >> 0;
 	queue_id = (entry->ring_id & 0x70) >> 4;
@@ -7257,7 +7036,7 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

-	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
 }
@@ -7268,9 +7047,15 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
 	unsigned disable_masks[4 * 2];
+	u32 ao_cu_num;

 	memset(cu_info, 0, sizeof(*cu_info));

+	if (adev->flags & AMD_IS_APU)
+		ao_cu_num = 2;
+	else
+		ao_cu_num = adev->gfx.config.max_cu_per_sh;
+
 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

 	mutex_lock(&adev->grbm_idx_mutex);
@@ -7286,9 +7071,9 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;

-			for (k = 0; k < 16; k ++) {
+			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
-					if (counter < 2)
+					if (counter < ao_cu_num)
 						ao_bitmap |= mask;
 					counter ++;
 				}
@@ -7323,7 +7108,7 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
 	.funcs = &gfx_v8_0_ip_funcs,
 };

-static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
+static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 {
 	uint64_t ce_payload_addr;
 	int cnt_ce;
@@ -7333,10 +7118,12 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c
 	} ce_payload = {};

 	if (ring->adev->virt.chained_ib_support) {
-		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
+		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
+						  offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
 	} else {
-		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
+		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
+						  offsetof(struct vi_gfx_meta_data, ce_payload);
 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
 	}

@@ -7350,15 +7137,16 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c
 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
 }

-static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
+static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 {
-	uint64_t de_payload_addr, gds_addr;
+	uint64_t de_payload_addr, gds_addr, csa_addr;
 	int cnt_de;
 	static union {
 		struct vi_de_ib_state regular;
 		struct vi_de_ib_state_chained_ib chained;
 	} de_payload = {};

+	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
 	gds_addr = csa_addr + 4096;
 	if (ring->adev->virt.chained_ib_support) {
 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
@@ -7381,68 +7169,3 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c
 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
 }
-
-/* create MQD for each compute queue */
-static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int r, i;
-
-	/* create MQD for KIQ */
-	ring = &adev->gfx.kiq.ring;
-	if (!ring->mqd_obj) {
-		r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
-					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
-		if (r) {
-			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-			return r;
-		}
-
-		/* prepare MQD backup */
-		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
-		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
-				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
-	}
-
-	/* create MQD for each KCQ */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		if (!ring->mqd_obj) {
-			r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
-						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
-			if (r) {
-				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-				return r;
-			}
-
-			/* prepare MQD backup */
-			adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
-			if (!adev->gfx.mec.mqd_backup[i])
-				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
-		}
-	}
-
-	return 0;
-}
-
-static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int i;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		kfree(adev->gfx.mec.mqd_backup[i]);
-		amdgpu_bo_free_kernel(&ring->mqd_obj,
-				      &ring->mqd_gpu_addr,
-				      &ring->mqd_ptr);
-	}
-
-	ring = &adev->gfx.kiq.ring;
-	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
-	amdgpu_bo_free_kernel(&ring->mqd_obj,
-			      &ring->mqd_gpu_addr,
-			      &ring->mqd_ptr);
-}

+ 5 - 0
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h

@@ -27,4 +27,9 @@
 extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;

+struct amdgpu_device;
+struct vi_mqd;
+
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
+
 #endif

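gfx_v8_0_mqd_commit() is now exported through this header so that an HQD can be programmed from a prepared MQD outside the original resume path. A minimal usage sketch under stated assumptions: the wrapper name below is hypothetical, gfx_v8_0_deactivate_hqd() is file-local to gfx_v8_0.c, and the sequence simply mirrors gfx_v8_0_kiq_init_queue() in that file (select the queue via SRBM, quiesce any active HQD, then commit the MQD).

/* illustrative only -- follows the in-file pattern of gfx_v8_0.c */
static int example_program_hqd(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	struct vi_mqd *mqd = ring->mqd_ptr;
	int r;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	r = gfx_v8_0_deactivate_hqd(adev, 1);		/* quiesce an active HQD first */
	if (!r)
		r = gfx_v8_0_mqd_commit(adev, mqd);	/* write HQD registers from the MQD */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	return r;
}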
+ 1172 - 610
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

@@ -21,7 +21,7 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
@@ -38,8 +38,17 @@
 #include "v9_structs.h"

 #define GFX9_NUM_GFX_RINGS     1
-#define GFX9_NUM_COMPUTE_RINGS 8
-#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
+#define GFX9_MEC_HPD_SIZE 2048
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
+#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
+
+#define mmPWR_MISC_CNTL_STATUS					0x0183
+#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
@@ -48,6 +57,13 @@ MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

+MODULE_FIRMWARE("amdgpu/raven_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven_me.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
+
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
@@ -86,14 +102,27 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =

 static const u32 golden_settings_gc_9_0[] =
 {
-	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400,
+	SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420,
+	SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107,
 	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
 	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
 	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
-	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff
+	SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000,
+	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff,
+	SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080
 };

 static const u32 golden_settings_gc_9_0_vg10[] =
@@ -104,11 +133,47 @@ static const u32 golden_settings_gc_9_0_vg10[] =
 	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
 	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
-	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800,
-	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007
+	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800
+};
+
+static const u32 golden_settings_gc_9_1[] =
+{
+	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104,
+	SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420,
+	SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
+	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120,
+	SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000,
+	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff,
+	SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080
+};
+
+static const u32 golden_settings_gc_9_1_rv1[] =
+{
+	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
+	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042,
+	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000,
+	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
+	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800
 };

 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
+#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042

 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -118,6 +183,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);

 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -130,6 +196,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 						 golden_settings_gc_9_0_vg10,
 						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 		break;
+	case CHIP_RAVEN:
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_gc_9_1,
+						 (const u32)ARRAY_SIZE(golden_settings_gc_9_1));
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_gc_9_1_rv1,
+						 (const u32)ARRAY_SIZE(golden_settings_gc_9_1_rv1));
+		break;
 	default:
 		break;
 	}
@@ -284,6 +358,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	struct amdgpu_firmware_info *info = NULL;
 	const struct common_firmware_header *header = NULL;
 	const struct gfx_firmware_header_v1_0 *cp_hdr;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	unsigned int *tmp = NULL;
+	unsigned int i = 0;

 	DRM_DEBUG("\n");

@@ -291,6 +368,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 		chip_name = "vega10";
 		break;
+	case CHIP_RAVEN:
+		chip_name = "raven";
+		break;
 	default:
 		BUG();
 	}
@@ -333,9 +413,46 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	if (err)
 		goto out;
 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
-	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
-	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
-	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+	adev->gfx.rlc.save_and_restore_offset =
+			le32_to_cpu(rlc_hdr->save_and_restore_offset);
+	adev->gfx.rlc.clear_state_descriptor_offset =
+			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+	adev->gfx.rlc.avail_scratch_ram_locations =
+			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+	adev->gfx.rlc.reg_restore_list_size =
+			le32_to_cpu(rlc_hdr->reg_restore_list_size);
+	adev->gfx.rlc.reg_list_format_start =
+			le32_to_cpu(rlc_hdr->reg_list_format_start);
+	adev->gfx.rlc.reg_list_format_separate_start =
+			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+	adev->gfx.rlc.starting_offsets_start =
+			le32_to_cpu(rlc_hdr->starting_offsets_start);
+	adev->gfx.rlc.reg_list_format_size_bytes =
+			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+	adev->gfx.rlc.reg_list_size_bytes =
+			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+	adev->gfx.rlc.register_list_format =
+			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+	if (!adev->gfx.rlc.register_list_format) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
+
+	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
@@ -447,6 +564,261 @@ out:
 	return err;
 }

+static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
+{
+	u32 count = 0;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	/* begin clear state */
+	count += 2;
+	/* context control state */
+	count += 3;
+
+	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT)
+				count += 2 + ext->reg_count;
+			else
+				return 0;
+		}
+	}
+
+	/* end clear state */
+	count += 2;
+	/* clear state */
+	count += 2;
+
+	return count;
+}
+
+static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
+				    volatile u32 *buffer)
+{
+	u32 count = 0, i;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	if (adev->gfx.rlc.cs_data == NULL)
+		return;
+	if (buffer == NULL)
+		return;
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	buffer[count++] = cpu_to_le32(0x80000000);
+	buffer[count++] = cpu_to_le32(0x80000000);
+
+	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT) {
+				buffer[count++] =
+					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+				buffer[count++] = cpu_to_le32(ext->reg_index -
+						PACKET3_SET_CONTEXT_REG_START);
+				for (i = 0; i < ext->reg_count; i++)
+					buffer[count++] = cpu_to_le32(ext->extent[i]);
+			} else {
+				return;
+			}
+		}
+	}
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+	buffer[count++] = cpu_to_le32(0);
+}
+
+static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
+{
+	uint32_t data;
+
+	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
+
+	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
+	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
+
+	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
+	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
+	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
+
+	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
+	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
+	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
+	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
+	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
+
+	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
+	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
+	data &= 0x0000FFFF;
+	data |= 0x00C00000;
+	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
+
+	/* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */
+	WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF);
+
+	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
+	 * but used for RLC_LB_CNTL configuration */
+	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
+	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
+	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
+	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
+	mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
+{
+	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
+}
+
+static void rv_init_cp_jump_table(struct amdgpu_device *adev)
+{
+	const __le32 *fw_data;
+	volatile u32 *dst_ptr;
+	int me, i, max_me = 5;
+	u32 bo_offset = 0;
+	u32 table_offset, table_size;
+
+	/* write the cp table buffer */
+	dst_ptr = adev->gfx.rlc.cp_table_ptr;
+	for (me = 0; me < max_me; me++) {
+		if (me == 0) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.ce_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 1) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.pfp_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 2) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.me_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 3) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else  if (me == 4) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec2_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		}
+
+		for (i = 0; i < table_size; i ++) {
+			dst_ptr[bo_offset + i] =
+				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+		}
+
+		bo_offset += table_size;
+	}
+}
+
+static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
+{
+	/* clear state block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+			&adev->gfx.rlc.clear_state_gpu_addr,
+			(void **)&adev->gfx.rlc.cs_ptr);
+
+	/* jump table block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+			&adev->gfx.rlc.cp_table_gpu_addr,
+			(void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
+{
+	volatile u32 *dst_ptr;
+	u32 dws;
+	const struct cs_section_def *cs_data;
+	int r;
+
+	adev->gfx.rlc.cs_data = gfx9_cs_data;
+
+	cs_data = adev->gfx.rlc.cs_data;
+
+	if (cs_data) {
+		/* clear state block */
+		adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
+		if (adev->gfx.rlc.clear_state_obj == NULL) {
+			r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
+						AMDGPU_GEM_DOMAIN_VRAM,
+						&adev->gfx.rlc.clear_state_obj,
+						&adev->gfx.rlc.clear_state_gpu_addr,
+						(void **)&adev->gfx.rlc.cs_ptr);
+			if (r) {
+				dev_err(adev->dev,
+					"(%d) failed to create rlc csb bo\n", r);
+				gfx_v9_0_rlc_fini(adev);
+				return r;
+			}
+		}
+		/* set up the cs buffer */
+		dst_ptr = adev->gfx.rlc.cs_ptr;
+		gfx_v9_0_get_csb_buffer(adev, dst_ptr);
+		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+	}
+
+	if (adev->asic_type == CHIP_RAVEN) {
+		/* TODO: double check the cp_table_size for RV */
+		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+		if (adev->gfx.rlc.cp_table_obj == NULL) {
+			r = amdgpu_bo_create_kernel(adev, adev->gfx.rlc.cp_table_size,
+						PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+						&adev->gfx.rlc.cp_table_obj,
+						&adev->gfx.rlc.cp_table_gpu_addr,
+						(void **)&adev->gfx.rlc.cp_table_ptr);
+			if (r) {
+				dev_err(adev->dev,
+					"(%d) failed to create cp table bo\n", r);
+				gfx_v9_0_rlc_fini(adev);
+				return r;
+			}
+		}
+
+		rv_init_cp_jump_table(adev);
+		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+		gfx_v9_0_init_lbpw(adev);
+	}
+
+	return 0;
+}
+
 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 {
 	int r;
@@ -473,8 +845,6 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 	}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
@@ -482,20 +852,19 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	const __le32 *fw_data;
 	unsigned fw_size;
 	u32 *fw;
+	size_t mec_hpd_size;
 
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
 
-	/*
-	 * we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -575,131 +944,6 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
-}
-
-static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
-{
-	int r;
-	u32 *hpd;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
-				    &kiq->eop_gpu_addr, (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
-		return r;
-	}
-
-	memset(hpd, 0, MEC_HPD_SIZE);
-
-	r = amdgpu_bo_reserve(kiq->eop_obj, true);
-	if (unlikely(r != 0))
-		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
-	amdgpu_bo_kunmap(kiq->eop_obj);
-	amdgpu_bo_unreserve(kiq->eop_obj);
-
-	return 0;
-}
-
-static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring,
-				  struct amdgpu_irq_src *irq)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-	int r = 0;
-
-	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
-	if (r)
-		return r;
-
-	ring->adev = NULL;
-	ring->ring_obj = NULL;
-	ring->use_doorbell = true;
-	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
-	if (adev->gfx.mec2_fw) {
-		ring->me = 2;
-		ring->pipe = 0;
-	} else {
-		ring->me = 1;
-		ring->pipe = 1;
-	}
-
-	ring->queue = 0;
-	ring->eop_gpu_addr = kiq->eop_gpu_addr;
-	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
-	r = amdgpu_ring_init(adev, ring, 1024,
-			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
-	if (r)
-		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
-
-	return r;
-}
-static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
-				   struct amdgpu_irq_src *irq)
-{
-	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
-	amdgpu_ring_fini(ring);
-}
-
-/* create MQD for each compute queue */
-static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int r, i;
-
-	/* create MQD for KIQ */
-	ring = &adev->gfx.kiq.ring;
-	if (!ring->mqd_obj) {
-		r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
-					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-					    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-		if (r) {
-			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-			return r;
-		}
-
-		/*TODO: prepare MQD backup */
-	}
-
-	/* create MQD for each KCQ */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		if (!ring->mqd_obj) {
-			r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
-						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-						    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-			if (r) {
-				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-				return r;
-			}
-
-			/* TODO: prepare MQD backup */
-		}
-	}
-
-	return 0;
-}
-
-static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int i;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-	}
-
-	ring = &adev->gfx.kiq.ring;
-	amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-}
-
 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 {
 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
@@ -770,23 +1014,21 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		adev->gfx.config.max_shader_engines = 4;
-		adev->gfx.config.max_cu_per_sh = 16;
-		adev->gfx.config.max_sh_per_se = 1;
-		adev->gfx.config.max_backends_per_se = 4;
-		adev->gfx.config.max_texture_channel_caches = 16;
-		adev->gfx.config.max_gprs = 256;
-		adev->gfx.config.max_gs_threads = 32;
 		adev->gfx.config.max_hw_contexts = 8;
-
 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
-		adev->gfx.config.gs_vgt_table_depth = 32;
-		adev->gfx.config.gs_prim_buffer_depth = 1792;
 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
 		break;
+	case CHIP_RAVEN:
+		adev->gfx.config.max_hw_contexts = 8;
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
+		break;
 	default:
 		BUG();
 		break;
@@ -1023,13 +1265,61 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				      int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX9_MEC_HPD_SIZE);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			     &adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v9_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, j, k, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
 	if (r)
@@ -1062,6 +1352,12 @@ static int gfx_v9_0_sw_init(void *handle)
 		return r;
 	}
 
+	r = gfx_v9_0_rlc_init(adev);
+	if (r) {
+		DRM_ERROR("Failed to init rlc BOs!\n");
+		return r;
+	}
+
 	r = gfx_v9_0_mec_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init MEC BOs!\n");
@@ -1081,49 +1377,40 @@ static int gfx_v9_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v9_0_compute_ring_init(adev,
+							       ring_id,
+							       i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
 		}
 		}
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
 	}
 	}
 
-		r = gfx_v9_0_kiq_init(adev);
-		if (r) {
-			DRM_ERROR("Failed to init KIQ BOs!\n");
-			return r;
-		}
+	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
+	if (r) {
+		DRM_ERROR("Failed to init KIQ BOs!\n");
+		return r;
+	}
 
 
-		kiq = &adev->gfx.kiq;
-		r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
-		if (r)
-			return r;
+	kiq = &adev->gfx.kiq;
+	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+	if (r)
+		return r;
 
 
-		/* create MQD for all compute queues as wel as KIQ for SRIOV case */
-		r = gfx_v9_0_compute_mqd_sw_init(adev);
-		if (r)
-			return r;
-	}
+	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
+	/* create MQD for all compute queues as well as KIQ for SRIOV case */
+	if (r)
+		return r;
 
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -1170,11 +1457,9 @@ static int gfx_v9_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-		gfx_v9_0_compute_mqd_sw_fini(adev);
-		gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
-		gfx_v9_0_kiq_fini(adev);
-	}
+	amdgpu_gfx_compute_mqd_sw_fini(adev);
+	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	amdgpu_gfx_kiq_fini(adev);
 
 	gfx_v9_0_mec_fini(adev);
 	gfx_v9_0_ngg_fini(adev);
@@ -1208,11 +1493,6 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 }
 
-static u32 gfx_v9_0_create_bitmask(u32 bit_width)
-{
-	return (u32)((1ULL << bit_width) - 1);
-}
-
 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -1223,8 +1503,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
 
-	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se /
-				       adev->gfx.config.max_sh_per_se);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+					 adev->gfx.config.max_sh_per_se);
 
 	return (~data) & mask;
 }
@@ -1272,7 +1552,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 
 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 
+			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
 
 	mutex_lock(&adev->srbm_mutex);
 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
@@ -1370,9 +1650,6 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 {
 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
 
-	if (enable)
-		return;
-
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
@@ -1381,6 +1658,373 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
 }
 
+static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
+{
+	/* csib */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
+			adev->gfx.rlc.clear_state_gpu_addr >> 32);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
+			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
+			adev->gfx.rlc.clear_state_size);
+}
+
+static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+				int indirect_offset,
+				int list_size,
+				int *unique_indirect_regs,
+				int *unique_indirect_reg_count,
+				int max_indirect_reg_count,
+				int *indirect_start_offsets,
+				int *indirect_start_offsets_count,
+				int max_indirect_start_offsets_count)
+{
+	int idx;
+	bool new_entry = true;
+
+	for (; indirect_offset < list_size; indirect_offset++) {
+
+		if (new_entry) {
+			new_entry = false;
+			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
+			BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
+		}
+
+		if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
+			new_entry = true;
+			continue;
+		}
+
+		indirect_offset += 2;
+
+		/* look for the matching index */
+		for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
+			if (unique_indirect_regs[idx] ==
+				register_list_format[indirect_offset])
+				break;
+		}
+
+		if (idx >= *unique_indirect_reg_count) {
+			unique_indirect_regs[*unique_indirect_reg_count] =
+				register_list_format[indirect_offset];
+			idx = *unique_indirect_reg_count;
+			*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
+			BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+		}
+
+		register_list_format[indirect_offset] = idx;
+	}
+}
+
+static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+{
+	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
+	int unique_indirect_reg_count = 0;
+
+	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
+	int indirect_start_offsets_count = 0;
+
+	int list_size = 0;
+	int i = 0;
+	u32 tmp = 0;
+
+	u32 *register_list_format =
+		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+	if (!register_list_format)
+		return -ENOMEM;
+	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
+		adev->gfx.rlc.reg_list_format_size_bytes);
+
+	/* setup unique_indirect_regs array and indirect_start_offsets array */
+	gfx_v9_0_parse_ind_reg_list(register_list_format,
+				GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
+				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+				unique_indirect_regs,
+				&unique_indirect_reg_count,
+				sizeof(unique_indirect_regs)/sizeof(int),
+				indirect_start_offsets,
+				&indirect_start_offsets_count,
+				sizeof(indirect_start_offsets)/sizeof(int));
+
+	/* enable auto inc in case it is disabled */
+	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
+	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
+
+	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
+		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
+	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
+			adev->gfx.rlc.register_restore[i]);
+
+	/* load direct register */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
+	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
+			adev->gfx.rlc.register_restore[i]);
+
+	/* load indirect register */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+		adev->gfx.rlc.reg_list_format_start);
+	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
+			register_list_format[i]);
+
+	/* set save/restore list size */
+	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
+	list_size = list_size >> 1;
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+		adev->gfx.rlc.reg_restore_list_size);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
+
+	/* write the starting offsets to RLC scratch ram */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+		adev->gfx.rlc.starting_offsets_start);
+	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
+			indirect_start_offsets[i]);
+
+	/* load unique indirect regs*/
+	for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) {
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
+			unique_indirect_regs[i] & 0x3FFFF);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
+			unique_indirect_regs[i] >> 20);
+	}
+
+	kfree(register_list_format);
+	return 0;
+}
+
+static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
+{
+	u32 tmp = 0;
+
+	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
+	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
+}
+
+static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
+	if (enable == true) {
+		/* enable GFXIP control over CGPG */
+		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+
+		/* update status */
+		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
+		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+	} else {
+		/* restore GFXIP control over GCPG */
+		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+	}
+}
+
+static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
+{
+	uint32_t data = 0;
+
+	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG)) {
+		/* init IDLE_POLL_COUNT = 60 */
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
+		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+
+		/* init RLC PG Delay */
+		data = 0;
+		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
+		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
+		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
+		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
+
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
+		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
+		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
+
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
+		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
+		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
+
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
+		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
+
+		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
+		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
+
+		pwr_10_0_gfxip_control_over_cgpg(adev, true);
+	}
+}
+
+static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+
+	if (enable == true) {
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+		if (default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	} else {
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	}
+}
+
+static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+
+	if (enable == true) {
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	} else {
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	}
+}
+
+static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
+					bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+
+	if (enable == true) {
+		data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	} else {
+		data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	}
+}
+
+static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+
+	if (!enable)
+		/* read any GFX register to wake up GFX */
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
+}
+
+void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
+{
+	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG |
+			      AMD_PG_SUPPORT_CP |
+			      AMD_PG_SUPPORT_GDS |
+			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
+		gfx_v9_0_init_csb(adev);
+		gfx_v9_0_init_rlc_save_restore_list(adev);
+		gfx_v9_0_enable_save_restore_machine(adev);
+
+		if (adev->asic_type == CHIP_RAVEN) {
+			WREG32(mmRLC_JUMP_TABLE_RESTORE,
+				adev->gfx.rlc.cp_table_gpu_addr >> 8);
+			gfx_v9_0_init_gfx_power_gating(adev);
+
+			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
+				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
+			} else {
+				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
+				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
+			}
+
+			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+				gfx_v9_0_enable_cp_power_gating(adev, true);
+			else
+				gfx_v9_0_enable_cp_power_gating(adev, false);
+		}
+	}
+}
+
 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
 {
 	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
@@ -1425,7 +2069,7 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
 		 * default is 0x9C4 to create a 100us interval */
 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
-		 * to disable the page fault retry interrupts, default is 
+		 * to disable the page fault retry interrupts, default is
 		 * 0x100 (256) */
 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
 	}
@@ -1474,6 +2118,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 
 	gfx_v9_0_rlc_reset(adev);
 
+	gfx_v9_0_init_pg(adev);
+
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
 		/* legacy rlc firmware loading */
 		r = gfx_v9_0_rlc_load_microcode(adev);
@@ -1481,6 +2127,13 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 			return r;
 	}
 
+	if (adev->asic_type == CHIP_RAVEN) {
+		if (amdgpu_lbpw != 0)
+			gfx_v9_0_enable_lbpw(adev, true);
+		else
+			gfx_v9_0_enable_lbpw(adev, false);
+	}
+
 	gfx_v9_0_rlc_start(adev);
 
 	return 0;
@@ -1559,35 +2212,6 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
 	return 0;
 }
 
-static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
-{
-	u32 count = 0;
-	const struct cs_section_def *sect = NULL;
-	const struct cs_extent_def *ext = NULL;
-
-	/* begin clear state */
-	count += 2;
-	/* context control state */
-	count += 3;
-
-	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
-		for (ext = sect->section; ext->extent != NULL; ++ext) {
-			if (sect->id == SECT_CONTEXT)
-				count += 2 + ext->reg_count;
-			else
-				return 0;
-		}
-	}
-	/* pa_sc_raster_config/pa_sc_raster_config1 */
-	count += 4;
-	/* end clear state */
-	count += 2;
-	/* clear state */
-	count += 2;
-
-	return count;
-}
-
 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
@@ -1730,13 +2354,6 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	udelay(50);
 }
 
-static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
-{
-	gfx_v9_0_cp_compute_enable(adev, true);
-
-	return 0;
-}
-
 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 {
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -1764,7 +2381,7 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
- 
+
 	/* MEC1 */
 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
 			 mec_hdr->jt_offset);
@@ -1779,105 +2396,160 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
+/* KIQ functions */
+static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 {
-	int i, r;
+	uint32_t tmp;
+	struct amdgpu_device *adev = ring->adev;
 
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+	/* tell RLC which is KIQ queue */
+	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+	tmp &= 0xffffff00;
+	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+	tmp |= 0x80;
+	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+}
 
-		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, true);
-			if (unlikely(r != 0))
-				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
+static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	uint32_t scratch, tmp = 0;
+	uint64_t queue_mask = 0;
+	int r, i;
 
-			amdgpu_bo_unpin(ring->mqd_obj);
-			amdgpu_bo_unreserve(ring->mqd_obj);
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
 
-			amdgpu_bo_unref(&ring->mqd_obj);
-			ring->mqd_obj = NULL;
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
 		}
 		}
-	}
-}
 
+		queue_mask |= (1ull << i);
+	}
 
 
-static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int i, r;
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-		if (gfx_v9_0_init_queue(ring))
-			dev_warn(adev->dev, "compute queue %d init failed!\n", i);
+	r = amdgpu_gfx_scratch_get(adev, &scratch);
+	if (r) {
+		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
+		return r;
 	}
 	}
+	WREG32(scratch, 0xCAFEDEAD);
 
-	if (r)
+	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		amdgpu_gfx_scratch_free(adev, scratch);
 		return r;
 		return r;
+	}
 
-}
+	/* set resources */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+
+		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */
+				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
+				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+	}
+	/* write to scratch for completion */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
+	amdgpu_ring_commit(kiq_ring);
 
-/* KIQ functions */
-static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
-{
-	uint32_t tmp;
-	struct amdgpu_device *adev = ring->adev;
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i >= adev->usec_timeout) {
+		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	amdgpu_gfx_scratch_free(adev, scratch);
 
-	/* tell RLC which is KIQ queue */
-	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
-	tmp &= 0xffffff00;
-	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
-	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
-	tmp |= 0x80;
-	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+	return r;
 }
 
-static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
+static int gfx_v9_0_kiq_kcq_disable(struct amdgpu_device *adev)
 {
-	amdgpu_ring_alloc(ring, 8);
-	/* set resources */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(ring, 0);	/* queue mask hi */
-	amdgpu_ring_write(ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(ring, 0);	/* oac mask */
-	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
-	amdgpu_ring_commit(ring);
-	udelay(50);
-}
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	uint32_t scratch, tmp = 0;
+	int r, i;
+
+	r = amdgpu_gfx_scratch_get(adev, &scratch);
+	if (r) {
+		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
 
 
-static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
-				   struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = kiq_ring->adev;
-	uint64_t mqd_addr, wptr_addr;
-
-	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	amdgpu_ring_alloc(kiq_ring, 8);
-
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-			  (0 << 4) | /* Queue_Sel */
-			  (0 << 8) | /* VMID */
-			  (ring->queue << 13 ) |
-			  (ring->pipe << 16) |
-			  ((ring->me == 1 ? 0 : 1) << 18) |
-			  (0 << 21) | /*queue_type: normal compute queue */
-			  (1 << 24) | /* alloc format: all_on_one_pipe */
-			  (0 << 26) | /* engine_sel: compute */
-			  (1 << 29)); /* num_queues: must be 1 */
-	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		amdgpu_gfx_scratch_free(adev, scratch);
+		return r;
+	}
+	/* unmap queues */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	/* write to scratch for completion */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
 	amdgpu_ring_commit(kiq_ring);
-	udelay(50);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i >= adev->usec_timeout) {
+		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	amdgpu_gfx_scratch_free(adev, scratch);
+
+	return r;
 }
 
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
@@ -1902,7 +2574,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
@@ -2119,47 +2791,69 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct v9_mqd *mqd = ring->mqd_ptr;
-	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
-	if (is_kiq) {
-		gfx_v9_0_kiq_setting(&kiq->ring);
+	gfx_v9_0_kiq_setting(ring);
+
+	if (adev->gfx.in_reset) { /* for GPU_RESET case */
+		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+		/* reset ring buffer */
+		ring->wptr = 0;
+		amdgpu_ring_clear_ring(ring);
+
+		mutex_lock(&adev->srbm_mutex);
+		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v9_0_kiq_init_register(ring);
+		soc15_grbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
 	} else {
-		mqd_idx = ring - &adev->gfx.compute_ring[0];
+		memset((void *)mqd, 0, sizeof(*mqd));
+		mutex_lock(&adev->srbm_mutex);
+		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v9_0_mqd_init(ring);
+		gfx_v9_0_kiq_init_register(ring);
+		soc15_grbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	}
 	}
 
+	return 0;
+}
+
+static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v9_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
 		memset((void *)mqd, 0, sizeof(*mqd));
 		mutex_lock(&adev->srbm_mutex);
 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v9_0_mqd_init(ring);
-		if (is_kiq)
-			gfx_v9_0_kiq_init_register(ring);
 		soc15_grbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
-	} else { /* for GPU_RESET case */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
-
-		if (is_kiq) {
-		    mutex_lock(&adev->srbm_mutex);
-		    soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-		    gfx_v9_0_kiq_init_register(ring);
-		    soc15_grbm_select(adev, 0, 0, 0, 0);
-		    mutex_unlock(&adev->srbm_mutex);
-		}
+		amdgpu_ring_clear_ring(ring);
+	} else {
+		amdgpu_ring_clear_ring(ring);
 	}
 
-	if (is_kiq)
-		gfx_v9_0_kiq_enable(ring);
-	else
-		gfx_v9_0_map_queue_enable(&kiq->ring, ring);
-
 	return 0;
 }
 
@@ -2194,7 +2888,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
 			goto done;
 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
 		if (!r) {
-			r = gfx_v9_0_kiq_init_queue(ring);
+			r = gfx_v9_0_kcq_init_queue(ring);
 			amdgpu_bo_kunmap(ring->mqd_obj);
 			ring->mqd_ptr = NULL;
 		}
@@ -2203,13 +2897,14 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
 			goto done;
 	}
 
+	r = gfx_v9_0_kiq_kcq_enable(adev);
 done:
 	return r;
 }
 
 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 {
-	int r,i;
+	int r, i;
 	struct amdgpu_ring *ring;
 
 	if (!(adev->flags & AMD_IS_APU))
@@ -2230,10 +2925,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	if (amdgpu_sriov_vf(adev))
-		r = gfx_v9_0_kiq_resume(adev);
-	else
-		r = gfx_v9_0_cp_compute_resume(adev);
+	r = gfx_v9_0_kiq_resume(adev);
 	if (r)
 		return r;
 
@@ -2243,6 +2935,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 		ring->ready = false;
 		return r;
 	}
+
+	ring = &adev->gfx.kiq.ring;
+	ring->ready = true;
+	r = amdgpu_ring_test_ring(ring);
+	if (r)
+		ring->ready = false;
+
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 
@@ -2252,14 +2951,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 			ring->ready = false;
 	}
 
-	if (amdgpu_sriov_vf(adev)) {
-		ring = &adev->gfx.kiq.ring;
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
-	}
-
 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
 
 	return 0;
@@ -2305,9 +2996,9 @@ static int gfx_v9_0_hw_fini(void *handle)
 		pr_debug("For SRIOV client, shouldn't do anything.\n");
 		return 0;
 	}
+	gfx_v9_0_kiq_kcq_disable(adev);
 	gfx_v9_0_cp_enable(adev, false);
 	gfx_v9_0_rlc_stop(adev);
-	gfx_v9_0_cp_compute_fini(adev);
 
 	return 0;
 }
@@ -2316,14 +3007,18 @@ static int gfx_v9_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->gfx.in_suspend = true;
 	return gfx_v9_0_hw_fini(adev);
 }
 
 static int gfx_v9_0_resume(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
 
-	return gfx_v9_0_hw_init(adev);
+	r = gfx_v9_0_hw_init(adev);
+	adev->gfx.in_suspend = false;
+	return r;
 }
 
 static bool gfx_v9_0_is_idle(void *handle)
@@ -2470,7 +3165,7 @@ static int gfx_v9_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
 	gfx_v9_0_set_gds_init(adev);
@@ -2549,6 +3244,43 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 	}
 }
 
+static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	/* TODO: double check if we need to perform under safe mode */
+	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
+
+	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
+		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
+	} else {
+		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
+		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
+	}
+
+	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
+}
+
+static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	/* TODO: double check if we need to perform under safe mode */
+	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
+
+	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
+	else
+		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
+
+	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+	else
+		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+
+	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
+}
+
 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 						      bool enable)
 {
@@ -2739,6 +3471,34 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
 static int gfx_v9_0_set_powergating_state(void *handle,
 					  enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+
+	switch (adev->asic_type) {
+	case CHIP_RAVEN:
+		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
+			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
+		} else {
+			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
+			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
+		}
+
+		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+			gfx_v9_0_enable_cp_power_gating(adev, true);
+		else
+			gfx_v9_0_enable_cp_power_gating(adev, false);
+
+		/* update gfx cgpg state */
+		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
+
+		/* update mgcg state */
+		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
+		break;
+	default:
+		break;
+	}
+
 	return 0;
 }
 
@@ -2752,6 +3512,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		gfx_v9_0_update_gfx_clock_gating(adev,
 						 state == AMD_CG_STATE_GATE ? true : false);
 		break;
@@ -2879,31 +3640,33 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                       struct amdgpu_ib *ib,
                                       unsigned vm_id, bool ctx_switch)
 {
-        u32 header, control = 0;
+	u32 header, control = 0;
 
-        if (ib->flags & AMDGPU_IB_FLAG_CE)
-                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
-        else
-                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+	if (ib->flags & AMDGPU_IB_FLAG_CE)
+		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+	else
+		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-        control |= ib->length_dw | (vm_id << 24);
+	control |= ib->length_dw | (vm_id << 24);
 
-		if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT))
-			control |= INDIRECT_BUFFER_PRE_ENB(1);
+	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
-        amdgpu_ring_write(ring, header);
-	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
-        amdgpu_ring_write(ring,
+		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+			gfx_v9_0_ring_emit_de_meta(ring);
+	}
+
+	amdgpu_ring_write(ring, header);
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
-                          (2 << 0) |
+		(2 << 0) |
 #endif
-                          lower_32_bits(ib->gpu_addr));
-        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-        amdgpu_ring_write(ring, control);
+		lower_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, control);
 }
 
-#define	INDIRECT_BUFFER_VALID                   (1 << 23)
-
 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                           struct amdgpu_ib *ib,
                                           unsigned vm_id, bool ctx_switch)
@@ -2971,9 +3734,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
 				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
@@ -3130,9 +3892,6 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
 	amdgpu_ring_write(ring, dw2);
 	amdgpu_ring_write(ring, 0);
-
-	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v9_0_ring_emit_de_meta(ring);
 }
 
 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
@@ -3160,6 +3919,12 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
 }
 
+static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
+}
+
 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -3208,8 +3973,8 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 	u32 mec_int_cntl, mec_int_cntl_reg;
 
 	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
 	 * pipes' interrupts are set by amdkfd.
 	 */
 
@@ -3218,6 +3983,15 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 		case 0:
 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
 			break;
+		case 1:
+			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+			break;
+		case 2:
+			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+			break;
+		case 3:
+			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+			break;
 		default:
 			DRM_DEBUG("invalid pipe %d\n", pipe);
 			return;
@@ -3494,6 +4268,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+	.emit_tmz = gfx_v9_0_ring_emit_tmz,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -3605,6 +4380,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
 		break;
 	default:
@@ -3650,7 +4426,7 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 
-	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
 	return (~data) & mask;
 }
@@ -3664,8 +4440,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 	if (!adev || !cu_info)
 		return -EINVAL;
 
-	memset(cu_info, 0, sizeof(*cu_info));
-
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
@@ -3676,9 +4450,9 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < 16; k ++) {
+			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
-					if (counter < 2)
+					if (counter < adev->gfx.config.max_cu_per_sh)
 						ao_bitmap |= mask;
 					counter ++;
 				}
@@ -3697,218 +4471,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
-{
-	int r, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
-	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct v9_mqd *mqd;
-	struct amdgpu_device *adev;
-
-	adev = ring->adev;
-	if (ring->mqd_obj == NULL) {
-		r = amdgpu_bo_create(adev,
-				sizeof(struct v9_mqd),
-				PAGE_SIZE,true,
-				AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-				NULL, &ring->mqd_obj);
-		if (r) {
-			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-			return r;
-		}
-	}
-
-	r = amdgpu_bo_reserve(ring->mqd_obj, false);
-	if (unlikely(r != 0)) {
-		gfx_v9_0_cp_compute_fini(adev);
-		return r;
-	}
-
-	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-	if (r) {
-		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-		gfx_v9_0_cp_compute_fini(adev);
-		return r;
-	}
-	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-	if (r) {
-		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-		gfx_v9_0_cp_compute_fini(adev);
-		return r;
-	}
-
-	/* init the mqd struct */
-	memset(buf, 0, sizeof(struct v9_mqd));
-
-	mqd = (struct v9_mqd *)buf;
-	mqd->header = 0xC0310800;
-	mqd->compute_pipelinestat_enable = 0x00000001;
-	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
-	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
-	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
-	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
-	mqd->compute_misc_reserved = 0x00000003;
-	mutex_lock(&adev->srbm_mutex);
-	soc15_grbm_select(adev, ring->me,
-			       ring->pipe,
-			       ring->queue, 0);
-	/* disable wptr polling */
-	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-
-	/* write the EOP addr */
-	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
-	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
-	eop_gpu_addr >>= 8;
-
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
-	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
-
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
-
-	/* enable doorbell? */
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-	if (use_doorbell)
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-	else
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
-
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
-	mqd->cp_hqd_pq_doorbell_control = tmp;
-
-	/* disable the queue if it's active */
-	ring->wptr = 0;
-	mqd->cp_hqd_dequeue_request = 0;
-	mqd->cp_hqd_pq_rptr = 0;
-	mqd->cp_hqd_pq_wptr_lo = 0;
-	mqd->cp_hqd_pq_wptr_hi = 0;
-	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
-				break;
-			udelay(1);
-		}
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
-	}
-
-	/* set the pointer to the MQD */
-	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
-	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-	/* set MQD vmid to 0 */
-	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
-	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
-	mqd->cp_mqd_control = tmp;
-
-	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	hqd_gpu_addr = ring->gpu_addr >> 8;
-	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
-	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
-		(order_base_2(ring->ring_size / 4) - 1));
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-		((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
-	mqd->cp_hqd_pq_control = tmp;
-
-	/* set the wb address wether it's enabled or not */
-	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
-	mqd->cp_hqd_pq_rptr_report_addr_hi =
-	upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
-	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
-		mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-	/* enable the doorbell if requested */
-	if (use_doorbell) {
-		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
-			(AMDGPU_DOORBELL64_KIQ * 2) << 2);
-		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-			(AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
-		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-			DOORBELL_OFFSET, ring->doorbell_index);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
-		mqd->cp_hqd_pq_doorbell_control = tmp;
-
-	} else {
-		mqd->cp_hqd_pq_doorbell_control = 0;
-	}
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-		mqd->cp_hqd_pq_doorbell_control);
-
-	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
-
-	/* set the vmid for the queue */
-	mqd->cp_hqd_vmid = 0;
-	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
-	mqd->cp_hqd_persistent_state = tmp;
-
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
-	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
-	soc15_grbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-
-	amdgpu_bo_kunmap(ring->mqd_obj);
-	amdgpu_bo_unreserve(ring->mqd_obj);
-
-	if (use_doorbell)
-		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-
-	return 0;
-}
-
 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
 {
 	.type = AMD_IP_BLOCK_TYPE_GFX,

+ 175 - 260
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c

@@ -31,178 +31,161 @@
 
 #include "soc15_common.h"
 
-int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
+u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
 {
-	u32 tmp;
-	u64 value;
-	u32 i;
+	return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
+}
 
-	/* Program MC. */
-	/* Update configuration */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR),
-		adev->mc.vram_start >> 18);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR),
-		adev->mc.vram_end >> 18);
+static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
+{
+	uint64_t value;
 
-	value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
+	BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
+	value = adev->gart.table_addr - adev->mc.vram_start
 		+ adev->vm_manager.vram_base_offset;
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB),
-				(u32)(value >> 12));
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
-				(u32)(value >> 44));
+	value &= 0x0000FFFFFFFFF000ULL;
+	value |= 0x1; /*valid bit*/
 
-	if (amdgpu_sriov_vf(adev)) {
-		/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so
-		vbios post doesn't program them, for SRIOV driver need to program them */
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE),
-				adev->mc.vram_start >> 24);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP),
-				adev->mc.vram_end >> 24);
-	}
+	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+		     lower_32_bits(value));
+
+	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+		     upper_32_bits(value));
+}
+
+static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	gfxhub_v1_0_init_gart_pt_regs(adev);
+
+	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     (u32)(adev->mc.gtt_start >> 12));
+	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     (u32)(adev->mc.gtt_start >> 44));
+
+	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     (u32)(adev->mc.gtt_end >> 12));
+	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     (u32)(adev->mc.gtt_end >> 44));
+}
+
+static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+	uint64_t value;
 
 	/* Disable AGP. */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF);
+	WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
+	WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
+	WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF);
 
-	/* GART Enable. */
+	/* Program the system aperture low logical page number. */
+	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+		     adev->mc.vram_start >> 18);
+	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+		     adev->mc.vram_end >> 18);
+
+	/* Set default page address. */
+	value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
+		+ adev->vm_manager.vram_base_offset;
+	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+		     (u32)(value >> 12));
+	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+		     (u32)(value >> 44));
+
+	/* Program "protection fault". */
+	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+		     (u32)(adev->dummy_page.addr >> 12));
+	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+		     (u32)((u64)adev->dummy_page.addr >> 44));
+
+	WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
+		       ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
 
 	/* Setup TLB control */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL));
+	tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
+
 	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				SYSTEM_ACCESS_MODE,
-				3);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				ENABLE_ADVANCED_DRIVER_MODEL,
-				1);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				SYSTEM_APERTURE_UNMAPPED_ACCESS,
-				0);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				ECO_BITS,
-				0);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				MTYPE,
-				MTYPE_UC);/* XXX for emulation. */
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				ATC_EN,
-				1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+			    ENABLE_ADVANCED_DRIVER_MODEL, 1);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+			    SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+			    MTYPE, MTYPE_UC);/* XXX for emulation. */
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+	WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
 
 	/* Setup L2 cache */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL));
+	tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
-	tmp = REG_SET_FIELD(tmp,
-				VM_L2_CNTL,
-				ENABLE_L2_FRAGMENT_PROCESSING,
-				0);
-	tmp = REG_SET_FIELD(tmp,
-				VM_L2_CNTL,
-				L2_PDE0_CACHE_TAG_GENERATION_MODE,
-				0);/* XXX for emulation, Refer to closed source code.*/
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+	/* XXX for emulation, Refer to closed source code.*/
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+			    0);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
-	tmp = REG_SET_FIELD(tmp,
-				VM_L2_CNTL,
-				CONTEXT1_IDENTITY_ACCESS_MODE,
-				1);
-	tmp = REG_SET_FIELD(tmp,
-				VM_L2_CNTL,
-				IDENTITY_MODE_FRAGMENT_SIZE,
-				0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+	WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp);
 
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2));
+	tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp);
+	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp);
+	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
 
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4));
-	tmp = REG_SET_FIELD(tmp,
-			    VM_L2_CNTL4,
-			    VMC_TAP_PDE_REQUEST_PHYSICAL,
-			    0);
-	tmp = REG_SET_FIELD(tmp,
-			    VM_L2_CNTL4,
-			    VMC_TAP_PTE_REQUEST_PHYSICAL,
-			    0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp);
-
-	/* setup context0 */
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32),
-		(u32)(adev->mc.gtt_start >> 12));
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32),
-		(u32)(adev->mc.gtt_start >> 44));
-
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32),
-		(u32)(adev->mc.gtt_end >> 12));
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32),
-		(u32)(adev->mc.gtt_end >> 44));
+	tmp = mmVM_L2_CNTL4_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+	WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp);
+}
 
-	BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
-	value = adev->gart.table_addr - adev->mc.vram_start
-		+ adev->vm_manager.vram_base_offset;
-	value &= 0x0000FFFFFFFFF000ULL;
-	value |= 0x1; /*valid bit*/
+static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
 
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32),
-		(u32)value);
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32),
-		(u32)(value >> 32));
-
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32),
-		(u32)(adev->dummy_page.addr >> 12));
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-				mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32),
-		(u32)((u64)adev->dummy_page.addr >> 44));
-
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2));
-	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
-			    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY,
-			    1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp);
-
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL));
+	tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+		     0XFFFFFFFF);
+	WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+		     0x0000000F);
+
+	WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+		     0);
+	WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+		     0);
 
-	/* Disable identity aperture.*/
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF);
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F);
+	WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+	WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0);
+}
 
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-		mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0,
-		mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0);
+static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+	int i;
+	uint32_t tmp;
 
 	for (i = 0; i <= 14; i++) {
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i);
+		tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
 				    adev->vm_manager.num_level);
@@ -223,15 +206,52 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				PAGE_TABLE_BLOCK_SIZE,
 				adev->vm_manager.block_size - 9);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp);
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,  i*2,
 			lower_32_bits(adev->vm_manager.max_pfn - 1));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
 			upper_32_bits(adev->vm_manager.max_pfn - 1));
 	}
+}
+
+static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
+{
+	unsigned i;
+
+	for (i = 0 ; i < 18; ++i) {
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+				    2 * i, 0xffffffff);
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+				    2 * i, 0x1f);
+	}
+}
+
+int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev)) {
+		/*
+		 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+		 * VF copy registers so vbios post doesn't program them, for
+		 * SRIOV driver need to program them
+		 */
+		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
+			     adev->mc.vram_start >> 24);
+		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
+			     adev->mc.vram_end >> 24);
+	}
 
+	/* GART Enable. */
+	gfxhub_v1_0_init_gart_aperture_regs(adev);
+	gfxhub_v1_0_init_system_aperture_regs(adev);
+	gfxhub_v1_0_init_tlb_regs(adev);
+	gfxhub_v1_0_init_cache_regs(adev);
+
+	gfxhub_v1_0_enable_system_domain(adev);
+	gfxhub_v1_0_disable_identity_aperture(adev);
+	gfxhub_v1_0_setup_vmid_config(adev);
+	gfxhub_v1_0_program_invalidation(adev);
 
 	return 0;
 }
@@ -243,22 +263,20 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
 
 	/* Disable all tables */
 	for (i = 0; i < 16; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0);
 
 	/* Setup TLB control */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL));
+	tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
 	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
 	tmp = REG_SET_FIELD(tmp,
 				MC_VM_MX_L1_TLB_CNTL,
 				ENABLE_ADVANCED_DRIVER_MODEL,
 				0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
+	WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
 
 	/* Setup L2 cache */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL));
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0);
+	WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0);
 }
 
 /**
@@ -271,7 +289,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
 					  bool value)
 {
 	u32 tmp;
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL));
+	tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
 			RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
@@ -296,22 +314,11 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
 			WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
 			EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
-}
-
-static int gfxhub_v1_0_early_init(void *handle)
-{
-	return 0;
-}
-
-static int gfxhub_v1_0_late_init(void *handle)
-{
-	return 0;
+	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
 }
 
-static int gfxhub_v1_0_sw_init(void *handle)
+void gfxhub_v1_0_init(struct amdgpu_device *adev)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
 
 	hub->ctx0_ptb_addr_lo32 =
@@ -330,96 +337,4 @@ static int gfxhub_v1_0_sw_init(void *handle)
 		SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS);
 	hub->vm_l2_pro_fault_cntl =
 		SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
-
-	return 0;
-}
-
-static int gfxhub_v1_0_sw_fini(void *handle)
-{
-	return 0;
 }
-
-static int gfxhub_v1_0_hw_init(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	unsigned i;
-
-	for (i = 0 ; i < 18; ++i) {
-		WREG32(SOC15_REG_OFFSET(GC, 0,
-					mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) +
-		       2 * i, 0xffffffff);
-		WREG32(SOC15_REG_OFFSET(GC, 0,
-					mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) +
-		       2 * i, 0x1f);
-	}
-
-	return 0;
-}
-
-static int gfxhub_v1_0_hw_fini(void *handle)
-{
-	return 0;
-}
-
-static int gfxhub_v1_0_suspend(void *handle)
-{
-	return 0;
-}
-
-static int gfxhub_v1_0_resume(void *handle)
-{
-	return 0;
-}
-
-static bool gfxhub_v1_0_is_idle(void *handle)
-{
-	return true;
-}
-
-static int gfxhub_v1_0_wait_for_idle(void *handle)
-{
-	return 0;
-}
-
-static int gfxhub_v1_0_soft_reset(void *handle)
-{
-	return 0;
-}
-
-static int gfxhub_v1_0_set_clockgating_state(void *handle,
-					  enum amd_clockgating_state state)
-{
-	return 0;
-}
-
-static int gfxhub_v1_0_set_powergating_state(void *handle,
-					  enum amd_powergating_state state)
-{
-	return 0;
-}
-
-const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = {
-	.name = "gfxhub_v1_0",
-	.early_init = gfxhub_v1_0_early_init,
-	.late_init = gfxhub_v1_0_late_init,
-	.sw_init = gfxhub_v1_0_sw_init,
-	.sw_fini = gfxhub_v1_0_sw_fini,
-	.hw_init = gfxhub_v1_0_hw_init,
-	.hw_fini = gfxhub_v1_0_hw_fini,
-	.suspend = gfxhub_v1_0_suspend,
-	.resume = gfxhub_v1_0_resume,
-	.is_idle = gfxhub_v1_0_is_idle,
-	.wait_for_idle = gfxhub_v1_0_wait_for_idle,
-	.soft_reset = gfxhub_v1_0_soft_reset,
-	.set_clockgating_state = gfxhub_v1_0_set_clockgating_state,
-	.set_powergating_state = gfxhub_v1_0_set_powergating_state,
-};
-
-const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block =
-{
-	.type = AMD_IP_BLOCK_TYPE_GFXHUB,
-	.major = 1,
-	.minor = 0,
-	.rev = 0,
-	.funcs = &gfxhub_v1_0_ip_funcs,
-};

+ 2 - 1
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h

@@ -28,7 +28,8 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev);
 void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev);
 void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
 					  bool value);
-
+void gfxhub_v1_0_init(struct amdgpu_device *adev);
+u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev);
 extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs;
 extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block;
 

+ 31 - 47
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c

@@ -21,7 +21,7 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "gmc_v6_0.h"
 #include "amdgpu_ucode.h"
@@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
+	return addr;
+}
+
 static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
 					      bool value)
 {
@@ -614,33 +620,6 @@ static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
 	amdgpu_gart_fini(adev);
 }
 
-static int gmc_v6_0_vm_init(struct amdgpu_device *adev)
-{
-	/*
-	 * number of VMs
-	 * VMID 0 is reserved for System
-	 * amdgpu graphics/compute will use VMIDs 1-7
-	 * amdkfd will use VMIDs 8-15
-	 */
-	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
-	adev->vm_manager.num_level = 1;
-	amdgpu_vm_manager_init(adev);
-
-	/* base offset of vram pages */
-	if (adev->flags & AMD_IS_APU) {
-		u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
-		tmp <<= 22;
-		adev->vm_manager.vram_base_offset = tmp;
-	} else
-		adev->vm_manager.vram_base_offset = 0;
-
-	return 0;
-}
-
-static void gmc_v6_0_vm_fini(struct amdgpu_device *adev)
-{
-}
-
 static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
 				     u32 status, u32 addr, u32 mc_client)
 {
@@ -855,6 +834,8 @@ static int gmc_v6_0_sw_init(void *handle)
 
 	adev->mc.mc_mask = 0xffffffffffULL;
 
+	adev->mc.stolen_size = 256 * 1024;
+
 	adev->need_dma32 = false;
 	dma_bits = adev->need_dma32 ? 32 : 40;
 	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@@ -887,26 +868,34 @@ static int gmc_v6_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v6_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
+	/*
+	 * number of VMs
+	 * VMID 0 is reserved for System
+	 * amdgpu graphics/compute will use VMIDs 1-7
+	 * amdkfd will use VMIDs 8-15
+	 */
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.num_level = 1;
+	amdgpu_vm_manager_init(adev);
+
+	/* base offset of vram pages */
+	if (adev->flags & AMD_IS_APU) {
+		u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
+
+		tmp <<= 22;
+		adev->vm_manager.vram_base_offset = tmp;
+	} else {
+		adev->vm_manager.vram_base_offset = 0;
 	}
 
-	return r;
+	return 0;
 }
 
 static int gmc_v6_0_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v6_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
+	amdgpu_vm_manager_fini(adev);
 	gmc_v6_0_gart_fini(adev);
 	amdgpu_gem_force_release(adev);
 	amdgpu_bo_fini(adev);
@@ -984,16 +973,10 @@ static bool gmc_v6_0_is_idle(void *handle)
 static int gmc_v6_0_wait_for_idle(void *handle)
 {
 	unsigned i;
-	u32 tmp;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
-					       SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
-					       SRBM_STATUS__MCC_BUSY_MASK |
-					       SRBM_STATUS__MCD_BUSY_MASK |
-					       SRBM_STATUS__VMC_BUSY_MASK);
-		if (!tmp)
+		if (gmc_v6_0_is_idle(handle))
 			return 0;
 		udelay(1);
 	}
@@ -1146,6 +1129,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v6_0_gart_set_pte_pde,
 	.set_prt = gmc_v6_0_set_prt,
+	.get_vm_pde = gmc_v6_0_get_vm_pde,
 	.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
 };
 

+ 31 - 64
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

@@ -21,7 +21,7 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "cikd.h"
 #include "cik.h"
@@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
+	return addr;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -724,55 +730,6 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
 	amdgpu_gart_fini(adev);
 }
 
-/*
- * vm
- * VMID 0 is the physical GPU addresses as used by the kernel.
- * VMIDs 1-15 are used for userspace clients and are handled
- * by the amdgpu vm/hsa code.
- */
-/**
- * gmc_v7_0_vm_init - cik vm init callback
- *
- * @adev: amdgpu_device pointer
- *
- * Inits cik specific vm parameters (number of VMs, base of vram for
- * VMIDs 1-15) (CIK).
- * Returns 0 for success.
- */
-static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
-{
-	/*
-	 * number of VMs
-	 * VMID 0 is reserved for System
-	 * amdgpu graphics/compute will use VMIDs 1-7
-	 * amdkfd will use VMIDs 8-15
-	 */
-	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
-	adev->vm_manager.num_level = 1;
-	amdgpu_vm_manager_init(adev);
-
-	/* base offset of vram pages */
-	if (adev->flags & AMD_IS_APU) {
-		u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
-		tmp <<= 22;
-		adev->vm_manager.vram_base_offset = tmp;
-	} else
-		adev->vm_manager.vram_base_offset = 0;
-
-	return 0;
-}
-
-/**
- * gmc_v7_0_vm_fini - cik vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down any asic specific VM setup (CIK).
- */
-static void gmc_v7_0_vm_fini(struct amdgpu_device *adev)
-{
-}
-
 /**
  * gmc_v7_0_vm_decode_fault - print human readable fault info
  *
@@ -1013,6 +970,8 @@ static int gmc_v7_0_sw_init(void *handle)
 	 */
 	adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
+	adev->mc.stolen_size = 256 * 1024;
+
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
@@ -1051,27 +1010,34 @@ static int gmc_v7_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v7_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
+	/*
+	 * number of VMs
+	 * VMID 0 is reserved for System
+	 * amdgpu graphics/compute will use VMIDs 1-7
+	 * amdkfd will use VMIDs 8-15
+	 */
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.num_level = 1;
+	amdgpu_vm_manager_init(adev);
+
+	/* base offset of vram pages */
+	if (adev->flags & AMD_IS_APU) {
+		u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
+
+		tmp <<= 22;
+		adev->vm_manager.vram_base_offset = tmp;
+	} else {
+		adev->vm_manager.vram_base_offset = 0;
 	}
 
-	return r;
+	return 0;
 }
 
 static int gmc_v7_0_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		amdgpu_vm_manager_fini(adev);
-		gmc_v7_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
+	amdgpu_vm_manager_fini(adev);
 	gmc_v7_0_gart_fini(adev);
 	amdgpu_gem_force_release(adev);
 	amdgpu_bo_fini(adev);
@@ -1335,7 +1301,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v7_0_gart_set_pte_pde,
 	.set_prt = gmc_v7_0_set_prt,
-	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
+	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v7_0_get_vm_pde
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {

+ 31 - 64
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

@@ -21,7 +21,7 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "gmc_v8_0.h"
 #include "amdgpu_ucode.h"
@@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
+	return addr;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -927,55 +933,6 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
 	amdgpu_gart_fini(adev);
 }
 
-/*
- * vm
- * VMID 0 is the physical GPU addresses as used by the kernel.
- * VMIDs 1-15 are used for userspace clients and are handled
- * by the amdgpu vm/hsa code.
- */
-/**
- * gmc_v8_0_vm_init - cik vm init callback
- *
- * @adev: amdgpu_device pointer
- *
- * Inits cik specific vm parameters (number of VMs, base of vram for
- * VMIDs 1-15) (CIK).
- * Returns 0 for success.
- */
-static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
-{
-	/*
-	 * number of VMs
-	 * VMID 0 is reserved for System
-	 * amdgpu graphics/compute will use VMIDs 1-7
-	 * amdkfd will use VMIDs 8-15
-	 */
-	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
-	adev->vm_manager.num_level = 1;
-	amdgpu_vm_manager_init(adev);
-
-	/* base offset of vram pages */
-	if (adev->flags & AMD_IS_APU) {
-		u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
-		tmp <<= 22;
-		adev->vm_manager.vram_base_offset = tmp;
-	} else
-		adev->vm_manager.vram_base_offset = 0;
-
-	return 0;
-}
-
-/**
- * gmc_v8_0_vm_fini - cik vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down any asic specific VM setup (CIK).
- */
-static void gmc_v8_0_vm_fini(struct amdgpu_device *adev)
-{
-}
-
 /**
  * gmc_v8_0_vm_decode_fault - print human readable fault info
  *
@@ -1097,6 +1054,8 @@ static int gmc_v8_0_sw_init(void *handle)
 	 */
 	adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
+	adev->mc.stolen_size = 256 * 1024;
+
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
@@ -1135,27 +1094,34 @@ static int gmc_v8_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v8_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
+	/*
+	 * number of VMs
+	 * VMID 0 is reserved for System
+	 * amdgpu graphics/compute will use VMIDs 1-7
+	 * amdkfd will use VMIDs 8-15
+	 */
+	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.num_level = 1;
+	amdgpu_vm_manager_init(adev);
+
+	/* base offset of vram pages */
+	if (adev->flags & AMD_IS_APU) {
+		u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
+
+		tmp <<= 22;
+		adev->vm_manager.vram_base_offset = tmp;
+	} else {
+		adev->vm_manager.vram_base_offset = 0;
 	}
 
-	return r;
+	return 0;
 }
 
 static int gmc_v8_0_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		amdgpu_vm_manager_fini(adev);
-		gmc_v8_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
+	amdgpu_vm_manager_fini(adev);
 	gmc_v8_0_gart_fini(adev);
 	amdgpu_gem_force_release(adev);
 	amdgpu_bo_fini(adev);
@@ -1654,7 +1620,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v8_0_gart_set_pte_pde,
 	.set_prt = gmc_v8_0_set_prt,
-	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
+	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v8_0_get_vm_pde
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {

+ 73 - 79
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

@@ -33,6 +33,7 @@
 #include "soc15_common.h"
 
 #include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
 #include "gfxhub_v1_0.h"
 #include "mmhub_v1_0.h"
 
@@ -215,7 +216,10 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 	unsigned i, j;
 
 	/* flush hdp cache */
-	nbio_v6_1_hdp_flush(adev);
+	if (adev->flags & AMD_IS_APU)
+		nbio_v7_0_hdp_flush(adev);
+	else
+		nbio_v6_1_hdp_flush(adev);
 
 	spin_lock(&adev->mc.invalidate_lock);
 
@@ -354,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
+static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
 {
-	return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
+	addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
+	BUG_ON(addr & 0xFFFF00000000003FULL);
+	return addr;
 }
 
 static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
-	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
-	.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
 	.get_invalidate_req = gmc_v9_0_get_invalidate_req,
+	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v9_0_get_vm_pde
 };
 
 static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
@@ -415,6 +421,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 	amdgpu_vram_location(adev, &adev->mc, base);
 	adev->mc.gtt_base_align = 0;
 	amdgpu_gtt_location(adev, mc);
+	/* base offset of vram pages */
+	if (adev->flags & AMD_IS_APU)
+		adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
+	else
+		adev->vm_manager.vram_base_offset = 0;
 }
 
 /**
@@ -434,7 +445,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	/* hbm memory channel size */
 	chansize = 128;
 
-	tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0));
+	tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
 	tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
 	tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
 	switch (tmp) {
@@ -474,7 +485,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
 	/* size in MB on si */
 	adev->mc.mc_vram_size =
-		nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL;
+		((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) :
+		 nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL;
 	adev->mc.real_vram_size = adev->mc.mc_vram_size;
 	adev->mc.visible_vram_size = adev->mc.aper_size;
 
@@ -514,64 +526,15 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 	return amdgpu_gart_table_vram_alloc(adev);
 }
 
-/*
- * vm
- * VMID 0 is the physical GPU addresses as used by the kernel.
- * VMIDs 1-15 are used for userspace clients and are handled
- * by the amdgpu vm/hsa code.
- */
-/**
- * gmc_v9_0_vm_init - vm init callback
- *
- * @adev: amdgpu_device pointer
- *
- * Inits vega10 specific vm parameters (number of VMs, base of vram for
- * VMIDs 1-15) (vega10).
- * Returns 0 for success.
- */
-static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
-{
-	/*
-	 * number of VMs
-	 * VMID 0 is reserved for System
-	 * amdgpu graphics/compute will use VMIDs 1-7
-	 * amdkfd will use VMIDs 8-15
-	 */
-	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
-	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
-
-	/* TODO: fix num_level for APU when updating vm size and block size */
-	if (adev->flags & AMD_IS_APU)
-		adev->vm_manager.num_level = 1;
-	else
-		adev->vm_manager.num_level = 3;
-	amdgpu_vm_manager_init(adev);
-
-	/* base offset of vram pages */
-	/*XXX This value is not zero for APU*/
-	adev->vm_manager.vram_base_offset = 0;
-
-	return 0;
-}
-
-/**
- * gmc_v9_0_vm_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down any asic specific VM setup.
- */
-static void gmc_v9_0_vm_fini(struct amdgpu_device *adev)
-{
-	return;
-}
-
 static int gmc_v9_0_sw_init(void *handle)
 {
 	int r;
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	gfxhub_v1_0_init(adev);
+	mmhub_v1_0_init(adev);
+
 	spin_lock_init(&adev->mc.invalidate_lock);
 
 	if (adev->flags & AMD_IS_APU) {
@@ -609,6 +572,12 @@ static int gmc_v9_0_sw_init(void *handle)
 	 */
 	adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
+	/*
+	 * It needs to reserve 8M stolen memory for vega10
+	 * TODO: Figure out how to avoid that...
+	 */
+	adev->mc.stolen_size = 8 * 1024 * 1024;
+
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 44-bits.
 	 * IGP - can handle 44-bits
@@ -641,15 +610,23 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v9_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
-	return r;
+	/*
+	 * number of VMs
+	 * VMID 0 is reserved for System
+	 * amdgpu graphics/compute will use VMIDs 1-7
+	 * amdkfd will use VMIDs 8-15
+	 */
+	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+
+	/* TODO: fix num_level for APU when updating vm size and block size */
+	if (adev->flags & AMD_IS_APU)
+		adev->vm_manager.num_level = 1;
+	else
+		adev->vm_manager.num_level = 3;
+	amdgpu_vm_manager_init(adev);
+
+	return 0;
 }
 
 /**
@@ -669,11 +646,7 @@ static int gmc_v9_0_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		amdgpu_vm_manager_fini(adev);
-		gmc_v9_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
+	amdgpu_vm_manager_fini(adev);
 	gmc_v9_0_gart_fini(adev);
 	amdgpu_gem_force_release(adev);
 	amdgpu_bo_fini(adev);
@@ -686,6 +659,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		break;
+	case CHIP_RAVEN:
+		break;
 	default:
 		break;
 	}
@@ -715,7 +690,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 		return r;
 
 	/* After HDP is initialized, flush HDP.*/
-	nbio_v6_1_hdp_flush(adev);
+	if (adev->flags & AMD_IS_APU)
+		nbio_v7_0_hdp_flush(adev);
+	else
+		nbio_v6_1_hdp_flush(adev);
 
 	r = gfxhub_v1_0_gart_enable(adev);
 	if (r)
@@ -725,12 +703,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL));
+	tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
 	tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
-	WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp);
+	WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
 
-	tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL));
-	WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp);
+	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
+	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
 
 
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
@@ -781,6 +759,12 @@ static int gmc_v9_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev)) {
+		/* full access mode, so don't touch any GMC register */
+		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+		return 0;
+	}
+
 	amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
 	gmc_v9_0_gart_disable(adev);
 
@@ -831,7 +815,16 @@ static int gmc_v9_0_soft_reset(void *handle)
 static int gmc_v9_0_set_clockgating_state(void *handle,
 					enum amd_clockgating_state state)
 {
-	return 0;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	return mmhub_v1_0_set_clockgating(adev, state);
+}
+
+static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	mmhub_v1_0_get_clockgating(adev, flags);
 }
 
 static int gmc_v9_0_set_powergating_state(void *handle,
@@ -855,6 +848,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
 	.soft_reset = gmc_v9_0_soft_reset,
 	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
 	.set_powergating_state = gmc_v9_0_set_powergating_state,
+	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
 };
 
 const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
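
Note: the gmc_v9_0.c hunks above repeatedly branch on AMD_IS_APU so that Raven (an APU) goes through the nbio_v7_0 helpers while Vega10 keeps using nbio_v6_1 (memsize query, HDP flush). Below is a minimal stand-alone sketch of that dispatch pattern; fake_adev, FLAG_IS_APU and the fake_nbio_* helpers are invented stand-ins, not the real amdgpu symbols.

/*
 * Sketch only: pick the "NBIO" helper based on an APU flag, then scale
 * the reported size from MiB to bytes, as the hunk above does.
 */
#include <stdio.h>
#include <stdint.h>

#define FLAG_IS_APU (1u << 0)			/* stand-in for AMD_IS_APU */

struct fake_adev {
	uint32_t flags;
};

static uint64_t fake_nbio_v6_1_get_memsize(struct fake_adev *adev)
{
	(void)adev;
	return 8192;				/* pretend the dGPU reports 8 GiB, in MiB */
}

static uint64_t fake_nbio_v7_0_get_memsize(struct fake_adev *adev)
{
	(void)adev;
	return 4096;				/* pretend the APU carve-out is 4 GiB, in MiB */
}

static uint64_t fake_mc_vram_size(struct fake_adev *adev)
{
	uint64_t memsize = (adev->flags & FLAG_IS_APU) ?
		fake_nbio_v7_0_get_memsize(adev) :
		fake_nbio_v6_1_get_memsize(adev);

	return memsize * 1024ULL * 1024ULL;	/* MiB -> bytes */
}

int main(void)
{
	struct fake_adev apu  = { .flags = FLAG_IS_APU };
	struct fake_adev dgpu = { .flags = 0 };

	printf("APU  vram: %llu bytes\n", (unsigned long long)fake_mc_vram_size(&apu));
	printf("dGPU vram: %llu bytes\n", (unsigned long long)fake_mc_vram_size(&dgpu));
	return 0;
}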

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/iceland_ih.c

@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_ih.h"
 #include "vid.h"
 #include "vid.h"

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/kv_dpm.c

@@ -21,7 +21,7 @@
  *
  */
 
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_pm.h"
 #include "cikd.h"
 #include "cikd.h"

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/kv_smc.c

@@ -22,7 +22,7 @@
  * Authors: Alex Deucher
  */
 
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "cikd.h"
 #include "cikd.h"
 #include "kv_dpm.h"
 #include "kv_dpm.h"

+ 225 - 297
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c

@@ -34,9 +34,12 @@
 
 #include "soc15_common.h"
 #include "soc15_common.h"
 
 
+#define mmDAGB0_CNTL_MISC2_RV 0x008f
+#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0
+
 u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
 {
-	u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE));
+	u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
 
 	base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
 	base <<= 24;
@@ -44,184 +47,160 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
 	return base;
 }
 
-int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
+static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
 {
-	u32 tmp;
-	u64 value;
-	uint64_t addr;
-	u32 i;
+	uint64_t value;
 
-	/* Program MC. */
-	/* Update configuration */
-	DRM_INFO("%s -- in\n", __func__);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR),
-		adev->mc.vram_start >> 18);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR),
-		adev->mc.vram_end >> 18);
-	value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
+	BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
+	value = adev->gart.table_addr - adev->mc.vram_start +
 		adev->vm_manager.vram_base_offset;
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB),
-				(u32)(value >> 12));
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
-				(u32)(value >> 44));
+	value &= 0x0000FFFFFFFFF000ULL;
+	value |= 0x1; /* valid bit */
 
 
-		/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so
-		vbios post doesn't program them, for SRIOV driver need to program them */
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE),
-			adev->mc.vram_start >> 24);
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP),
-			adev->mc.vram_end >> 24);
-	}
+	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+		     lower_32_bits(value));
+
+	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+		     upper_32_bits(value));
+}
+
+static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	mmhub_v1_0_init_gart_pt_regs(adev);
+
+	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     (u32)(adev->mc.gtt_start >> 12));
+	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     (u32)(adev->mc.gtt_start >> 44));
+
+	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     (u32)(adev->mc.gtt_end >> 12));
+	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     (u32)(adev->mc.gtt_end >> 44));
+}
+
+static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+	uint64_t value;
+	uint32_t tmp;
 
 
 	/* Disable AGP. */
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF);
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0);
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0);
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF);
 
-	/* GART Enable. */
+	/* Program the system aperture low logical page number. */
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+		     adev->mc.vram_start >> 18);
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+		     adev->mc.vram_end >> 18);
+
+	/* Set default page address. */
+	value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
+		adev->vm_manager.vram_base_offset;
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+		     (u32)(value >> 12));
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+		     (u32)(value >> 44));
+
+	/* Program "protection fault". */
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+		     (u32)(adev->dummy_page.addr >> 12));
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+		     (u32)((u64)adev->dummy_page.addr >> 44));
+
+	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
+	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+			    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
 
 	/* Setup TLB control */
-	tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL));
+	tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
+
 	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				SYSTEM_ACCESS_MODE,
-				3);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				ENABLE_ADVANCED_DRIVER_MODEL,
-				1);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				SYSTEM_APERTURE_UNMAPPED_ACCESS,
-				0);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				ECO_BITS,
-				0);
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				MTYPE,
-				MTYPE_UC);/* XXX for emulation. */
-	tmp = REG_SET_FIELD(tmp,
-				MC_VM_MX_L1_TLB_CNTL,
-				ATC_EN,
-				1);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+			    ENABLE_ADVANCED_DRIVER_MODEL, 1);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+			    SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+			    MTYPE, MTYPE_UC);/* XXX for emulation. */
+	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
 
 
 	/* Setup L2 cache */
+	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
-				VM_L2_CNTL,
-				ENABLE_L2_FRAGMENT_PROCESSING,
-				0);
-	tmp = REG_SET_FIELD(tmp,
-				VM_L2_CNTL,
-				L2_PDE0_CACHE_TAG_GENERATION_MODE,
-				0);/* XXX for emulation, Refer to closed source code.*/
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+	/* XXX for emulation, Refer to closed source code.*/
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+			    0);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
-				VM_L2_CNTL,
-				CONTEXT1_IDENTITY_ACCESS_MODE,
-				1);
-	tmp = REG_SET_FIELD(tmp,
-				VM_L2_CNTL,
-				IDENTITY_MODE_FRAGMENT_SIZE,
-				0);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
 
 
+	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
 
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
 
 
-	tmp = REG_SET_FIELD(tmp,
-			    VM_L2_CNTL4,
-			    VMC_TAP_PDE_REQUEST_PHYSICAL,
-			    0);
-	tmp = REG_SET_FIELD(tmp,
-			    VM_L2_CNTL4,
-			    VMC_TAP_PTE_REQUEST_PHYSICAL,
-			    0);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp);
-
-	/* setup context0 */
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32),
-		(u32)(adev->mc.gtt_start >> 12));
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32),
-		(u32)(adev->mc.gtt_start >> 44));
-
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32),
-		(u32)(adev->mc.gtt_end >> 12));
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32),
-		(u32)(adev->mc.gtt_end >> 44));
-
-	BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
-	value = adev->gart.table_addr - adev->mc.vram_start +
-		adev->vm_manager.vram_base_offset;
-	value &= 0x0000FFFFFFFFF000ULL;
-	value |= 0x1; /* valid bit */
-
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32),
-		(u32)value);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32),
-		(u32)(value >> 32));
-
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32),
-		(u32)(adev->dummy_page.addr >> 12));
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-				mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32),
-		(u32)((u64)adev->dummy_page.addr >> 44));
-
-	tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2));
-	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
-			    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY,
-			    1);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp);
+	tmp = mmVM_L2_CNTL4_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL4, tmp);
+}
 
 
-	tmp = RREG32(addr);
+static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
 
 
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
-
-	tmp = RREG32(addr);
-
-	/* Disable identity aperture.*/
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F);
+	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp);
+}
 
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-		mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0);
+static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+		     0XFFFFFFFF);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+		     0x0000000F);
+
+	WREG32_SOC15(MMHUB, 0,
+		     mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+	WREG32_SOC15(MMHUB, 0,
+		     mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+		     0);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+		     0);
+}
 
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-		mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-		mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0);
+static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+	int i;
+	uint32_t tmp;
 
 
 	for (i = 0; i <= 14; i++) {
-				+ i);
+		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				ENABLE_CONTEXT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
@@ -243,14 +222,52 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				PAGE_TABLE_BLOCK_SIZE,
 				adev->vm_manager.block_size - 9);
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp);
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp);
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
 			lower_32_bits(adev->vm_manager.max_pfn - 1));
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
 			upper_32_bits(adev->vm_manager.max_pfn - 1));
 	}
+}
+
+static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
+{
+	unsigned i;
+
+	for (i = 0; i < 18; ++i) {
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+				    2 * i, 0xffffffff);
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+				    2 * i, 0x1f);
+	}
+}
+
+int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev)) {
+		/*
+		 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
+		 * VF copy registers so the vbios post doesn't program them;
+		 * the SRIOV driver needs to program them
+		 */
+		WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
+			     adev->mc.vram_start >> 24);
+		WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
+			     adev->mc.vram_end >> 24);
+	}
+
+	/* GART Enable. */
+	mmhub_v1_0_init_gart_aperture_regs(adev);
+	mmhub_v1_0_init_system_aperture_regs(adev);
+	mmhub_v1_0_init_tlb_regs(adev);
+	mmhub_v1_0_init_cache_regs(adev);
+
+	mmhub_v1_0_enable_system_domain(adev);
+	mmhub_v1_0_disable_identity_aperture(adev);
+	mmhub_v1_0_setup_vmid_config(adev);
+	mmhub_v1_0_program_invalidation(adev);
 
 	return 0;
 }
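
The new mmhub_v1_0_init_gart_pt_regs() above encodes the GART page-table base by rebasing the table address into the VRAM aperture, masking it to a 4 KiB-aligned 48-bit value, setting bit 0 as the valid flag, and splitting the result across the LO32/HI32 registers. A small self-contained sketch of just that encoding step follows; the demo addresses and the absence of real register writes are assumptions of the sketch, not driver state.

/* Stand-alone sketch of the page-table base encoding used above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t table_addr       = 0x0000000123456000ULL;	/* hypothetical table location, 4 KiB aligned */
	uint64_t vram_start       = 0x0000000100000000ULL;	/* hypothetical VRAM base */
	uint64_t vram_base_offset = 0;
	uint64_t value;

	value  = table_addr - vram_start + vram_base_offset;	/* rebase into the VRAM aperture */
	value &= 0x0000FFFFFFFFF000ULL;				/* keep the 4 KiB-aligned 48-bit address */
	value |= 0x1;						/* bit 0 = valid */

	/* The driver then writes the two halves to the *_BASE_ADDR_LO32/HI32 registers. */
	printf("BASE_ADDR_LO32 = 0x%08x\n", (unsigned)value);
	printf("BASE_ADDR_HI32 = 0x%08x\n", (unsigned)(value >> 32));
	return 0;
}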
@@ -262,22 +279,22 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
 
 	/* Disable all tables */
 	for (i = 0; i < 16; i++)
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0);
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0);
 
 	/* Setup TLB control */
-	tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL));
+	tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
 	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
 	tmp = REG_SET_FIELD(tmp,
 				MC_VM_MX_L1_TLB_CNTL,
 				ENABLE_ADVANCED_DRIVER_MODEL,
 				0);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
+	WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
 
 	/* Setup L2 cache */
-	tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL));
+	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
 }
 
 /**
@@ -289,7 +306,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
 void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
 {
 	u32 tmp;
-	tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL));
+	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
 			RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
@@ -314,22 +331,11 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
 			WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
 			EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
+	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
 }
 
-static int mmhub_v1_0_early_init(void *handle)
+void mmhub_v1_0_init(struct amdgpu_device *adev)
 {
 {
-}
-
-static int mmhub_v1_0_late_init(void *handle)
-{
-	return 0;
-}
-
-static int mmhub_v1_0_sw_init(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
 
 	hub->ctx0_ptb_addr_lo32 =
@@ -349,69 +355,20 @@ static int mmhub_v1_0_sw_init(void *handle)
 	hub->vm_l2_pro_fault_cntl =
 		SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
 
-	return 0;
-}
-
-static int mmhub_v1_0_sw_fini(void *handle)
-{
-	return 0;
-}
-
-static int mmhub_v1_0_hw_init(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	unsigned i;
-
-	for (i = 0; i < 18; ++i) {
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-					mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) +
-		       2 * i, 0xffffffff);
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-					mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) +
-		       2 * i, 0x1f);
-	}
-
-	return 0;
-}
-
-static int mmhub_v1_0_hw_fini(void *handle)
-{
-	return 0;
-}
-
-static int mmhub_v1_0_suspend(void *handle)
-{
-	return 0;
-}
-
-static int mmhub_v1_0_resume(void *handle)
-{
-	return 0;
-}
-
-static bool mmhub_v1_0_is_idle(void *handle)
-{
-	return true;
-}
-
-static int mmhub_v1_0_wait_for_idle(void *handle)
-{
-	return 0;
-}
-
-static int mmhub_v1_0_soft_reset(void *handle)
-{
-	return 0;
 }
 
 static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 							bool enable)
 {
-	uint32_t def, data, def1, data1, def2, data2;
+	uint32_t def, data, def1, data1, def2 = 0, data2 = 0;
 
-	def  = data  = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG));
-	def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2));
-	def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2));
+	def  = data  = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
+
+	if (adev->asic_type != CHIP_RAVEN) {
+		def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+		def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2);
+	} else
+		def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV);
 
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
 		data |= ATC_L2_MISC_CG__ENABLE_MASK;
 		           DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
 		           DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
 		           DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
 
-		           DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
-		           DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
-		           DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
-		           DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
-		           DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+		if (adev->asic_type != CHIP_RAVEN)
+			data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+			           DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+			           DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+			           DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+			           DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+			           DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
 	} else {
 		data &= ~ATC_L2_MISC_CG__ENABLE_MASK;
 
@@ -439,22 +397,27 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
 			  DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
 			  DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
 
-		data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
-		          DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
-		          DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
-		          DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
-		          DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
-		          DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+		if (adev->asic_type != CHIP_RAVEN)
+			data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+			          DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+			          DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+			          DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+			          DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+			          DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
 	}
 
 	if (def != data)
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data);
+		WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
 
-	if (def1 != data1)
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1);
+	if (def1 != data1) {
+		if (adev->asic_type != CHIP_RAVEN)
+			WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
+		else
+			WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV, data1);
+	}
 
-	if (def2 != data2)
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2);
+	if (adev->asic_type != CHIP_RAVEN && def2 != data2)
+		WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2);
 }
 
 static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -462,7 +425,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 {
 	uint32_t def, data;
 
-	def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL));
+	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
 		data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -470,7 +433,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 		data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
 
 	if (def != data)
-		WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data);
+		WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
 }
 
 static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@@ -478,7 +441,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
 {
 	uint32_t def, data;
 
-	def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG));
+	def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
 		data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
@@ -486,7 +449,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
 		data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
 
 	if (def != data)
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data);
+		WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
 }
 
 static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@@ -494,7 +457,7 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
 {
 	uint32_t def, data;
 
-	def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL));
+	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
 	    (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@@ -503,19 +466,18 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
 		data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
 
 	if(def != data)
-		WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data);
+		WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
 }
 
-static int mmhub_v1_0_set_clockgating_state(void *handle,
-					enum amd_clockgating_state state)
+int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
+			       enum amd_clockgating_state state)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
 	if (amdgpu_sriov_vf(adev))
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		mmhub_v1_0_update_medium_grain_clock_gating(adev,
 		mmhub_v1_0_update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		athub_update_medium_grain_clock_gating(adev,
 	return 0;
 	return 0;
 }
 
+void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
 {
 {
 	int data;
 	int data;
 
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
 	/* AMD_CG_SUPPORT_MC_MGCG */
+	data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
 	if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
 	if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
 		*flags |= AMD_CG_SUPPORT_MC_MGCG;
 
 	/* AMD_CG_SUPPORT_MC_LS */
+	data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
 	if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
 	if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
 		*flags |= AMD_CG_SUPPORT_MC_LS;
 }
-static int mmhub_v1_0_set_powergating_state(void *handle,
-					enum amd_powergating_state state)
-{
-	return 0;
-}
-
-const struct amd_ip_funcs mmhub_v1_0_ip_funcs = {
-	.name = "mmhub_v1_0",
-	.early_init = mmhub_v1_0_early_init,
-	.late_init = mmhub_v1_0_late_init,
-	.sw_init = mmhub_v1_0_sw_init,
-	.sw_fini = mmhub_v1_0_sw_fini,
-	.hw_init = mmhub_v1_0_hw_init,
-	.hw_fini = mmhub_v1_0_hw_fini,
-	.suspend = mmhub_v1_0_suspend,
-	.resume = mmhub_v1_0_resume,
-	.is_idle = mmhub_v1_0_is_idle,
-	.wait_for_idle = mmhub_v1_0_wait_for_idle,
-	.soft_reset = mmhub_v1_0_soft_reset,
-	.set_clockgating_state = mmhub_v1_0_set_clockgating_state,
-	.set_powergating_state = mmhub_v1_0_set_powergating_state,
-	.get_clockgating_state = mmhub_v1_0_get_clockgating_state,
-};
-
-const struct amdgpu_ip_block_version mmhub_v1_0_ip_block =
-{
-	.type = AMD_IP_BLOCK_TYPE_MMHUB,
-	.major = 1,
-	.minor = 0,
-	.rev = 0,
-	.funcs = &mmhub_v1_0_ip_funcs,
-};
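
Most of the mmhub_v1_0.c churn above converts open-coded SOC15_REG_OFFSET accesses to the RREG32_SOC15/WREG32_SOC15 shorthands while keeping the same read-modify-write idiom: read a register, update named fields, and write it back only when the value actually changed. Below is a minimal user-space sketch of that idiom; the register map, rreg32/wreg32 and set_field() are invented stand-ins for the real MMIO accessors and the REG_SET_FIELD macro.

/* Sketch only: read-modify-write with a "write only if changed" guard. */
#include <stdio.h>
#include <stdint.h>

#define CG_ENABLE_MASK	0x00000001u
#define CG_ENABLE_SHIFT	0
#define MEM_LS_MASK	0x00000002u
#define MEM_LS_SHIFT	1

static uint32_t fake_regs[16];			/* pretend MMIO space */

static uint32_t rreg32(unsigned int off)
{
	return fake_regs[off];
}

static void wreg32(unsigned int off, uint32_t val)
{
	fake_regs[off] = val;
	printf("write reg[%u] = 0x%08x\n", off, (unsigned)val);
}

/* Simplified REG_SET_FIELD: clear the field, then insert the new value. */
static uint32_t set_field(uint32_t reg, uint32_t mask, unsigned int shift, uint32_t field)
{
	return (reg & ~mask) | ((field << shift) & mask);
}

static void update_clock_gating(unsigned int reg, int enable)
{
	uint32_t def, data;

	def = data = rreg32(reg);
	data = set_field(data, CG_ENABLE_MASK, CG_ENABLE_SHIFT, enable ? 1 : 0);
	data = set_field(data, MEM_LS_MASK, MEM_LS_SHIFT, enable ? 1 : 0);

	if (def != data)			/* skip the MMIO write when nothing changed */
		wreg32(reg, data);
}

int main(void)
{
	update_clock_gating(3, 1);		/* first call programs the register */
	update_clock_gating(3, 1);		/* second call sees no change and skips the write */
	return 0;
}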

+ 4 - 0
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h

@@ -28,6 +28,10 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev);
 void mmhub_v1_0_gart_disable(struct amdgpu_device *adev);
 void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
 					 bool value);
+void mmhub_v1_0_init(struct amdgpu_device *adev);
+int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
+			       enum amd_clockgating_state state);
+void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
 
 extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs;
 extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block;
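
The header change above reflects the larger refactor in this series: mmhub v1.0 no longer registers as a separate IP block full of empty callbacks; it now exports plain helpers (init, set/get clockgating) that gmc_v9_0 calls directly from its own hooks. A simplified sketch of that delegation shape follows; fake_adev, hub_init() and hub_set_clockgating() are stand-ins for amdgpu_device, mmhub_v1_0_init() and mmhub_v1_0_set_clockgating().

/* Sketch only: "gmc" callbacks delegating to exported "hub" helpers. */
#include <stdio.h>

struct fake_adev {
	int gated;
};

/* "mmhub" side: plain exported helpers instead of an amd_ip_funcs table. */
static void hub_init(struct fake_adev *adev)
{
	(void)adev;
	puts("hub: set up vmhub register offsets");
}

static int hub_set_clockgating(struct fake_adev *adev, int gate)
{
	adev->gated = gate;
	printf("hub: clockgating %s\n", gate ? "enabled" : "disabled");
	return 0;
}

/* "gmc" side: its own IP callbacks simply call into the hub helpers. */
static int gmc_sw_init(struct fake_adev *adev)
{
	hub_init(adev);		/* previously done from a separate mmhub sw_init hook */
	return 0;
}

static int gmc_set_clockgating_state(struct fake_adev *adev, int gate)
{
	return hub_set_clockgating(adev, gate);
}

int main(void)
{
	struct fake_adev adev = { 0 };

	gmc_sw_init(&adev);
	return gmc_set_clockgating_state(&adev, 1);
}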

+ 20 - 15
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c

@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 						     mmBIF_BX_PF0_MAILBOX_CONTROL));
@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 	r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	while (r) {
 		if (timeout <= 0) {
-			pr_err("Doesn't get ack from pf.\n");
+			pr_err("Doesn't get msg:%d from pf.\n", event);
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	}
@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 	/* start to poll ack */
 	r = xgpu_ai_poll_ack(adev);
 	if (r)
-		return r;
+		pr_err("Doesn't get ack from pf, continue\n");
 
 	xgpu_ai_mailbox_set_valid(adev, false);
 
@@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 		req == IDH_REQ_GPU_FINI_ACCESS ||
 		req == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
-		if (r)
+		if (r) {
+			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
 			return r;
+		}
 	}
 
 	return 0;
@@ -241,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	}
 
 	/* Trigger recovery due to world switch failure */
-	amdgpu_sriov_gpu_reset(adev, false);
+	amdgpu_sriov_gpu_reset(adev, NULL);
 }
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -264,12 +266,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
 {
 	int r;
 
-	/* see what event we get */
-	r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+	/* trigger gpu-reset by hypervisor only if TDR disabled */
+	if (amdgpu_lockup_timeout == 0) {
+		/* see what event we get */
+		r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
 
-	/* only handle FLR_NOTIFY now */
-	if (!r)
-		schedule_work(&adev->virt.flr_work);
+		/* only handle FLR_NOTIFY now */
+		if (!r)
+			schedule_work(&adev->virt.flr_work);
+	}
 
 	return 0;
 }
@@ -296,11 +301,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
 {
 	int r;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
 	if (r)
 		return r;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
 	if (r) {
 		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
 		return r;
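
The mailbox changes above coarsen the polling step from 1 ms to 5 ms (mdelay(5); timeout -= 5) while keeping the overall timeout budget, and downgrade a missing ack from a hard error to a warning. A stand-alone sketch of that poll-with-timeout shape follows; the mailbox read, the delay and every name are simulated, not the real driver code, which reads MAILBOX_CONTROL via RREG32_NO_KIQ.

/* Sketch only: poll a simulated ack bit in fixed 5 ms steps until timeout. */
#include <stdio.h>

#define POLL_STEP_MS	5
#define TIMEOUT_MS	5000

static int polls_until_ack = 3;		/* pretend the ack arrives on the third read */

static int read_ack(void)
{
	return --polls_until_ack <= 0;
}

static int poll_ack(void)
{
	int timeout = TIMEOUT_MS;

	while (!read_ack()) {
		if (timeout <= 0) {
			fprintf(stderr, "no ack from the host within %d ms\n", TIMEOUT_MS);
			return -1;	/* the driver returns -ETIME here */
		}
		/* mdelay(POLL_STEP_MS) in the kernel; omitted in this sketch */
		timeout -= POLL_STEP_MS;
	}
	return 0;
}

int main(void)
{
	return poll_ack();
}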

+ 14 - 11
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c

@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
 	}
@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_vi_mailbox_rcv_msg(adev, event);
 	}
@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
 		request == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
 		if (r)
-			return r;
+			pr_err("Doesn't get ack from pf, continue\n");
 	}
 
 	return 0;
@@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
 	}
 
 	/* Trigger recovery due to world switch failure */
-	amdgpu_sriov_gpu_reset(adev, false);
+	amdgpu_sriov_gpu_reset(adev, NULL);
 }
 
 static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -537,12 +537,15 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
 {
 	int r;
 
-	/* see what event we get */
-	r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+	/* trigger gpu-reset by hypervisor only if TDR disabled */
+	if (amdgpu_lockup_timeout == 0) {
+		/* see what event we get */
+		r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
 
-	/* only handle FLR_NOTIFY now */
-	if (!r)
-		schedule_work(&adev->virt.flr_work);
+		/* only handle FLR_NOTIFY now */
+		if (!r)
+			schedule_work(&adev->virt.flr_work);
+	}
 
 	return 0;
 }
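
Both mxgpu interrupt handlers above now gate the hypervisor-initiated FLR path on the driver's own TDR setting: the FLR notification only schedules recovery work when amdgpu_lockup_timeout is 0, so the two reset paths do not compete. A small sketch of that guard follows; lockup_timeout, rcv_msg_is_flr() and schedule_flr_work() stand in for the module parameter, the mailbox message check and schedule_work(&adev->virt.flr_work).

/* Sketch only: schedule recovery from the IRQ handler only when TDR is off. */
#include <stdio.h>

static int lockup_timeout = 0;		/* 0 means the driver's own TDR is disabled */

static int rcv_msg_is_flr(void)
{
	return 1;			/* pretend the mailbox holds IDH_FLR_NOTIFICATION */
}

static void schedule_flr_work(void)
{
	puts("scheduling FLR recovery work");
}

static int mailbox_rcv_irq(void)
{
	/* let the hypervisor-triggered reset run only when TDR is disabled */
	if (lockup_timeout == 0) {
		if (rcv_msg_is_flr())
			schedule_flr_work();
	}
	return 0;
}

int main(void)
{
	return mailbox_rcv_irq();
}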

Some files were not shown because too many files changed in this diff