
Merge airlied/drm-next into drm-misc-next

Refresh -misc-next

Signed-off-by: Sean Paul <seanpaul@chromium.org>
Sean Paul 7 years ago
commit 1c7095d283
100 changed files with 5624 additions and 3728 deletions
  1. 58 0
      Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
  2. 1 0
      Documentation/devicetree/bindings/display/connector/dvi-connector.txt
  3. 20 6
      Documentation/devicetree/bindings/display/msm/dsi.txt
  4. 15 20
      Documentation/devicetree/bindings/display/renesas,du.txt
  5. 2 2
      Documentation/devicetree/overlay-notes.txt
  6. 7 0
      Documentation/gpu/todo.rst
  7. 3 0
      MAINTAINERS
  8. 1 1
      arch/x86/kernel/devicetree.c
  9. 1 0
      drivers/dma-buf/dma-fence.c
  10. 4 4
      drivers/gpu/drm/amd/amdgpu/Makefile
  11. 36 115
      drivers/gpu/drm/amd/amdgpu/amdgpu.h
  12. 3 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
  13. 119 41
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
  14. 110 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
  15. 179 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
  16. 67 13
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
  17. 68 14
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
  18. 1506 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
  19. 91 4
      drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
  20. 1 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
  21. 1 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
  22. 4 4
      drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
  23. 4 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
  24. 5 426
      drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
  25. 18 23
      drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
  26. 11 11
      drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
  27. 12 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
  28. 166 123
      drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
  29. 72 55
      drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
  30. 4 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
  31. 20 15
      drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
  32. 24 11
      drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
  33. 13 10
      drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
  34. 27 27
      drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
  35. 0 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
  36. 9 10
      drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
  37. 112 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
  38. 2 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
  39. 42 16
      drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
  40. 283 129
      drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
  41. 6 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
  42. 3 42
      drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
  43. 6 52
      drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
  44. 1 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
  45. 68 30
      drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
  46. 25 54
      drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
  47. 52 60
      drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
  48. 7 9
      drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
  49. 392 174
      drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
  50. 0 290
      drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
  51. 133 29
      drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
  52. 14 35
      drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
  53. 26 17
      drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
  54. 4 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
  55. 7 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
  56. 11 51
      drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
  57. 54 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
  58. 1 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
  59. 11 12
      drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
  60. 30 8
      drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
  61. 206 91
      drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
  62. 8 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
  63. 50 78
      drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
  64. 2 4
      drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
  65. 2 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
  66. 18 69
      drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
  67. 78 17
      drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
  68. 3 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
  69. 212 114
      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
  70. 2 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
  71. 3 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
  72. 2 2
      drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
  73. 5 0
      drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
  74. 31 275
      drivers/gpu/drm/amd/amdgpu/ci_dpm.c
  75. 0 7
      drivers/gpu/drm/amd/amdgpu/ci_dpm.h
  76. 33 9
      drivers/gpu/drm/amd/amdgpu/cik.c
  77. 2 0
      drivers/gpu/drm/amd/amdgpu/cik.h
  78. 3 4
      drivers/gpu/drm/amd/amdgpu/cik_dpm.h
  79. 2 2
      drivers/gpu/drm/amd/amdgpu/cik_ih.c
  80. 14 27
      drivers/gpu/drm/amd/amdgpu/cik_sdma.c
  81. 2 2
      drivers/gpu/drm/amd/amdgpu/cz_ih.c
  82. 18 70
      drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
  83. 19 70
      drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
  84. 20 69
      drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
  85. 19 70
      drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
  86. 6 19
      drivers/gpu/drm/amd/amdgpu/dce_virtual.c
  87. 11 4
      drivers/gpu/drm/amd/amdgpu/emu_soc.c
  88. 20 58
      drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
  89. 23 75
      drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
  90. 31 55
      drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
  91. 210 99
      drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
  92. 14 14
      drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
  93. 76 61
      drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
  94. 104 83
      drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
  95. 118 97
      drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
  96. 114 68
      drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
  97. 2 2
      drivers/gpu/drm/amd/amdgpu/iceland_ih.c
  98. 14 3
      drivers/gpu/drm/amd/amdgpu/kv_dpm.c
  99. 50 42
      drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
  100. 105 95
      drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c

+ 58 - 0
Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt

@@ -0,0 +1,58 @@
+Renesas R-Car LVDS Encoder
+==========================
+
+These DT bindings describe the LVDS encoder embedded in the Renesas R-Car
+Gen2, R-Car Gen3 and RZ/G SoCs.
+
+Required properties:
+
+- compatible : Shall contain one of
+  - "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
+  - "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
+  - "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
+  - "renesas,r8a7793-lvds" for R8A7793 (R-Car M2-N) compatible LVDS encoders
+  - "renesas,r8a7795-lvds" for R8A7795 (R-Car H3) compatible LVDS encoders
+  - "renesas,r8a7796-lvds" for R8A7796 (R-Car M3-W) compatible LVDS encoders
+  - "renesas,r8a77970-lvds" for R8A77970 (R-Car V3M) compatible LVDS encoders
+  - "renesas,r8a77995-lvds" for R8A77995 (R-Car D3) compatible LVDS encoders
+
+- reg: Base address and length for the memory-mapped registers
+- clocks: A phandle + clock-specifier pair for the functional clock
+- resets: A phandle + reset specifier for the module reset
+
+Required nodes:
+
+The LVDS encoder has two video ports. Their connections are modelled using the
+OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+- Video port 0 corresponds to the parallel RGB input
+- Video port 1 corresponds to the LVDS output
+
+Each port shall have a single endpoint.
+
+
+Example:
+
+	lvds0: lvds@feb90000 {
+		compatible = "renesas,r8a7790-lvds";
+		reg = <0 0xfeb90000 0 0x1c>;
+		clocks = <&cpg CPG_MOD 726>;
+		resets = <&cpg 726>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				lvds0_in: endpoint {
+					remote-endpoint = <&du_out_lvds0>;
+				};
+			};
+			port@1 {
+				reg = <1>;
+				lvds0_out: endpoint {
+				};
+			};
+		};
+	};

+ 1 - 0
Documentation/devicetree/bindings/display/connector/dvi-connector.txt

@@ -10,6 +10,7 @@ Optional properties:
 - analog: the connector has DVI analog pins
 - digital: the connector has DVI digital pins
 - dual-link: the connector has pins for DVI dual-link
+- hpd-gpios: HPD GPIO number
 
 Required nodes:
 - Video port for DVI input

+ 20 - 6
Documentation/devicetree/bindings/display/msm/dsi.txt

@@ -7,8 +7,6 @@ Required properties:
 - reg: Physical base address and length of the registers of controller
 - reg-names: The names of register regions. The following regions are required:
   * "dsi_ctrl"
-- qcom,dsi-host-index: The ID of DSI controller hardware instance. This should
-  be 0 or 1, since we have 2 DSI controllers at most for now.
 - interrupts: The interrupt signal from the DSI block.
 - power-domains: Should be <&mmcc MDSS_GDSC>.
 - clocks: Phandles to device clocks.
@@ -22,6 +20,8 @@ Required properties:
   * "core"
   For DSIv2, we need an additional clock:
    * "src"
+  For DSI6G v2.0 onwards, we need also need the clock:
+   * "byte_intf"
 - assigned-clocks: Parents of "byte" and "pixel" for the given platform.
 - assigned-clock-parents: The Byte clock and Pixel clock PLL outputs provided
   by a DSI PHY block. See [1] for details on clock bindings.
@@ -88,21 +88,35 @@ Required properties:
   * "qcom,dsi-phy-28nm-lp"
   * "qcom,dsi-phy-20nm"
   * "qcom,dsi-phy-28nm-8960"
-- reg: Physical base address and length of the registers of PLL, PHY and PHY
-  regulator
+  * "qcom,dsi-phy-14nm"
+  * "qcom,dsi-phy-10nm"
+- reg: Physical base address and length of the registers of PLL, PHY. Some
+  revisions require the PHY regulator base address, whereas others require the
+  PHY lane base address. See below for each PHY revision.
 - reg-names: The names of register regions. The following regions are required:
+  For DSI 28nm HPM/LP/8960 PHYs and 20nm PHY:
   * "dsi_pll"
   * "dsi_phy"
   * "dsi_phy_regulator"
+  For DSI 14nm and 10nm PHYs:
+  * "dsi_pll"
+  * "dsi_phy"
+  * "dsi_phy_lane"
 - clock-cells: Must be 1. The DSI PHY block acts as a clock provider, creating
   2 clocks: A byte clock (index 0), and a pixel clock (index 1).
-- qcom,dsi-phy-index: The ID of DSI PHY hardware instance. This should
-  be 0 or 1, since we have 2 DSI PHYs at most for now.
 - power-domains: Should be <&mmcc MDSS_GDSC>.
 - clocks: Phandles to device clocks. See [1] for details on clock bindings.
 - clock-names: the following clocks are required:
   * "iface"
+  For 28nm HPM/LP, 28nm 8960 PHYs:
+- vddio-supply: phandle to vdd-io regulator device node
+  For 20nm PHY:
 - vddio-supply: phandle to vdd-io regulator device node
+- vcca-supply: phandle to vcca regulator device node
+  For 14nm PHY:
+- vcca-supply: phandle to vcca regulator device node
+  For 10nm PHY:
+- vdds-supply: phandle to vdds regulator device node
 
 Optional properties:
 - qcom,dsi-phy-regulator-ldo-mode: Boolean value indicating if the LDO mode PHY

+ 15 - 20
Documentation/devicetree/bindings/display/renesas,du.txt

@@ -13,13 +13,10 @@ Required Properties:
     - "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU
     - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU
     - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU
+    - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU
+    - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU
 
-  - reg: A list of base address and length of each memory resource, one for
-    each entry in the reg-names property.
-  - reg-names: Name of the memory resources. The DU requires one memory
-    resource for the DU core (named "du") and one memory resource for each
-    LVDS encoder (named "lvds.x" with "x" being the LVDS controller numerical
-    index).
+  - reg: the memory-mapped I/O registers base address and length
 
   - interrupt-parent: phandle of the parent interrupt controller.
   - interrupts: Interrupt specifiers for the DU interrupts.
@@ -29,14 +26,13 @@ Required Properties:
   - clock-names: Name of the clocks. This property is model-dependent.
     - R8A7779 uses a single functional clock. The clock doesn't need to be
       named.
-    - All other DU instances use one functional clock per channel and one
-      clock per LVDS encoder (if available). The functional clocks must be
-      named "du.x" with "x" being the channel numerical index. The LVDS clocks
-      must be named "lvds.x" with "x" being the LVDS encoder numerical index.
-    - In addition to the functional and encoder clocks, all DU versions also
-      support externally supplied pixel clocks. Those clocks are optional.
-      When supplied they must be named "dclkin.x" with "x" being the input
-      clock numerical index.
+    - All other DU instances use one functional clock per channel The
+      functional clocks must be named "du.x" with "x" being the channel
+      numerical index.
+    - In addition to the functional clocks, all DU versions also support
+      externally supplied pixel clocks. Those clocks are optional. When
+      supplied they must be named "dclkin.x" with "x" being the input clock
+      numerical index.
 
   - vsps: A list of phandle and channel index tuples to the VSPs that handle
     the memory interfaces for the DU channels. The phandle identifies the VSP
@@ -63,15 +59,15 @@ corresponding to each DU output.
  R8A7794 (R-Car E2)   DPAD 0         DPAD 1         -              -
  R8A7795 (R-Car H3)   DPAD 0         HDMI 0         HDMI 1         LVDS 0
  R8A7796 (R-Car M3-W) DPAD 0         HDMI 0         LVDS 0         -
+ R8A77970 (R-Car V3M) DPAD 0         LVDS 0         -              -
+ R8A77995 (R-Car D3)  DPAD 0         LVDS 0         LVDS 1         -
 
 
 Example: R8A7795 (R-Car H3) ES2.0 DU
 
 	du: display@feb00000 {
 		compatible = "renesas,du-r8a7795";
-		reg = <0 0xfeb00000 0 0x80000>,
-		      <0 0xfeb90000 0 0x14>;
-		reg-names = "du", "lvds.0";
+		reg = <0 0xfeb00000 0 0x80000>;
 		interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>,
@@ -79,9 +75,8 @@ Example: R8A7795 (R-Car H3) ES2.0 DU
 		clocks = <&cpg CPG_MOD 724>,
 			 <&cpg CPG_MOD 723>,
 			 <&cpg CPG_MOD 722>,
-			 <&cpg CPG_MOD 721>,
-			 <&cpg CPG_MOD 727>;
-		clock-names = "du.0", "du.1", "du.2", "du.3", "lvds.0";
+			 <&cpg CPG_MOD 721>;
+		clock-names = "du.0", "du.1", "du.2", "du.3";
 		vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd0 1>;
 
 		ports {

+ 2 - 2
Documentation/devicetree/overlay-notes.txt

@@ -87,8 +87,8 @@ Overlay in-kernel API
 
 The API is quite easy to use.
 
-1. Call of_overlay_apply() to create and apply an overlay changeset. The return
-value is an error or a cookie identifying this overlay.
+1. Call of_overlay_fdt_apply() to create and apply an overlay changeset. The
+return value is an error or a cookie identifying this overlay.
 
 2. Call of_overlay_remove() to remove and cleanup the overlay changeset
 previously created via the call to of_overlay_apply(). Removal of an overlay
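
For reference, a minimal sketch of the updated flow described in this hunk, assuming the three-argument of_overlay_fdt_apply() prototype (overlay blob, blob size, changeset-id out-parameter) that replaces of_overlay_apply(); the helper and blob symbols are hypothetical and the exact prototype should be checked against the target kernel:

	#include <linux/of.h>

	/* Hypothetical overlay blob embedded in the module. */
	extern u8 __dtbo_example_begin[];
	extern u8 __dtbo_example_end[];

	static int example_ovcs_id;

	static int example_apply_overlay(void)
	{
		/* Step 1: create and apply the overlay changeset. */
		return of_overlay_fdt_apply(__dtbo_example_begin,
					    __dtbo_example_end - __dtbo_example_begin,
					    &example_ovcs_id);
	}

	static void example_remove_overlay(void)
	{
		/* Step 2: remove and clean up the overlay changeset. */
		of_overlay_remove(&example_ovcs_id);
	}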

+ 7 - 0
Documentation/gpu/todo.rst

@@ -450,5 +450,12 @@ See drivers/gpu/drm/amd/display/TODO for tasks.
 
 Contact: Harry Wentland, Alex Deucher
 
+i915
+----
+
+- Our early/late pm callbacks could be removed in favour of using
+  device_link_add to model the dependency between i915 and snd_had. See
+  https://dri.freedesktop.org/docs/drm/driver-api/device_link.html
+
 Outside DRM
 ===========
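
As a rough illustration of the device_link_add() approach mentioned in the new i915 item, with placeholder device pointers rather than the real i915/snd_hda wiring:

	#include <linux/device.h>

	/* Hypothetical: make the audio device (consumer) depend on the GPU
	 * device (supplier) so the driver core orders PM transitions.
	 */
	static int example_link_audio_to_gpu(struct device *audio_dev,
					     struct device *gpu_dev)
	{
		struct device_link *link;

		link = device_link_add(audio_dev, gpu_dev,
				       DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
		return link ? 0 : -ENODEV;
	}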

+ 3 - 0
MAINTAINERS

@@ -766,6 +766,8 @@ F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
 F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -4744,6 +4746,7 @@ F:	drivers/gpu/drm/rcar-du/
 F:	drivers/gpu/drm/shmobile/
 F:	include/linux/platform_data/shmob_drm.h
 F:	Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
+F:	Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
 F:	Documentation/devicetree/bindings/display/renesas,du.txt
 
 DRM DRIVERS FOR ROCKCHIP

+ 1 - 1
arch/x86/kernel/devicetree.c

@@ -259,7 +259,7 @@ static void __init dtb_apic_setup(void)
 	dtb_ioapic_setup();
 }
 
-#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_OF_EARLY_FLATTREE
 static void __init x86_flattree_get_config(void)
 {
 	u32 size, map_len;

+ 1 - 0
drivers/dma-buf/dma-fence.c

@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)
 
 	trace_dma_fence_destroy(fence);
 
+	/* Failed to signal before release, could be a refcounting issue */
 	WARN_ON(!list_empty(&fence->cb_list));
 
 	if (fence->ops->release)
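
The comment added above documents when this WARN_ON fires: the fence still has callbacks queued at release time. A well-behaved user signals the fence before dropping the last reference, roughly (illustrative only, with a hypothetical fence pointer):

	/* Signal first so queued callbacks run, then drop the reference;
	 * dma_fence_release() then finds an empty cb_list.
	 */
	dma_fence_signal(fence);
	dma_fence_put(fence);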

+ 4 - 4
drivers/gpu/drm/amd/amdgpu/Makefile

@@ -30,7 +30,6 @@ FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
 ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
-	-I$(FULL_AMD_PATH)/scheduler \
 	-I$(FULL_AMD_PATH)/powerplay/inc \
 	-I$(FULL_AMD_PATH)/acp/include \
 	-I$(FULL_AMD_DISPLAY_PATH) \
@@ -63,7 +62,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
 
 # add GMC block
 amdgpu-y += \
@@ -88,8 +87,7 @@ amdgpu-y += \
 
 # add SMC block
 amdgpu-y += \
-	amdgpu_dpm.o \
-	amdgpu_powerplay.o
+	amdgpu_dpm.o
 
 # add DCE block
 amdgpu-y += \
@@ -130,6 +128,8 @@ amdgpu-y += \
 # add amdkfd interfaces
 amdgpu-y += \
 	 amdgpu_amdkfd.o \
+	 amdgpu_amdkfd_fence.o \
+	 amdgpu_amdkfd_gpuvm.o \
 	 amdgpu_amdkfd_gfx_v8.o
 
 # add cgs

+ 36 - 115
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -68,6 +68,7 @@
 #include "amdgpu_vce.h"
 #include "amdgpu_vcn.h"
 #include "amdgpu_mn.h"
+#include "amdgpu_gmc.h"
 #include "amdgpu_dm.h"
 #include "amdgpu_virt.h"
 #include "amdgpu_gart.h"
@@ -127,6 +128,7 @@ extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
+extern int amdgpu_emu_mode;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -179,10 +181,6 @@ extern int amdgpu_cik_support;
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
 
-/* GPU RESET flags */
-#define AMDGPU_RESET_INFO_VRAM_LOST  (1 << 0)
-#define AMDGPU_RESET_INFO_FULLRESET  (1 << 1)
-
 struct amdgpu_device;
 struct amdgpu_ib;
 struct amdgpu_cs_parser;
@@ -318,13 +316,6 @@ struct amdgpu_vm_pte_funcs {
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
 			  uint32_t incr);
-
-	/* maximum nums of PTEs/PDEs in a single operation */
-	uint32_t	set_max_nums_pte_pde;
-
-	/* number of dw to reserve per operation */
-	unsigned	set_pte_pde_num_dw;
-
 	/* for linear pte/pde updates without addr mapping */
 	void (*set_pte_pde)(struct amdgpu_ib *ib,
 			    uint64_t pe,
@@ -332,28 +323,6 @@ struct amdgpu_vm_pte_funcs {
 			    uint32_t incr, uint64_t flags);
 };
 
-/* provided by the gmc block */
-struct amdgpu_gart_funcs {
-	/* flush the vm tlb via mmio */
-	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
-			      uint32_t vmid);
-	/* write pte/pde updates using the cpu */
-	int (*set_pte_pde)(struct amdgpu_device *adev,
-			   void *cpu_pt_addr, /* cpu addr of page table */
-			   uint32_t gpu_page_idx, /* pte/pde to update */
-			   uint64_t addr, /* addr to write into pte/pde */
-			   uint64_t flags); /* access flags */
-	/* enable/disable PRT support */
-	void (*set_prt)(struct amdgpu_device *adev, bool enable);
-	/* set pte flags based per asic */
-	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
-				     uint32_t flags);
-	/* get the pde for a given mc addr */
-	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
-			   u64 *dst, u64 *flags);
-	uint32_t (*get_invalidate_req)(unsigned int vmid);
-};
-
 /* provided by the ih block */
 struct amdgpu_ih_funcs {
 	/* ring read/write ptr handling, called from interrupt context */
@@ -370,14 +339,6 @@ struct amdgpu_ih_funcs {
 bool amdgpu_get_bios(struct amdgpu_device *adev);
 bool amdgpu_read_bios(struct amdgpu_device *adev);
 
-/*
- * Dummy page
- */
-struct amdgpu_dummy_page {
-	struct page	*page;
-	dma_addr_t	addr;
-};
-
 /*
  * Clocks
  */
@@ -418,8 +379,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags);
-int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
-void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
@@ -480,7 +441,7 @@ struct amdgpu_sa_bo {
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
-			     u64 flags, bool kernel,
+			     u64 flags, enum ttm_bo_type type,
 			     struct reservation_object *resv,
 			     struct drm_gem_object **obj);
 
@@ -493,56 +454,6 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 int amdgpu_fence_slab_init(void);
 void amdgpu_fence_slab_fini(void);
 
-/*
- * VMHUB structures, functions & helpers
- */
-struct amdgpu_vmhub {
-	uint32_t	ctx0_ptb_addr_lo32;
-	uint32_t	ctx0_ptb_addr_hi32;
-	uint32_t	vm_inv_eng0_req;
-	uint32_t	vm_inv_eng0_ack;
-	uint32_t	vm_context0_cntl;
-	uint32_t	vm_l2_pro_fault_status;
-	uint32_t	vm_l2_pro_fault_cntl;
-};
-
-/*
- * GPU MC structures, functions & helpers
- */
-struct amdgpu_mc {
-	resource_size_t		aper_size;
-	resource_size_t		aper_base;
-	resource_size_t		agp_base;
-	/* for some chips with <= 32MB we need to lie
-	 * about vram size near mc fb location */
-	u64			mc_vram_size;
-	u64			visible_vram_size;
-	u64			gart_size;
-	u64			gart_start;
-	u64			gart_end;
-	u64			vram_start;
-	u64			vram_end;
-	unsigned		vram_width;
-	u64			real_vram_size;
-	int			vram_mtrr;
-	u64                     mc_mask;
-	const struct firmware   *fw;	/* MC firmware */
-	uint32_t                fw_version;
-	struct amdgpu_irq_src	vm_fault;
-	uint32_t		vram_type;
-	uint32_t                srbm_soft_reset;
-	bool			prt_warning;
-	uint64_t		stolen_size;
-	/* apertures */
-	u64					shared_aperture_start;
-	u64					shared_aperture_end;
-	u64					private_aperture_start;
-	u64					private_aperture_end;
-	/* protects concurrent invalidation */
-	spinlock_t		invalidate_lock;
-	bool			translate_further;
-};
-
 /*
  * GPU doorbell structures, functions & helpers
  */
@@ -1125,8 +1036,9 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
-	unsigned		vmid;
 	uint64_t		vm_pd_addr;
+	unsigned		vmid;
+	unsigned		pasid;
 	uint32_t		gds_base, gds_size;
 	uint32_t		gws_base, gws_size;
 	uint32_t		oa_base, oa_size;
@@ -1156,7 +1068,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 512	/* Reserve at most 512 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 128	/* Reserve at most 128 WB slots for amdgpu-owned rings. */
 
 struct amdgpu_wb {
 	struct amdgpu_bo	*wb_obj;
@@ -1169,8 +1081,6 @@ struct amdgpu_wb {
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
 
-void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
-
 /*
  * SDMA
  */
@@ -1288,6 +1198,11 @@ struct amdgpu_asic_funcs {
 	void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
 	/* get config memsize register */
 	u32 (*get_config_memsize)(struct amdgpu_device *adev);
+	/* flush hdp write queue */
+	void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+	/* invalidate hdp read cache */
+	void (*invalidate_hdp)(struct amdgpu_device *adev,
+			       struct amdgpu_ring *ring);
 };
 
 /*
@@ -1431,7 +1346,7 @@ struct amdgpu_nbio_funcs {
 	u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
 	u32 (*get_rev_id)(struct amdgpu_device *adev);
 	void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
-	void (*hdp_flush)(struct amdgpu_device *adev);
+	void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 	u32 (*get_memsize)(struct amdgpu_device *adev);
 	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
 				    bool use_doorbell, int doorbell_index);
@@ -1478,9 +1393,7 @@ enum amd_hw_ip_block_type {
 #define HWIP_MAX_INSTANCE	6
 
 struct amd_powerplay {
-	struct cgs_device *cgs_device;
 	void *pp_handle;
-	const struct amd_ip_funcs *ip_funcs;
 	const struct amd_pm_funcs *pp_funcs;
 };
 
@@ -1574,9 +1487,9 @@ struct amdgpu_device {
 	struct amdgpu_clock            clock;
 
 	/* MC */
-	struct amdgpu_mc		mc;
+	struct amdgpu_gmc		gmc;
 	struct amdgpu_gart		gart;
-	struct amdgpu_dummy_page	dummy_page;
+	dma_addr_t			dummy_page_addr;
 	struct amdgpu_vm_manager	vm_manager;
 	struct amdgpu_vmhub             vmhub[AMDGPU_MAX_VMHUBS];
 
@@ -1715,6 +1628,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    uint32_t acc_flags);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
 
@@ -1726,6 +1642,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
 
+int emu_soc_asic_init(struct amdgpu_device *adev);
+
 /*
  * Registers read & write functions.
  */
@@ -1736,6 +1654,9 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
 
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
 #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
 #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
 #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
@@ -1838,13 +1759,17 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
-#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
-#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
+#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
+#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
+#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
@@ -1857,11 +1782,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
 #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
-#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
@@ -1871,7 +1796,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
 #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
-#define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc))
 #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
 #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
 #define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
@@ -1894,20 +1818,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job* job, bool force);
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
-void amdgpu_update_display_priority(struct amdgpu_device *adev);
+void amdgpu_display_update_priority(struct amdgpu_device *adev);
 
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 void amdgpu_device_vram_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc, u64 base);
+				 struct amdgpu_gmc *mc, u64 base);
 void amdgpu_device_gart_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc);
+				 struct amdgpu_gmc *mc);
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
-void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
-int amdgpu_ttm_init(struct amdgpu_device *adev);
-void amdgpu_ttm_fini(struct amdgpu_device *adev);
 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 					     const u32 *registers,
 					     const u32 array_size);

+ 3 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c

@@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
 	size_t size;
 	u32 retry = 3;
 
+	if (amdgpu_acpi_pcie_notify_device_ready(adev))
+		return -EINVAL;
+
 	/* Get the device handle */
 	handle = ACPI_HANDLE(&adev->pdev->dev);
 	if (!handle)

+ 119 - 41
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -30,6 +30,8 @@
 const struct kgd2kfd_calls *kgd2kfd;
 bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
+static const unsigned int compute_vmid_bitmap = 0xFF00;
+
 int amdgpu_amdkfd_init(void)
 {
 	int ret;
@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
 #else
 	ret = -ENOENT;
 #endif
+	amdgpu_amdkfd_gpuvm_init_mem_limits();
 
 	return ret;
 }
@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
+	case CHIP_HAWAII:
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
 		break;
 #endif
 	case CHIP_CARRIZO:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 		break;
 	default:
@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 	int last_valid_bit;
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
-			.compute_vmid_bitmap = 0xFF00,
+			.compute_vmid_bitmap = compute_vmid_bitmap,
 			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
-			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
+			.gpuvm_size = min(adev->vm_manager.max_pfn
+					  << AMDGPU_GPU_PAGE_SHIFT,
+					  AMDGPU_VA_HOLE_START),
+			.drm_render_minor = adev->ddev->render->index
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
@@ -204,20 +216,14 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **cpu_ptr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+	struct amdgpu_bo *bo = NULL;
 	int r;
+	uint64_t gpu_addr_tmp = 0;
+	void *cpu_ptr_tmp = NULL;
 
-	BUG_ON(kgd == NULL);
-	BUG_ON(gpu_addr == NULL);
-	BUG_ON(cpu_ptr == NULL);
-
-	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if ((*mem) == NULL)
-		return -ENOMEM;
-
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
-			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0,
-			     &(*mem)->bo);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
+			     NULL, &bo);
 	if (r) {
 		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
@@ -225,54 +231,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	}
 
 	/* map the buffer */
-	r = amdgpu_bo_reserve((*mem)->bo, true);
+	r = amdgpu_bo_reserve(bo, true);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 		goto allocate_mem_reserve_bo_failed;
 	}
 
-	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
-				&(*mem)->gpu_addr);
+	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
+				&gpu_addr_tmp);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 		goto allocate_mem_pin_bo_failed;
 	}
-	*gpu_addr = (*mem)->gpu_addr;
 
-	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
 	if (r) {
 		dev_err(adev->dev,
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 		goto allocate_mem_kmap_bo_failed;
 	}
-	*cpu_ptr = (*mem)->cpu_ptr;
 
-	amdgpu_bo_unreserve((*mem)->bo);
+	*mem_obj = bo;
+	*gpu_addr = gpu_addr_tmp;
+	*cpu_ptr = cpu_ptr_tmp;
+
+	amdgpu_bo_unreserve(bo);
 
 	return 0;
 
 allocate_mem_kmap_bo_failed:
-	amdgpu_bo_unpin((*mem)->bo);
+	amdgpu_bo_unpin(bo);
 allocate_mem_pin_bo_failed:
-	amdgpu_bo_unreserve((*mem)->bo);
+	amdgpu_bo_unreserve(bo);
 allocate_mem_reserve_bo_failed:
-	amdgpu_bo_unref(&(*mem)->bo);
+	amdgpu_bo_unref(&bo);
 
 	return r;
 }
 
 void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 {
-	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
-
-	BUG_ON(mem == NULL);
+	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
 
-	amdgpu_bo_reserve(mem->bo, true);
-	amdgpu_bo_kunmap(mem->bo);
-	amdgpu_bo_unpin(mem->bo);
-	amdgpu_bo_unreserve(mem->bo);
-	amdgpu_bo_unref(&(mem->bo));
-	kfree(mem);
+	amdgpu_bo_reserve(bo, true);
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&(bo));
 }
 
 void get_local_mem_info(struct kgd_dev *kgd,
@@ -281,24 +286,29 @@ void get_local_mem_info(struct kgd_dev *kgd,
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
 					     ~((1ULL << 32) - 1);
-	resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;
+	resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size;
 
 	memset(mem_info, 0, sizeof(*mem_info));
-	if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
-		mem_info->local_mem_size_public = adev->mc.visible_vram_size;
-		mem_info->local_mem_size_private = adev->mc.real_vram_size -
-				adev->mc.visible_vram_size;
+	if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) {
+		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+				adev->gmc.visible_vram_size;
 	} else {
 		mem_info->local_mem_size_public = 0;
-		mem_info->local_mem_size_private = adev->mc.real_vram_size;
+		mem_info->local_mem_size_private = adev->gmc.real_vram_size;
 	}
-	mem_info->vram_width = adev->mc.vram_width;
+	mem_info->vram_width = adev->gmc.vram_width;
 
 	pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
-			&adev->mc.aper_base, &aper_limit,
+			&adev->gmc.aper_base, &aper_limit,
 			mem_info->local_mem_size_public,
 			mem_info->local_mem_size_private);
 
+	if (amdgpu_emu_mode == 1) {
+		mem_info->mem_clk_max = 100;
+		return;
+	}
+
 	if (amdgpu_sriov_vf(adev))
 		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
 	else
@@ -319,6 +329,9 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 	/* the sclk is in quantas of 10kHz */
+	if (amdgpu_emu_mode == 1)
+		return 100;
+
 	if (amdgpu_sriov_vf(adev))
 		return adev->clock.default_sclk / 100;
 
@@ -354,3 +367,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
 
 	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 }
+
+int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+				uint32_t vmid, uint64_t gpu_addr,
+				uint32_t *ib_cmd, uint32_t ib_len)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	struct amdgpu_ring *ring;
+	struct dma_fence *f = NULL;
+	int ret;
+
+	switch (engine) {
+	case KGD_ENGINE_MEC1:
+		ring = &adev->gfx.compute_ring[0];
+		break;
+	case KGD_ENGINE_SDMA1:
+		ring = &adev->sdma.instance[0].ring;
+		break;
+	case KGD_ENGINE_SDMA2:
+		ring = &adev->sdma.instance[1].ring;
+		break;
+	default:
+		pr_err("Invalid engine in IB submission: %d\n", engine);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
+	if (ret)
+		goto err;
+
+	ib = &job->ibs[0];
+	memset(ib, 0, sizeof(struct amdgpu_ib));
+
+	ib->gpu_addr = gpu_addr;
+	ib->ptr = ib_cmd;
+	ib->length_dw = ib_len;
+	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
+	job->vmid = vmid;
+
+	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
+	if (ret) {
+		DRM_ERROR("amdgpu: failed to schedule IB.\n");
+		goto err_ib_sched;
+	}
+
+	ret = dma_fence_wait(f, false);
+
+err_ib_sched:
+	dma_fence_put(f);
+	amdgpu_job_free(job);
+err:
+	return ret;
+}
+
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
+{
+	if (adev->kfd) {
+		if ((1 << vmid) & compute_vmid_bitmap)
+			return true;
+	}
+
+	return false;
+}
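
A hypothetical caller sketch for the new amdgpu_amdkfd_submit_ib() helper, reusing alloc_gtt_mem()/free_gtt_mem() from the same interface to hold the IB words (packet contents and VMID are placeholders):

	static int example_submit_test_ib(struct kgd_dev *kgd, u32 vmid)
	{
		void *mem_obj, *cpu_ptr;
		uint64_t gpu_addr;
		uint32_t *ib;
		int ret;

		ret = alloc_gtt_mem(kgd, PAGE_SIZE, &mem_obj, &gpu_addr, &cpu_ptr);
		if (ret)
			return ret;

		ib = cpu_ptr;
		ib[0] = 0;	/* placeholder word, not a real PM4 packet */

		ret = amdgpu_amdkfd_submit_ib(kgd, KGD_ENGINE_MEC1, vmid,
					      gpu_addr, ib, 1);

		free_gtt_mem(kgd, mem_obj);
		return ret;
	}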

+ 110 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -28,13 +28,89 @@
 #include <linux/types.h>
 #include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_vm.h"
+
+extern const struct kgd2kfd_calls *kgd2kfd;
 
 struct amdgpu_device;
 
+struct kfd_bo_va_list {
+	struct list_head bo_list;
+	struct amdgpu_bo_va *bo_va;
+	void *kgd_dev;
+	bool is_mapped;
+	uint64_t va;
+	uint64_t pte_flags;
+};
+
 struct kgd_mem {
+	struct mutex lock;
 	struct amdgpu_bo *bo;
-	uint64_t gpu_addr;
-	void *cpu_ptr;
+	struct list_head bo_va_list;
+	/* protected by amdkfd_process_info.lock */
+	struct ttm_validate_buffer validate_list;
+	struct ttm_validate_buffer resv_list;
+	uint32_t domain;
+	unsigned int mapped_to_gpu_memory;
+	uint64_t va;
+
+	uint32_t mapping_flags;
+
+	struct amdkfd_process_info *process_info;
+
+	struct amdgpu_sync sync;
+
+	bool aql_queue;
+};
+
+/* KFD Memory Eviction */
+struct amdgpu_amdkfd_fence {
+	struct dma_fence base;
+	struct mm_struct *mm;
+	spinlock_t lock;
+	char timeline_name[TASK_COMM_LEN];
+};
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+						       struct mm_struct *mm);
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+
+struct amdkfd_process_info {
+	/* List head of all VMs that belong to a KFD process */
+	struct list_head vm_list_head;
+	/* List head for all KFD BOs that belong to a KFD process. */
+	struct list_head kfd_bo_list;
+	/* Lock to protect kfd_bo_list */
+	struct mutex lock;
+
+	/* Number of VMs */
+	unsigned int n_vms;
+	/* Eviction Fence */
+	struct amdgpu_amdkfd_fence *eviction_fence;
+};
+
+/* struct amdkfd_vm -
+ * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
+ * belonging to a KFD process. All the VMs belonging to the same process point
+ * to the same amdkfd_process_info.
+ */
+struct amdkfd_vm {
+	/* Keep base as the first parameter for pointer compatibility between
+	 * amdkfd_vm and amdgpu_vm.
+	 */
+	struct amdgpu_vm base;
+
+	/* List node in amdkfd_process_info.vm_list_head*/
+	struct list_head vm_list_node;
+
+	struct amdgpu_device *adev;
+	/* Points to the KFD process VM info*/
+	struct amdkfd_process_info *process_info;
+
+	uint64_t pd_phys_addr;
 };
 };
 
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
+				uint32_t vmid, uint64_t gpu_addr,
+				uint32_t *ib_cmd, uint32_t ib_len);
+
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);

+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
+
 /* Shared API */
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
@@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 		valid;							\
 	})

+/* GPUVM API */
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+					  void **process_info,
+					  struct dma_fence **ef);
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+		struct kgd_dev *kgd, uint64_t va, uint64_t size,
+		void *vm, struct kgd_mem **mem,
+		uint64_t *offset, uint32_t flags);
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+int amdgpu_amdkfd_gpuvm_sync_memory(
+		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+		struct kgd_mem *mem, void **kptr, uint64_t *size);
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
+					    struct dma_fence **ef);
+
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
+
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
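
Note: the GPUVM API declared above is driven from the KFD side. A minimal
sketch of the expected call order follows (illustration only, not part of the
patch; "kgd", "va" and "size" are assumed to come from the caller, the
ALLOC_MEM_FLAGS_* values are the ones used elsewhere in this series, and
return-value checking is omitted for brevity):

	void *vm, *process_info = NULL;
	struct dma_fence *ef;
	struct kgd_mem *mem;

	amdgpu_amdkfd_gpuvm_create_process_vm(kgd, &vm, &process_info, &ef);
	amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, vm, &mem,
						NULL, ALLOC_MEM_FLAGS_GTT |
						ALLOC_MEM_FLAGS_WRITABLE);
	amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
	amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true);

	/* ... user queues use the mapping ... */

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kgd, mem);
	amdgpu_amdkfd_gpuvm_destroy_process_vm(kgd, vm);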

+ 179 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c

@@ -0,0 +1,179 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/stacktrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include "amdgpu_amdkfd.h"
+
+static const struct dma_fence_ops amdkfd_fence_ops;
+static atomic_t fence_seq = ATOMIC_INIT(0);
+
+/* Eviction Fence
+ * Fence helper functions to deal with KFD memory eviction.
+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
+ *  evicted unless all the user queues for that process are evicted.
+ *
+ * All the BOs in a process share an eviction fence. When process X wants
+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
+ * by calling ttm_bo_driver->eviction_valuable().
+ *
+ * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
+ *  to process X. Otherwise, it will return true to indicate BO can be
+ *  evicted by TTM.
+ *
+ * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
+ * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
+ * --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
+ *
+ * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
+ *  notify when the BO is free to move. fence_add_callback --> enable_signaling
+ *  --> amdgpu_amdkfd_fence.enable_signaling
+ *
+ * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
+ * user queues and signal fence. The work item will also start another delayed
+ * work item to restore BOs
+ */
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+						       struct mm_struct *mm)
+{
+	struct amdgpu_amdkfd_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (fence == NULL)
+		return NULL;
+
+	/* This reference gets released in amdkfd_fence_release */
+	mmgrab(mm);
+	fence->mm = mm;
+	get_task_comm(fence->timeline_name, current);
+	spin_lock_init(&fence->lock);
+
+	dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
+		   context, atomic_inc_return(&fence_seq));
+
+	return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence;
+
+	if (!f)
+		return NULL;
+
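+	/* Only fences using amdkfd_fence_ops are ours; for anything else the
+	 * container_of() below would be meaningless, so return NULL.
+	 */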
+	fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+	if (fence && f->ops == &amdkfd_fence_ops)
+		return fence;
+
+	return NULL;
+}
+
+static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
+{
+	return "amdgpu_amdkfd_fence";
+}
+
+static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	return fence->timeline_name;
+}
+
+/**
+ * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
+ *  a KFD BO and schedules a job to move the BO.
+ *  If the fence is already signaled, return true.
+ *  If it is not, schedule a work item that evicts the KFD process.
+ */
+static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	if (!fence)
+		return false;
+
+	if (dma_fence_is_signaled(f))
+		return true;
+
+	if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
+		return true;
+
+	return false;
+}
+
+/**
+ * amdkfd_fence_release - callback that fence can be freed
+ *
+ * @fence: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * Drops the mm_struct reference and RCU schedules freeing up the fence.
+ */
+static void amdkfd_fence_release(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	/* Unconditionally signal the fence. The process is getting
+	 * terminated.
+	 */
+	if (WARN_ON(!fence))
+		return; /* Not an amdgpu_amdkfd_fence */
+
+	mmdrop(fence->mm);
+	kfree_rcu(f, rcu);
+}
+
+/**
+ * amdkfd_fence_check_mm - Check whether @mm is the mm_struct the fence @f
+ *  was created with; returns true if so, false otherwise.
+ *
+ * @f: [IN] fence
+ * @mm: [IN] mm that needs to be verified
+ */
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	if (!fence)
+		return false;
+	else if (fence->mm == mm)
+		return true;
+
+	return false;
+}
+
+static const struct dma_fence_ops amdkfd_fence_ops = {
+	.get_driver_name = amdkfd_fence_get_driver_name,
+	.get_timeline_name = amdkfd_fence_get_timeline_name,
+	.enable_signaling = amdkfd_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = dma_fence_default_wait,
+	.release = amdkfd_fence_release,
+};
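
Note: a minimal sketch (not part of this file; the actual hook-up is done in
amdgpu_amdkfd_gpuvm.c) of how the per-process eviction fence is wired up,
assuming a reserved struct amdgpu_bo *bo belonging to the process:

	struct amdgpu_amdkfd_fence *ef;

	ef = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
					current->mm);
	if (!ef)
		return -ENOMEM;

	/* Attach as a shared fence: any TTM attempt to move this BO now goes
	 * through amdkfd_fence_enable_signaling() and therefore quiesces the
	 * process's user queues first.
	 */
	amdgpu_bo_fence(bo, &ef->base, true);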

+ 67 - 13
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 							uint8_t vmid);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);

 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

 /* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
@@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_get_offset = kgd_address_watch_get_offset,
 	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
-	.write_vmid_invalidate_request = write_vmid_invalidate_request,
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.get_cu_info = get_cu_info,
-	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+	.invalidate_tlbs = invalidate_tlbs,
+	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+	.submit_ib = amdgpu_amdkfd_submit_ib,
 };

 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

 	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 }

 static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 	const union amdgpu_firmware_header *hdr;

-	BUG_ON(kgd == NULL);
-
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
@@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	return hdr->common.ucode_version;
 }

+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+			uint32_t page_table_base)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("trying to set page table base for wrong VMID\n");
+		return;
+	}
+	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int vmid;
+	unsigned int tmp;
+
+	for (vmid = 0; vmid < 16; vmid++) {
+		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+			continue;
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("non kfd vmid\n");
+		return 0;
+	}
+
+	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	RREG32(mmVM_INVALIDATE_RESPONSE);
+	return 0;
+}

+ 68 - 14
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 				uint32_t queue_id);
 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 				unsigned int utimeout);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static int kgd_address_watch_disable(struct kgd_dev *kgd);
 static int kgd_address_watch_execute(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

 /* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
@@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
 			get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
-	.write_vmid_invalidate_request = write_vmid_invalidate_request,
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.get_cu_info = get_cu_info,
-	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+	.invalidate_tlbs = invalidate_tlbs,
+	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+	.submit_ib = amdgpu_amdkfd_submit_ib,
 };

 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

 	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 }

 static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 	const union amdgpu_firmware_header *hdr;

-	BUG_ON(kgd == NULL);
-
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
@@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	/* Only 12 bit in use*/
 	return hdr->common.ucode_version;
 }
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("trying to set page table base for wrong VMID\n");
+		return;
+	}
+	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int vmid;
+	unsigned int tmp;
+
+	for (vmid = 0; vmid < 16; vmid++) {
+		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+			continue;
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("non kfd vmid %d\n", vmid);
+		return -EINVAL;
+	}
+
+	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	RREG32(mmVM_INVALIDATE_RESPONSE);
+	return 0;
+}

+ 1506 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -0,0 +1,1506 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/list.h>
+#include <drm/drmP.h>
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_amdkfd.h"
+
+/* Special VM and GART address alignment needed for VI pre-Fiji due to
+ * a HW bug.
+ */
+#define VI_BO_SIZE_ALIGN (0x8000)
+
+/* Impose limit on how much memory KFD can use */
+static struct {
+	uint64_t max_system_mem_limit;
+	int64_t system_mem_used;
+	spinlock_t mem_limit_lock;
+} kfd_mem_limit;
+
+/* Struct used for amdgpu_amdkfd_bo_validate */
+struct amdgpu_vm_parser {
+	uint32_t        domain;
+	bool            wait;
+};
+
+static const char * const domain_bit_to_string[] = {
+		"CPU",
+		"GTT",
+		"VRAM",
+		"GDS",
+		"GWS",
+		"OA"
+};
+
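+/* ffs() maps the lowest set bit of @domain to its name,
+ * e.g. AMDGPU_GEM_DOMAIN_VRAM -> "VRAM".
+ */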
+#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+
+
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+	return (struct amdgpu_device *)kgd;
+}
+
+static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
+		struct kgd_mem *mem)
+{
+	struct kfd_bo_va_list *entry;
+
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list)
+		if (entry->bo_va->base.vm == avm)
+			return false;
+
+	return true;
+}
+
+/* Set memory usage limits. Currently, the only limit is:
+ *  System (kernel) memory - 3/8 of system RAM
+ */
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+	struct sysinfo si;
+	uint64_t mem;
+
+	si_meminfo(&si);
+	mem = si.totalram - si.totalhigh;
+	mem *= si.mem_unit;
+
+	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
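+	/* (mem >> 1) - (mem >> 3) = mem/2 - mem/8 = 3/8 of system RAM */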
+	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
+	pr_debug("Kernel memory limit %lluM\n",
+		(kfd_mem_limit.max_system_mem_limit >> 20));
+}
+
+static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
+					      uint64_t size, u32 domain)
+{
+	size_t acc_size;
+	int ret = 0;
+
+	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+				       sizeof(struct amdgpu_bo));
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
+			kfd_mem_limit.max_system_mem_limit) {
+			ret = -ENOMEM;
+			goto err_no_mem;
+		}
+		kfd_mem_limit.system_mem_used += (acc_size + size);
+	}
+err_no_mem:
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+	return ret;
+}
+
+static void unreserve_system_mem_limit(struct amdgpu_device *adev,
+				       uint64_t size, u32 domain)
+{
+	size_t acc_size;
+
+	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+				       sizeof(struct amdgpu_bo));
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	if (domain == AMDGPU_GEM_DOMAIN_GTT)
+		kfd_mem_limit.system_mem_used -= (acc_size + size);
+	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+		  "kfd system memory accounting unbalanced");
+
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+{
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+		kfd_mem_limit.system_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+	}
+	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+		  "kfd system memory accounting unbalanced");
+
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
+ *  reservation object.
+ *
+ * @bo: [IN] Remove eviction fence(s) from this BO
+ * @ef: [IN] If ef is specified, then this eviction fence is removed if it
+ *  is present in the shared list.
+ * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
+ *  from BO's reservation object shared list.
+ * @ef_count: [OUT] Number of fences in ef_list.
+ *
+ * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
+ *  called to restore the eviction fences and to avoid memory leak. This is
+ *  useful for shared BOs.
+ * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
+ */
+static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
+					struct amdgpu_amdkfd_fence *ef,
+					struct amdgpu_amdkfd_fence ***ef_list,
+					unsigned int *ef_count)
+{
+	struct reservation_object_list *fobj;
+	struct reservation_object *resv;
+	unsigned int i = 0, j = 0, k = 0, shared_count;
+	unsigned int count = 0;
+	struct amdgpu_amdkfd_fence **fence_list;
+
+	if (!ef && !ef_list)
+		return -EINVAL;
+
+	if (ef_list) {
+		*ef_list = NULL;
+		*ef_count = 0;
+	}
+
+	resv = bo->tbo.resv;
+	fobj = reservation_object_get_list(resv);
+
+	if (!fobj)
+		return 0;
+
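+	/* The shared fence list is edited in place under the reservation
+	 * lock; bump the seqcount with preemption disabled so lockless
+	 * readers see a consistent list.
+	 */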
+	preempt_disable();
+	write_seqcount_begin(&resv->seq);
+
+	/* Go through all the shared fences in the reservation object. If
+	 * ef is specified and it exists in the list, remove it and reduce the
+	 * count. If ef is not specified, then get the count of eviction fences
+	 * present.
+	 */
+	shared_count = fobj->shared_count;
+	for (i = 0; i < shared_count; ++i) {
+		struct dma_fence *f;
+
+		f = rcu_dereference_protected(fobj->shared[i],
+					      reservation_object_held(resv));
+
+		if (ef) {
+			if (f->context == ef->base.context) {
+				dma_fence_put(f);
+				fobj->shared_count--;
+			} else {
+				RCU_INIT_POINTER(fobj->shared[j++], f);
+			}
+		} else if (to_amdgpu_amdkfd_fence(f))
+			count++;
+	}
+	write_seqcount_end(&resv->seq);
+	preempt_enable();
+
+	if (ef || !count)
+		return 0;
+
+	/* Alloc memory for count number of eviction fence pointers. Fill the
+	 * ef_list array and ef_count
+	 */
+	fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
+			     GFP_KERNEL);
+	if (!fence_list)
+		return -ENOMEM;
+
+	preempt_disable();
+	write_seqcount_begin(&resv->seq);
+
+	j = 0;
+	for (i = 0; i < shared_count; ++i) {
+		struct dma_fence *f;
+		struct amdgpu_amdkfd_fence *efence;
+
+		f = rcu_dereference_protected(fobj->shared[i],
+			reservation_object_held(resv));
+
+		efence = to_amdgpu_amdkfd_fence(f);
+		if (efence) {
+			fence_list[k++] = efence;
+			fobj->shared_count--;
+		} else {
+			RCU_INIT_POINTER(fobj->shared[j++], f);
+		}
+	}
+
+	write_seqcount_end(&resv->seq);
+	preempt_enable();
+
+	*ef_list = fence_list;
+	*ef_count = k;
+
+	return 0;
+}
+
+/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
+ *  reservation object.
+ *
+ * @bo: [IN] Add eviction fences to this BO
+ * @ef_list: [IN] List of eviction fences to be added
+ * @ef_count: [IN] Number of fences in ef_list.
+ *
+ * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
+ *  function.
+ */
+static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
+				struct amdgpu_amdkfd_fence **ef_list,
+				unsigned int ef_count)
+{
+	int i;
+
+	if (!ef_list || !ef_count)
+		return;
+
+	for (i = 0; i < ef_count; i++) {
+		amdgpu_bo_fence(bo, &ef_list[i]->base, true);
+		/* Re-adding the fence takes an additional reference. Drop that
+		 * reference.
+		 */
+		dma_fence_put(&ef_list[i]->base);
+	}
+
+	kfree(ef_list);
+}
+
+static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
+				     bool wait)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	int ret;
+
+	if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
+		 "Called with userptr BO"))
+		return -EINVAL;
+
+	amdgpu_ttm_placement_from_domain(bo, domain);
+
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		goto validate_fail;
+	if (wait) {
+		struct amdgpu_amdkfd_fence **ef_list;
+		unsigned int ef_count;
+
+		ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
+							  &ef_count);
+		if (ret)
+			goto validate_fail;
+
+		ttm_bo_wait(&bo->tbo, false, false);
+		amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
+	}
+
+validate_fail:
+	return ret;
+}
+
+static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
+{
+	struct amdgpu_vm_parser *p = param;
+
+	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
+}
+
+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
+ *
+ * Page directories are not updated here because huge page handling
+ * during page table updates can invalidate page directory entries
+ * again. Page directories are only updated after updating page
+ * tables.
+ */
+static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
+{
+	struct amdgpu_bo *pd = vm->base.root.base.bo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+	struct amdgpu_vm_parser param;
+	uint64_t addr, flags = AMDGPU_PTE_VALID;
+	int ret;
+
+	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	param.wait = false;
+
+	ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
+					&param);
+	if (ret) {
+		pr_err("amdgpu: failed to validate PT BOs\n");
+		return ret;
+	}
+
+	ret = amdgpu_amdkfd_validate(&param, pd);
+	if (ret) {
+		pr_err("amdgpu: failed to validate PD\n");
+		return ret;
+	}
+
+	addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
+	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
+	vm->pd_phys_addr = addr;
+
+	if (vm->base.use_cpu_for_update) {
+		ret = amdgpu_bo_kmap(pd, NULL);
+		if (ret) {
+			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+			 struct dma_fence *f)
+{
+	int ret = amdgpu_sync_fence(adev, sync, f, false);
+
+	/* Sync objects can't handle multiple GPUs (contexts) updating
+	 * sync->last_vm_update. Fortunately we don't need it for
+	 * KFD's purposes, so we can just drop that fence.
+	 */
+	if (sync->last_vm_update) {
+		dma_fence_put(sync->last_vm_update);
+		sync->last_vm_update = NULL;
+	}
+
+	return ret;
+}
+
+static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
+{
+	struct amdgpu_bo *pd = vm->root.base.bo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+	int ret;
+
+	ret = amdgpu_vm_update_directories(adev, vm);
+	if (ret)
+		return ret;
+
+	return sync_vm_fence(adev, sync, vm->last_update);
+}
+
+/* add_bo_to_vm - Add a BO to a VM
+ *
+ * Everything that needs to be done only once when a BO is first added
+ * to a VM. It can later be mapped and unmapped many times without
+ * repeating these steps.
+ *
+ * 1. Allocate and initialize BO VA entry data structure
+ * 2. Add BO to the VM
+ * 3. Determine ASIC-specific PTE flags
+ * 4. Alloc page tables and directories if needed
+ * 4a.  Validate new page tables and directories
+ */
+static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+		struct amdgpu_vm *avm, bool is_aql,
+		struct kfd_bo_va_list **p_bo_va_entry)
+{
+	int ret;
+	struct kfd_bo_va_list *bo_va_entry;
+	struct amdkfd_vm *kvm = container_of(avm,
+					     struct amdkfd_vm, base);
+	struct amdgpu_bo *pd = avm->root.base.bo;
+	struct amdgpu_bo *bo = mem->bo;
+	uint64_t va = mem->va;
+	struct list_head *list_bo_va = &mem->bo_va_list;
+	unsigned long bo_size = bo->tbo.mem.size;
+
+	if (!va) {
+		pr_err("Invalid VA when adding BO to VM\n");
+		return -EINVAL;
+	}
+
+	if (is_aql)
+		va += bo_size;
+
+	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
+	if (!bo_va_entry)
+		return -ENOMEM;
+
+	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+			va + bo_size, avm);
+
+	/* Add BO to VM internal data structures*/
+	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
+	if (!bo_va_entry->bo_va) {
+		ret = -EINVAL;
+		pr_err("Failed to add BO object to VM. ret == %d\n",
+				ret);
+		goto err_vmadd;
+	}
+
+	bo_va_entry->va = va;
+	bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
+							 mem->mapping_flags);
+	bo_va_entry->kgd_dev = (void *)adev;
+	list_add(&bo_va_entry->bo_list, list_bo_va);
+
+	if (p_bo_va_entry)
+		*p_bo_va_entry = bo_va_entry;
+
+	/* Allocate new page tables if needed and validate
+	 * them. Clearing of new page tables and validate need to wait
+	 * on move fences. We don't want that to trigger the eviction
+	 * fence, so remove it temporarily.
+	 */
+	amdgpu_amdkfd_remove_eviction_fence(pd,
+					kvm->process_info->eviction_fence,
+					NULL, NULL);
+
+	ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
+	if (ret) {
+		pr_err("Failed to allocate pts, err=%d\n", ret);
+		goto err_alloc_pts;
+	}
+
+	ret = vm_validate_pt_pd_bos(kvm);
+	if (ret) {
+		pr_err("validate_pt_pd_bos() failed\n");
+		goto err_alloc_pts;
+	}
+
+	/* Add the eviction fence back */
+	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+
+	return 0;
+
+err_alloc_pts:
+	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
+	list_del(&bo_va_entry->bo_list);
+err_vmadd:
+	kfree(bo_va_entry);
+	return ret;
+}
+
+static void remove_bo_from_vm(struct amdgpu_device *adev,
+		struct kfd_bo_va_list *entry, unsigned long size)
+{
+	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
+			entry->va,
+			entry->va + size, entry);
+	amdgpu_vm_bo_rmv(adev, entry->bo_va);
+	list_del(&entry->bo_list);
+	kfree(entry);
+}
+
+static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
+				struct amdkfd_process_info *process_info)
+{
+	struct ttm_validate_buffer *entry = &mem->validate_list;
+	struct amdgpu_bo *bo = mem->bo;
+
+	INIT_LIST_HEAD(&entry->head);
+	entry->shared = true;
+	entry->bo = &bo->tbo;
+	mutex_lock(&process_info->lock);
+	list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	mutex_unlock(&process_info->lock);
+}
+
+/* Reserving a BO and its page table BOs must happen atomically to
+ * avoid deadlocks. Some operations update multiple VMs at once. Track
+ * all the reservation info in a context structure. Optionally a sync
+ * object can track VM updates.
+ */
+struct bo_vm_reservation_context {
+	struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
+	unsigned int n_vms;		    /* Number of VMs reserved	    */
+	struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
+	struct ww_acquire_ctx ticket;	    /* Reservation ticket	    */
+	struct list_head list, duplicates;  /* BO lists			    */
+	struct amdgpu_sync *sync;	    /* Pointer to sync object	    */
+	bool reserved;			    /* Whether BOs are reserved	    */
+};
+
+enum bo_vm_match {
+	BO_VM_NOT_MAPPED = 0,	/* Match VMs where a BO is not mapped */
+	BO_VM_MAPPED,		/* Match VMs where a BO is mapped     */
+	BO_VM_ALL,		/* Match all VMs a BO was added to    */
+};
+
+/**
+ * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ */
+static int reserve_bo_and_vm(struct kgd_mem *mem,
+			      struct amdgpu_vm *vm,
+			      struct bo_vm_reservation_context *ctx)
+{
+	struct amdgpu_bo *bo = mem->bo;
+	int ret;
+
+	WARN_ON(!vm);
+
+	ctx->reserved = false;
+	ctx->n_vms = 1;
+	ctx->sync = &mem->sync;
+
+	INIT_LIST_HEAD(&ctx->list);
+	INIT_LIST_HEAD(&ctx->duplicates);
+
+	ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
+	if (!ctx->vm_pd)
+		return -ENOMEM;
+
+	ctx->kfd_bo.robj = bo;
+	ctx->kfd_bo.priority = 0;
+	ctx->kfd_bo.tv.bo = &bo->tbo;
+	ctx->kfd_bo.tv.shared = true;
+	ctx->kfd_bo.user_pages = NULL;
+	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
+
+	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
+
+	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
+				     false, &ctx->duplicates);
+	if (!ret)
+		ctx->reserved = true;
+	else {
+		pr_err("Failed to reserve buffers in ttm\n");
+		kfree(ctx->vm_pd);
+		ctx->vm_pd = NULL;
+	}
+
+	return ret;
+}
+
+/**
+ * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve. If NULL, all VMs associated with the BO are
+ * used; otherwise, only the given VM is reserved.
+ * @map_type: the mapping status that will be used to filter the VMs.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ *
+ * Returns 0 for success, negative for failure.
+ */
+static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
+				struct amdgpu_vm *vm, enum bo_vm_match map_type,
+				struct bo_vm_reservation_context *ctx)
+{
+	struct amdgpu_bo *bo = mem->bo;
+	struct kfd_bo_va_list *entry;
+	unsigned int i;
+	int ret;
+
+	ctx->reserved = false;
+	ctx->n_vms = 0;
+	ctx->vm_pd = NULL;
+	ctx->sync = &mem->sync;
+
+	INIT_LIST_HEAD(&ctx->list);
+	INIT_LIST_HEAD(&ctx->duplicates);
+
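+	/* First pass: count the matching VMs so vm_pd[] can be sized */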
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if ((vm && vm != entry->bo_va->base.vm) ||
+			(entry->is_mapped != map_type
+			&& map_type != BO_VM_ALL))
+			continue;
+
+		ctx->n_vms++;
+	}
+
+	if (ctx->n_vms != 0) {
+		ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
+				     GFP_KERNEL);
+		if (!ctx->vm_pd)
+			return -ENOMEM;
+	}
+
+	ctx->kfd_bo.robj = bo;
+	ctx->kfd_bo.priority = 0;
+	ctx->kfd_bo.tv.bo = &bo->tbo;
+	ctx->kfd_bo.tv.shared = true;
+	ctx->kfd_bo.user_pages = NULL;
+	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
+
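+	/* Second pass: add the PD of each matching VM to the reservation list */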
+	i = 0;
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if ((vm && vm != entry->bo_va->base.vm) ||
+			(entry->is_mapped != map_type
+			&& map_type != BO_VM_ALL))
+			continue;
+
+		amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
+				&ctx->vm_pd[i]);
+		i++;
+	}
+
+	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
+				     false, &ctx->duplicates);
+	if (!ret)
+		ctx->reserved = true;
+	else
+		pr_err("Failed to reserve buffers in ttm.\n");
+
+	if (ret) {
+		kfree(ctx->vm_pd);
+		ctx->vm_pd = NULL;
+	}
+
+	return ret;
+}
+
+/**
+ * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
+ * @ctx: Reservation context to unreserve
+ * @wait: Optionally wait for a sync object representing pending VM updates
+ * @intr: Whether the wait is interruptible
+ *
+ * Also frees any resources allocated in
+ * reserve_bo_and_(cond_)vm(s). Returns the status from
+ * amdgpu_sync_wait.
+ */
+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
+				 bool wait, bool intr)
+{
+	int ret = 0;
+
+	if (wait)
+		ret = amdgpu_sync_wait(ctx->sync, intr);
+
+	if (ctx->reserved)
+		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
+	kfree(ctx->vm_pd);
+
+	ctx->sync = NULL;
+
+	ctx->reserved = false;
+	ctx->vm_pd = NULL;
+
+	return ret;
+}
+
+static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
+				struct kfd_bo_va_list *entry,
+				struct amdgpu_sync *sync)
+{
+	struct amdgpu_bo_va *bo_va = entry->bo_va;
+	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
+	struct amdgpu_bo *pd = vm->root.base.bo;
+
+	/* Remove eviction fence from PD (and thereby from PTs too as
+	 * they share the resv. object). Otherwise during PT update
+	 * job (see amdgpu_vm_bo_update_mapping), eviction fence would
+	 * get added to job->sync object and job execution would
+	 * trigger the eviction fence.
+	 */
+	amdgpu_amdkfd_remove_eviction_fence(pd,
+					    kvm->process_info->eviction_fence,
+					    NULL, NULL);
+	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+
+	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+
+	/* Add the eviction fence back */
+	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+
+	sync_vm_fence(adev, sync, bo_va->last_pt_update);
+
+	return 0;
+}
+
+static int update_gpuvm_pte(struct amdgpu_device *adev,
+		struct kfd_bo_va_list *entry,
+		struct amdgpu_sync *sync)
+{
+	int ret;
+	struct amdgpu_vm *vm;
+	struct amdgpu_bo_va *bo_va;
+	struct amdgpu_bo *bo;
+
+	bo_va = entry->bo_va;
+	vm = bo_va->base.vm;
+	bo = bo_va->base.bo;
+
+	/* Update the page tables  */
+	ret = amdgpu_vm_bo_update(adev, bo_va, false);
+	if (ret) {
+		pr_err("amdgpu_vm_bo_update failed\n");
+		return ret;
+	}
+
+	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
+}
+
+static int map_bo_to_gpuvm(struct amdgpu_device *adev,
+		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+{
+	int ret;
+
+	/* Set virtual address for the allocation */
+	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
+			       amdgpu_bo_size(entry->bo_va->base.bo),
+			       entry->pte_flags);
+	if (ret) {
+		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
+				entry->va, ret);
+		return ret;
+	}
+
+	ret = update_gpuvm_pte(adev, entry, sync);
+	if (ret) {
+		pr_err("update_gpuvm_pte() failed\n");
+		goto update_gpuvm_pte_failed;
+	}
+
+	return 0;
+
+update_gpuvm_pte_failed:
+	unmap_bo_from_gpuvm(adev, entry, sync);
+	return ret;
+}
+
+static int process_validate_vms(struct amdkfd_process_info *process_info)
+{
+	struct amdkfd_vm *peer_vm;
+	int ret;
+
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		ret = vm_validate_pt_pd_bos(peer_vm);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int process_update_pds(struct amdkfd_process_info *process_info,
+			      struct amdgpu_sync *sync)
+{
+	struct amdkfd_vm *peer_vm;
+	int ret;
+
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		ret = vm_update_pds(&peer_vm->base, sync);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+					  void **process_info,
+					  struct dma_fence **ef)
+{
+	int ret;
+	struct amdkfd_vm *new_vm;
+	struct amdkfd_process_info *info;
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+	if (!new_vm)
+		return -ENOMEM;
+
+	/* Initialize the VM context, allocate the page directory and zero it */
+	ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	if (ret) {
+		pr_err("Failed init vm ret %d\n", ret);
+		goto vm_init_fail;
+	}
+	new_vm->adev = adev;
+
+	if (!*process_info) {
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
+		if (!info) {
+			ret = -ENOMEM;
+			goto alloc_process_info_fail;
+		}
+
+		mutex_init(&info->lock);
+		INIT_LIST_HEAD(&info->vm_list_head);
+		INIT_LIST_HEAD(&info->kfd_bo_list);
+
+		info->eviction_fence =
+			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+						   current->mm);
+		if (!info->eviction_fence) {
+			pr_err("Failed to create eviction fence\n");
+			goto create_evict_fence_fail;
+		}
+
+		*process_info = info;
+		*ef = dma_fence_get(&info->eviction_fence->base);
+	}
+
+	new_vm->process_info = *process_info;
+
+	mutex_lock(&new_vm->process_info->lock);
+	list_add_tail(&new_vm->vm_list_node,
+			&(new_vm->process_info->vm_list_head));
+	new_vm->process_info->n_vms++;
+	mutex_unlock(&new_vm->process_info->lock);
+
+	*vm = (void *) new_vm;
+
+	pr_debug("Created process vm %p\n", *vm);
+
+	return ret;
+
+create_evict_fence_fail:
+	mutex_destroy(&info->lock);
+	kfree(info);
+alloc_process_info_fail:
+	amdgpu_vm_fini(adev, &new_vm->base);
+vm_init_fail:
+	kfree(new_vm);
+	return ret;
+
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
+	struct amdgpu_vm *avm = &kfd_vm->base;
+	struct amdgpu_bo *pd;
+	struct amdkfd_process_info *process_info;
+
+	if (WARN_ON(!kgd || !vm))
+		return;
+
+	pr_debug("Destroying process vm %p\n", vm);
+	/* Release eviction fence from PD */
+	pd = avm->root.base.bo;
+	amdgpu_bo_reserve(pd, false);
+	amdgpu_bo_fence(pd, NULL, false);
+	amdgpu_bo_unreserve(pd);
+
+	process_info = kfd_vm->process_info;
+
+	mutex_lock(&process_info->lock);
+	process_info->n_vms--;
+	list_del(&kfd_vm->vm_list_node);
+	mutex_unlock(&process_info->lock);
+
+	/* Release per-process resources */
+	if (!process_info->n_vms) {
+		WARN_ON(!list_empty(&process_info->kfd_bo_list));
+
+		dma_fence_put(&process_info->eviction_fence->base);
+		mutex_destroy(&process_info->lock);
+		kfree(process_info);
+	}
+
+	/* Release the VM context */
+	amdgpu_vm_fini(adev, avm);
+	kfree(vm);
+}
+
+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+{
+	struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;
+
+	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+}
+
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+		struct kgd_dev *kgd, uint64_t va, uint64_t size,
+		void *vm, struct kgd_mem **mem,
+		uint64_t *offset, uint32_t flags)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
+	struct amdgpu_bo *bo;
+	int byte_align;
+	u32 alloc_domain;
+	u64 alloc_flags;
+	uint32_t mapping_flags;
+	int ret;
+
+	/*
+	 * Check on which domain to allocate BO
+	 */
+	if (flags & ALLOC_MEM_FLAGS_VRAM) {
+		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
+		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
+			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
+			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
+		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_flags = 0;
+	} else {
+		return -EINVAL;
+	}
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	mutex_init(&(*mem)->lock);
+	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
+
+	/* Workaround for AQL queue wraparound bug. Map the same
+	 * memory twice. That means we only actually allocate half
+	 * the memory.
+	 */
+	if ((*mem)->aql_queue)
+		size = size >> 1;
+
+	/* Workaround for TLB bug on older VI chips */
+	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
+			adev->asic_type != CHIP_FIJI &&
+			adev->asic_type != CHIP_POLARIS10 &&
+			adev->asic_type != CHIP_POLARIS11) ?
+			VI_BO_SIZE_ALIGN : 1;
+
+	mapping_flags = AMDGPU_VM_PAGE_READABLE;
+	if (flags & ALLOC_MEM_FLAGS_WRITABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+	if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+	if (flags & ALLOC_MEM_FLAGS_COHERENT)
+		mapping_flags |= AMDGPU_VM_MTYPE_UC;
+	else
+		mapping_flags |= AMDGPU_VM_MTYPE_NC;
+	(*mem)->mapping_flags = mapping_flags;
+
+	amdgpu_sync_create(&(*mem)->sync);
+
+	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+	if (ret) {
+		pr_debug("Insufficient system memory\n");
+		goto err_reserve_system_mem;
+	}
+
+	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
+			va, size, domain_string(alloc_domain));
+
+	ret = amdgpu_bo_create(adev, size, byte_align,
+				alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+	if (ret) {
+		pr_debug("Failed to create BO on domain %s. ret %d\n",
+				domain_string(alloc_domain), ret);
+		goto err_bo_create;
+	}
+	bo->kfd_bo = *mem;
+	(*mem)->bo = bo;
+
+	(*mem)->va = va;
+	(*mem)->domain = alloc_domain;
+	(*mem)->mapped_to_gpu_memory = 0;
+	(*mem)->process_info = kfd_vm->process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info);
+
+	if (offset)
+		*offset = amdgpu_bo_mmap_offset(bo);
+
+	return 0;
+
+err_bo_create:
+	unreserve_system_mem_limit(adev, size, alloc_domain);
+err_reserve_system_mem:
+	mutex_destroy(&(*mem)->lock);
+	kfree(*mem);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	unsigned long bo_size = mem->bo->tbo.mem.size;
+	struct kfd_bo_va_list *entry, *tmp;
+	struct bo_vm_reservation_context ctx;
+	struct ttm_validate_buffer *bo_list_entry;
+	int ret;
+
+	mutex_lock(&mem->lock);
+
+	if (mem->mapped_to_gpu_memory > 0) {
+		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
+				mem->va, bo_size);
+		mutex_unlock(&mem->lock);
+		return -EBUSY;
+	}
+
+	mutex_unlock(&mem->lock);
+	/* lock is not needed after this, since mem is unused and will
+	 * be freed anyway
+	 */
+
+	/* Make sure restore workers don't access the BO any more */
+	bo_list_entry = &mem->validate_list;
+	mutex_lock(&process_info->lock);
+	list_del(&bo_list_entry->head);
+	mutex_unlock(&process_info->lock);
+
+	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+	if (unlikely(ret))
+		return ret;
+
+	/* The eviction fence should be removed by the last unmap.
+	 * TODO: Log an error condition if the bo still has the eviction fence
+	 * attached
+	 */
+	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+					process_info->eviction_fence,
+					NULL, NULL);
+	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
+		mem->va + bo_size * (1 + mem->aql_queue));
+
+	/* Remove from VM internal data structures */
+	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
+		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
+				entry, bo_size);
+
+	ret = unreserve_bo_and_vms(&ctx, false, false);
+
+	/* Free the sync object */
+	amdgpu_sync_free(&mem->sync);
+
+	/* Free the BO*/
+	amdgpu_bo_unref(&mem->bo);
+	mutex_destroy(&mem->lock);
+	kfree(mem);
+
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
+	int ret;
+	struct amdgpu_bo *bo;
+	uint32_t domain;
+	struct kfd_bo_va_list *entry;
+	struct bo_vm_reservation_context ctx;
+	struct kfd_bo_va_list *bo_va_entry = NULL;
+	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
+	unsigned long bo_size;
+
+	/* Make sure restore is not running concurrently.
+	 */
+	mutex_lock(&mem->process_info->lock);
+
+	mutex_lock(&mem->lock);
+
+	bo = mem->bo;
+
+	if (!bo) {
+		pr_err("Invalid BO when mapping memory to GPU\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	domain = mem->domain;
+	bo_size = bo->tbo.mem.size;
+
+	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
+			mem->va,
+			mem->va + bo_size * (1 + mem->aql_queue),
+			vm, domain_string(domain));
+
+	ret = reserve_bo_and_vm(mem, vm, &ctx);
+	if (unlikely(ret))
+		goto out;
+
+	if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
+		ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
+				&bo_va_entry);
+		if (ret)
+			goto add_bo_to_vm_failed;
+		if (mem->aql_queue) {
+			ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
+					true, &bo_va_entry_aql);
+			if (ret)
+				goto add_bo_to_vm_failed_aql;
+		}
+	} else {
+		ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
+		if (unlikely(ret))
+			goto add_bo_to_vm_failed;
+	}
+
+	if (mem->mapped_to_gpu_memory == 0) {
+		/* Validate BO only once. The eviction fence gets added to BO
+		 * the first time it is mapped. Validate will wait for all
+		 * background evictions to complete.
+		 */
+		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+		if (ret) {
+			pr_debug("Validate failed\n");
+			goto map_bo_to_gpuvm_failed;
+		}
+	}
+
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
+			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
+					entry->va, entry->va + bo_size,
+					entry);
+
+			ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+			if (ret) {
+				pr_err("Failed to map bo to gpuvm\n");
+				goto map_bo_to_gpuvm_failed;
+			}
+
+			ret = vm_update_pds(vm, ctx.sync);
+			if (ret) {
+				pr_err("Failed to update page directories\n");
+				goto map_bo_to_gpuvm_failed;
+			}
+
+			entry->is_mapped = true;
+			mem->mapped_to_gpu_memory++;
+			pr_debug("\t INC mapping count %d\n",
+					mem->mapped_to_gpu_memory);
+		}
+	}
+
+	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
+		amdgpu_bo_fence(bo,
+				&kfd_vm->process_info->eviction_fence->base,
+				true);
+	ret = unreserve_bo_and_vms(&ctx, false, false);
+
+	goto out;
+
+map_bo_to_gpuvm_failed:
+	if (bo_va_entry_aql)
+		remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
+add_bo_to_vm_failed_aql:
+	if (bo_va_entry)
+		remove_bo_from_vm(adev, bo_va_entry, bo_size);
+add_bo_to_vm_failed:
+	unreserve_bo_and_vms(&ctx, false, false);
+out:
+	mutex_unlock(&mem->process_info->lock);
+	mutex_unlock(&mem->lock);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdkfd_process_info *process_info =
+		((struct amdkfd_vm *)vm)->process_info;
+	unsigned long bo_size = mem->bo->tbo.mem.size;
+	struct kfd_bo_va_list *entry;
+	struct bo_vm_reservation_context ctx;
+	int ret;
+
+	mutex_lock(&mem->lock);
+
+	ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
+	if (unlikely(ret))
+		goto out;
+	/* If no VMs were reserved, it means the BO wasn't actually mapped */
+	if (ctx.n_vms == 0) {
+		ret = -EINVAL;
+		goto unreserve_out;
+	}
+
+	ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
+	if (unlikely(ret))
+		goto unreserve_out;
+
+	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
+		mem->va,
+		mem->va + bo_size * (1 + mem->aql_queue),
+		vm);
+
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
+			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
+					entry->va,
+					entry->va + bo_size,
+					entry);
+
+			ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
+			if (ret == 0) {
+				entry->is_mapped = false;
+			} else {
+				pr_err("failed to unmap VA 0x%llx\n",
+						mem->va);
+				goto unreserve_out;
+			}
+
+			mem->mapped_to_gpu_memory--;
+			pr_debug("\t DEC mapping count %d\n",
+					mem->mapped_to_gpu_memory);
+		}
+	}
+
+	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
+	 * required.
+	 */
+	if (mem->mapped_to_gpu_memory == 0 &&
+	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
+		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+						process_info->eviction_fence,
+						    NULL, NULL);
+
+unreserve_out:
+	unreserve_bo_and_vms(&ctx, false, false);
+out:
+	mutex_unlock(&mem->lock);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_sync_memory(
+		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
+{
+	struct amdgpu_sync sync;
+	int ret;
+
+	amdgpu_sync_create(&sync);
+
+	mutex_lock(&mem->lock);
+	amdgpu_sync_clone(&mem->sync, &sync);
+	mutex_unlock(&mem->lock);
+
+	ret = amdgpu_sync_wait(&sync, intr);
+	amdgpu_sync_free(&sync);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+		struct kgd_mem *mem, void **kptr, uint64_t *size)
+{
+	int ret;
+	struct amdgpu_bo *bo = mem->bo;
+
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		pr_err("userptr can't be mapped to kernel\n");
+		return -EINVAL;
+	}
+
+	/* Delete kgd_mem from kfd_bo_list to avoid re-validating
+	 * this BO during restore after eviction.
+	 */
+	mutex_lock(&mem->process_info->lock);
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("Failed to reserve bo. ret %d\n", ret);
+		goto bo_reserve_failed;
+	}
+
+	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+	if (ret) {
+		pr_err("Failed to pin bo. ret %d\n", ret);
+		goto pin_failed;
+	}
+
+	ret = amdgpu_bo_kmap(bo, kptr);
+	if (ret) {
+		pr_err("Failed to map bo to kernel. ret %d\n", ret);
+		goto kmap_failed;
+	}
+
+	amdgpu_amdkfd_remove_eviction_fence(
+		bo, mem->process_info->eviction_fence, NULL, NULL);
+	list_del_init(&mem->validate_list.head);
+
+	if (size)
+		*size = amdgpu_bo_size(bo);
+
+	amdgpu_bo_unreserve(bo);
+
+	mutex_unlock(&mem->process_info->lock);
+	return 0;
+
+kmap_failed:
+	amdgpu_bo_unpin(bo);
+pin_failed:
+	amdgpu_bo_unreserve(bo);
+bo_reserve_failed:
+	mutex_unlock(&mem->process_info->lock);
+
+	return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
+ *   KFD process identified by process_info
+ *
+ * @process_info: amdkfd_process_info of the KFD process
+ *
+ * After memory eviction, restore thread calls this function. The function
+ * should be called when the Process is still valid. BO restore involves -
+ *
+ * 1.  Release old eviction fence and create new one
+ * 2.  Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
+ * 3.  Use the second PD list and kfd_bo_list to create a list (ctx.list) of
+ *     BOs that need to be reserved.
+ * 4.  Reserve all the BOs
+ * 5.  Validate PD and PT BOs.
+ * 6.  Validate all KFD BOs using kfd_bo_list, map them and add a new fence
+ * 7.  Add fence to all PD and PT BOs.
+ * 8.  Unreserve all BOs
+ */
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+{
+	struct amdgpu_bo_list_entry *pd_bo_list;
+	struct amdkfd_process_info *process_info = info;
+	struct amdkfd_vm *peer_vm;
+	struct kgd_mem *mem;
+	struct bo_vm_reservation_context ctx;
+	struct amdgpu_amdkfd_fence *new_fence;
+	int ret = 0, i;
+	struct list_head duplicate_save;
+	struct amdgpu_sync sync_obj;
+
+	INIT_LIST_HEAD(&duplicate_save);
+	INIT_LIST_HEAD(&ctx.list);
+	INIT_LIST_HEAD(&ctx.duplicates);
+
+	pd_bo_list = kcalloc(process_info->n_vms,
+			     sizeof(struct amdgpu_bo_list_entry),
+			     GFP_KERNEL);
+	if (!pd_bo_list)
+		return -ENOMEM;
+
+	i = 0;
+	mutex_lock(&process_info->lock);
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			vm_list_node)
+		amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
+				    &pd_bo_list[i++]);
+
+	/* Reserve all BOs and page tables/directory. Add all BOs from
+	 * kfd_bo_list to ctx.list
+	 */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+			    validate_list.head) {
+
+		list_add_tail(&mem->resv_list.head, &ctx.list);
+		mem->resv_list.bo = mem->validate_list.bo;
+		mem->resv_list.shared = mem->validate_list.shared;
+	}
+
+	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
+				     false, &duplicate_save);
+	if (ret) {
+		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
+		goto ttm_reserve_fail;
+	}
+
+	amdgpu_sync_create(&sync_obj);
+
+	/* Validate PDs and PTs */
+	ret = process_validate_vms(process_info);
+	if (ret)
+		goto validate_map_fail;
+
+	/* Wait for PD/PTs validate to finish */
+	/* FIXME: I think this isn't needed */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
+
+		ttm_bo_wait(&bo->tbo, false, false);
+	}
+
+	/* Validate BOs and map them to GPUVM (update VM page tables). */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+			    validate_list.head) {
+
+		struct amdgpu_bo *bo = mem->bo;
+		uint32_t domain = mem->domain;
+		struct kfd_bo_va_list *bo_va_entry;
+
+		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
+		if (ret) {
+			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
+			goto validate_map_fail;
+		}
+
+		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
+				    bo_list) {
+			ret = update_gpuvm_pte((struct amdgpu_device *)
+					      bo_va_entry->kgd_dev,
+					      bo_va_entry,
+					      &sync_obj);
+			if (ret) {
+				pr_debug("Memory eviction: update PTE failed. Try again\n");
+				goto validate_map_fail;
+			}
+		}
+	}
+
+	/* Update page directories */
+	ret = process_update_pds(process_info, &sync_obj);
+	if (ret) {
+		pr_debug("Memory eviction: update PDs failed. Try again\n");
+		goto validate_map_fail;
+	}
+
+	amdgpu_sync_wait(&sync_obj, false);
+
+	/* Release old eviction fence and create new one, because fence only
+	 * goes from unsignaled to signaled, fence cannot be reused.
+	 * Use context and mm from the old fence.
+	 */
+	new_fence = amdgpu_amdkfd_fence_create(
+				process_info->eviction_fence->base.context,
+				process_info->eviction_fence->mm);
+	if (!new_fence) {
+		pr_err("Failed to create eviction fence\n");
+		ret = -ENOMEM;
+		goto validate_map_fail;
+	}
+	dma_fence_put(&process_info->eviction_fence->base);
+	process_info->eviction_fence = new_fence;
+	*ef = dma_fence_get(&new_fence->base);
+
+	/* Wait for validate to finish and attach new eviction fence */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+		validate_list.head)
+		ttm_bo_wait(&mem->bo->tbo, false, false);
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+		validate_list.head)
+		amdgpu_bo_fence(mem->bo,
+			&process_info->eviction_fence->base, true);
+
+	/* Attach eviction fence to PD / PT BOs */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
+
+		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
+	}
+
+validate_map_fail:
+	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
+	amdgpu_sync_free(&sync_obj);
+ttm_reserve_fail:
+	mutex_unlock(&process_info->lock);
+	kfree(pd_bo_list);
+	return ret;
+}
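The restore path above is driven from the KFD side after an eviction. A minimal sketch of such a caller follows; it is editorial, not part of the patch. The worker structure, its field names and the retry interval are assumptions — only amdgpu_amdkfd_gpuvm_restore_process_bos() itself comes from the code above.

/* Hypothetical restore worker: retry the restore until it succeeds and
 * then adopt the new eviction fence returned by the helper.
 */
static void example_restore_bo_worker(struct work_struct *work)
{
	struct example_kfd_process *p =
		container_of(work, struct example_kfd_process, restore_work.work);
	struct dma_fence *new_ef = NULL;

	if (amdgpu_amdkfd_gpuvm_restore_process_bos(p->process_info, &new_ef)) {
		/* Reservation or validation failed; try again later */
		schedule_delayed_work(&p->restore_work, msecs_to_jiffies(100));
		return;
	}

	dma_fence_put(p->last_eviction_fence);
	p->last_eviction_fence = new_ef;
}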

+ 91 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c

@@ -114,6 +114,9 @@ union igp_info {
 	struct atom_integrated_system_info_v1_11 v11;
 };
 
+union umc_info {
+	struct atom_umc_info_v3_1 v31;
+};
 /*
  * Return vram width from integrated system info table, if available,
  * or 0 if not.
@@ -143,6 +146,94 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
+					       int atom_mem_type)
+{
+	int vram_type;
+
+	if (adev->flags & AMD_IS_APU) {
+		switch (atom_mem_type) {
+		case Ddr2MemType:
+		case LpDdr2MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR2;
+			break;
+		case Ddr3MemType:
+		case LpDdr3MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR3;
+			break;
+		case Ddr4MemType:
+		case LpDdr4MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR4;
+			break;
+		default:
+			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+			break;
+		}
+	} else {
+		switch (atom_mem_type) {
+		case ATOM_DGPU_VRAM_TYPE_GDDR5:
+			vram_type = AMDGPU_VRAM_TYPE_GDDR5;
+			break;
+		case ATOM_DGPU_VRAM_TYPE_HBM:
+			vram_type = AMDGPU_VRAM_TYPE_HBM;
+			break;
+		default:
+			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+			break;
+		}
+	}
+
+	return vram_type;
+}
+/*
+ * Return vram type from either integrated system info table
+ * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
+ */
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	int index;
+	u16 data_offset, size;
+	union igp_info *igp_info;
+	union umc_info *umc_info;
+	u8 frev, crev;
+	u8 mem_type;
+
+	if (adev->flags & AMD_IS_APU)
+		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+						    integratedsysteminfo);
+	else
+		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+						    umc_info);
+	if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+					  index, &size,
+					  &frev, &crev, &data_offset)) {
+		if (adev->flags & AMD_IS_APU) {
+			igp_info = (union igp_info *)
+				(mode_info->atom_context->bios + data_offset);
+			switch (crev) {
+			case 11:
+				mem_type = igp_info->v11.memorytype;
+				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+			default:
+				return 0;
+			}
+		} else {
+			umc_info = (union umc_info *)
+				(mode_info->atom_context->bios + data_offset);
+			switch (crev) {
+			case 1:
+				mem_type = umc_info->v31.vram_type;
+				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+			default:
+				return 0;
+			}
+		}
+	}
+
+	return 0;
+}
+
 union firmware_info {
 	struct atom_firmware_info_v3_1 v31;
 };
@@ -151,10 +242,6 @@ union smu_info {
 	struct atom_smu_info_v3_1 v31;
 };
 
-union umc_info {
-	struct atom_umc_info_v3_1 v31;
-};
-
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
 {
 	struct amdgpu_mode_info *mode_info = &adev->mode_info;

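For context, a consumer of the new helper might log the detected memory type during early init roughly as follows. This is an editorial sketch, not part of the patch: the surrounding function is hypothetical, while amdgpu_atomfirmware_get_vram_type() and the AMDGPU_VRAM_TYPE_* values come from the hunks above.

/* Illustrative only: query the VRAM type reported by the VBIOS and log it. */
static void example_log_vram_type(struct amdgpu_device *adev)
{
	int vram_type = amdgpu_atomfirmware_get_vram_type(adev);

	if (vram_type == AMDGPU_VRAM_TYPE_UNKNOWN)
		dev_info(adev->dev, "VBIOS did not report a VRAM type\n");
	else
		dev_info(adev->dev, "VRAM type %d reported by VBIOS\n", vram_type);
}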
+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h

@@ -28,6 +28,7 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
 
 #endif

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c

@@ -568,6 +568,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
 	/* HG _PR3 doesn't seem to work on this A+A weston board */
 	{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0, 0, 0, 0, 0 },
 };
 

+ 4 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c

@@ -80,8 +80,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 	int time;
 
 	n = AMDGPU_BENCHMARK_ITERATIONS;
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL,
-			     NULL, 0, &sobj);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0,
+			     ttm_bo_type_kernel, NULL, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
@@ -93,8 +93,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 	if (r) {
 		goto out_cleanup;
 	}
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL,
-			     NULL, 0, &dobj);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
+			     ttm_bo_type_kernel, NULL, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}

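Both hunks above adapt callers to the reworked amdgpu_bo_create() signature: the old "kernel" bool is replaced by an explicit ttm_bo_type and fewer parameters are passed. A hedged sketch of a caller written against the new form (the buffer size and domain are arbitrary, and the wrapper itself is invented for illustration):

/* Illustrative only: allocate a small kernel-owned GTT buffer using the
 * argument order introduced by this series.
 */
static int example_alloc_gtt_bo(struct amdgpu_device *adev, struct amdgpu_bo **bo)
{
	return amdgpu_bo_create(adev, 4096, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				0, ttm_bo_type_kernel, NULL, bo);
}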
+ 4 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c

@@ -233,8 +233,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 	for (i = 0; i < list->num_entries; i++) {
 		unsigned priority = list->array[i].priority;
 
-		list_add_tail(&list->array[i].tv.head,
-			      &bucket[priority]);
+		if (!list->array[i].robj->parent)
+			list_add_tail(&list->array[i].tv.head,
+				      &bucket[priority]);
+
 		list->array[i].user_pages = NULL;
 	}
 

+ 5 - 426
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

@@ -24,7 +24,6 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/acpi.h>
 #include <drm/drmP.h>
 #include <linux/firmware.h>
 #include <drm/amdgpu_drm.h>
@@ -42,152 +41,6 @@ struct amdgpu_cgs_device {
 	struct amdgpu_device *adev =					\
 		((struct amdgpu_cgs_device *)cgs_device)->adev
 
-static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device,
-			int (*call_back_func)(struct amd_pp_init *, void **))
-{
-	CGS_FUNC_ADEV;
-	struct amd_pp_init pp_init;
-	struct amd_powerplay *amd_pp;
-
-	if (call_back_func == NULL)
-		return NULL;
-
-	amd_pp = &(adev->powerplay);
-	pp_init.chip_family = adev->family;
-	pp_init.chip_id = adev->asic_type;
-	pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
-	pp_init.feature_mask = amdgpu_pp_feature_mask;
-	pp_init.device = cgs_device;
-	if (call_back_func(&pp_init, &(amd_pp->pp_handle)))
-		return NULL;
-
-	return adev->powerplay.pp_handle;
-}
-
-static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
-				    enum cgs_gpu_mem_type type,
-				    uint64_t size, uint64_t align,
-				    cgs_handle_t *handle)
-{
-	CGS_FUNC_ADEV;
-	uint16_t flags = 0;
-	int ret = 0;
-	uint32_t domain = 0;
-	struct amdgpu_bo *obj;
-
-	/* fail if the alignment is not a power of 2 */
-	if (((align != 1) && (align & (align - 1)))
-	    || size == 0 || align == 0)
-		return -EINVAL;
-
-
-	switch(type) {
-	case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-		flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		break;
-	case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-		flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		break;
-	case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
-		domain = AMDGPU_GEM_DOMAIN_GTT;
-		break;
-	case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
-		flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-		domain = AMDGPU_GEM_DOMAIN_GTT;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-
-	*handle = 0;
-
-	ret = amdgpu_bo_create(adev, size, align, true, domain, flags,
-			       NULL, NULL, 0, &obj);
-	if (ret) {
-		DRM_ERROR("(%d) bo create failed\n", ret);
-		return ret;
-	}
-	*handle = (cgs_handle_t)obj;
-
-	return ret;
-}
-
-static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
-	if (obj) {
-		int r = amdgpu_bo_reserve(obj, true);
-		if (likely(r == 0)) {
-			amdgpu_bo_kunmap(obj);
-			amdgpu_bo_unpin(obj);
-			amdgpu_bo_unreserve(obj);
-		}
-		amdgpu_bo_unref(&obj);
-
-	}
-	return 0;
-}
-
-static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
-				   uint64_t *mcaddr)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
-	WARN_ON_ONCE(obj->placement.num_placement > 1);
-
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
-
-static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	r = amdgpu_bo_unpin(obj);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
-
-static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
-				   void **map)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	r = amdgpu_bo_kmap(obj, map);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
-
-static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	amdgpu_bo_kunmap(obj);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
 
 
 static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
 static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
 {
 {
@@ -801,11 +654,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 				else
 				else
 					strcpy(fw_name, "amdgpu/vega10_smc.bin");
 					strcpy(fw_name, "amdgpu/vega10_smc.bin");
 				break;
 				break;
-			case CHIP_CARRIZO:
-			case CHIP_STONEY:
-			case CHIP_RAVEN:
-				adev->pm.fw_version = info->version;
-				return 0;
 			default:
 			default:
 				DRM_ERROR("SMC firmware not supported\n");
 				DRM_ERROR("SMC firmware not supported\n");
 				return -EINVAL;
 				return -EINVAL;
@@ -857,61 +705,6 @@ static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
 	return amdgpu_sriov_vf(adev);
 }
 
-static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
-					struct cgs_system_info *sys_info)
-{
-	CGS_FUNC_ADEV;
-
-	if (NULL == sys_info)
-		return -ENODEV;
-
-	if (sizeof(struct cgs_system_info) != sys_info->size)
-		return -ENODEV;
-
-	switch (sys_info->info_id) {
-	case CGS_SYSTEM_INFO_ADAPTER_BDF_ID:
-		sys_info->value = adev->pdev->devfn | (adev->pdev->bus->number << 8);
-		break;
-	case CGS_SYSTEM_INFO_PCIE_GEN_INFO:
-		sys_info->value = adev->pm.pcie_gen_mask;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_MLW:
-		sys_info->value = adev->pm.pcie_mlw_mask;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_DEV:
-		sys_info->value = adev->pdev->device;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_REV:
-		sys_info->value = adev->pdev->revision;
-		break;
-	case CGS_SYSTEM_INFO_CG_FLAGS:
-		sys_info->value = adev->cg_flags;
-		break;
-	case CGS_SYSTEM_INFO_PG_FLAGS:
-		sys_info->value = adev->pg_flags;
-		break;
-	case CGS_SYSTEM_INFO_GFX_CU_INFO:
-		sys_info->value = adev->gfx.cu_info.number;
-		break;
-	case CGS_SYSTEM_INFO_GFX_SE_INFO:
-		sys_info->value = adev->gfx.config.max_shader_engines;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_SUB_SYS_ID:
-		sys_info->value = adev->pdev->subsystem_device;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID:
-		sys_info->value = adev->pdev->subsystem_vendor;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN:
-		sys_info->value = adev->pdev->devfn;
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
 static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 					  struct cgs_display_info *info)
 {
@@ -953,6 +746,11 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 								(amdgpu_crtc->v_border * 2);
 					mode_info->vblank_time_us = vblank_lines * line_time_us;
 					mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
+					/* we have issues with mclk switching with refresh rates
+					 * over 120 hz on the non-DC code.
+					 */
+					if (mode_info->refresh_rate > 120)
+						mode_info->vblank_time_us = 0;
 					mode_info = NULL;
 				}
 			}
@@ -977,223 +775,7 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena
 	return 0;
 }
 
-/** \brief evaluate acpi namespace object, handle or pathname must be valid
- *  \param cgs_device
- *  \param info input/output arguments for the control method
- *  \return status
- */
-
-#if defined(CONFIG_ACPI)
-static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
-				    struct cgs_acpi_method_info *info)
-{
-	CGS_FUNC_ADEV;
-	acpi_handle handle;
-	struct acpi_object_list input;
-	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *params, *obj;
-	uint8_t name[5] = {'\0'};
-	struct cgs_acpi_method_argument *argument;
-	uint32_t i, count;
-	acpi_status status;
-	int result;
-
-	handle = ACPI_HANDLE(&adev->pdev->dev);
-	if (!handle)
-		return -ENODEV;
-
-	memset(&input, 0, sizeof(struct acpi_object_list));
-
-	/* validate input info */
-	if (info->size != sizeof(struct cgs_acpi_method_info))
-		return -EINVAL;
-
-	input.count = info->input_count;
-	if (info->input_count > 0) {
-		if (info->pinput_argument == NULL)
-			return -EINVAL;
-		argument = info->pinput_argument;
-		for (i = 0; i < info->input_count; i++) {
-			if (((argument->type == ACPI_TYPE_STRING) ||
-			     (argument->type == ACPI_TYPE_BUFFER)) &&
-			    (argument->pointer == NULL))
-				return -EINVAL;
-			argument++;
-		}
-	}
-
-	if (info->output_count > 0) {
-		if (info->poutput_argument == NULL)
-			return -EINVAL;
-		argument = info->poutput_argument;
-		for (i = 0; i < info->output_count; i++) {
-			if (((argument->type == ACPI_TYPE_STRING) ||
-				(argument->type == ACPI_TYPE_BUFFER))
-				&& (argument->pointer == NULL))
-				return -EINVAL;
-			argument++;
-		}
-	}
-
-	/* The path name passed to acpi_evaluate_object should be null terminated */
-	if ((info->field & CGS_ACPI_FIELD_METHOD_NAME) != 0) {
-		strncpy(name, (char *)&(info->name), sizeof(uint32_t));
-		name[4] = '\0';
-	}
-
-	/* parse input parameters */
-	if (input.count > 0) {
-		input.pointer = params =
-				kzalloc(sizeof(union acpi_object) * input.count, GFP_KERNEL);
-		if (params == NULL)
-			return -EINVAL;
-
-		argument = info->pinput_argument;
-
-		for (i = 0; i < input.count; i++) {
-			params->type = argument->type;
-			switch (params->type) {
-			case ACPI_TYPE_INTEGER:
-				params->integer.value = argument->value;
-				break;
-			case ACPI_TYPE_STRING:
-				params->string.length = argument->data_length;
-				params->string.pointer = argument->pointer;
-				break;
-			case ACPI_TYPE_BUFFER:
-				params->buffer.length = argument->data_length;
-				params->buffer.pointer = argument->pointer;
-				break;
-			default:
-				break;
-			}
-			params++;
-			argument++;
-		}
-	}
-
-	/* parse output info */
-	count = info->output_count;
-	argument = info->poutput_argument;
-
-	/* evaluate the acpi method */
-	status = acpi_evaluate_object(handle, name, &input, &output);
-
-	if (ACPI_FAILURE(status)) {
-		result = -EIO;
-		goto free_input;
-	}
-
-	/* return the output info */
-	obj = output.pointer;
-
-	if (count > 1) {
-		if ((obj->type != ACPI_TYPE_PACKAGE) ||
-			(obj->package.count != count)) {
-			result = -EIO;
-			goto free_obj;
-		}
-		params = obj->package.elements;
-	} else
-		params = obj;
-
-	if (params == NULL) {
-		result = -EIO;
-		goto free_obj;
-	}
-
-	for (i = 0; i < count; i++) {
-		if (argument->type != params->type) {
-			result = -EIO;
-			goto free_obj;
-		}
-		switch (params->type) {
-		case ACPI_TYPE_INTEGER:
-			argument->value = params->integer.value;
-			break;
-		case ACPI_TYPE_STRING:
-			if ((params->string.length != argument->data_length) ||
-				(params->string.pointer == NULL)) {
-				result = -EIO;
-				goto free_obj;
-			}
-			strncpy(argument->pointer,
-				params->string.pointer,
-				params->string.length);
-			break;
-		case ACPI_TYPE_BUFFER:
-			if (params->buffer.pointer == NULL) {
-				result = -EIO;
-				goto free_obj;
-			}
-			memcpy(argument->pointer,
-				params->buffer.pointer,
-				argument->data_length);
-			break;
-		default:
-			break;
-		}
-		argument++;
-		params++;
-	}
-
-	result = 0;
-free_obj:
-	kfree(obj);
-free_input:
-	kfree((void *)input.pointer);
-	return result;
-}
-#else
-static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
-				struct cgs_acpi_method_info *info)
-{
-	return -EIO;
-}
-#endif
-
-static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
-					uint32_t acpi_method,
-					uint32_t acpi_function,
-					void *pinput, void *poutput,
-					uint32_t output_count,
-					uint32_t input_size,
-					uint32_t output_size)
-{
-	struct cgs_acpi_method_argument acpi_input[2] = { {0}, {0} };
-	struct cgs_acpi_method_argument acpi_output = {0};
-	struct cgs_acpi_method_info info = {0};
-
-	acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
-	acpi_input[0].data_length = sizeof(uint32_t);
-	acpi_input[0].value = acpi_function;
-
-	acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
-	acpi_input[1].data_length = input_size;
-	acpi_input[1].pointer = pinput;
-
-	acpi_output.type = CGS_ACPI_TYPE_BUFFER;
-	acpi_output.data_length = output_size;
-	acpi_output.pointer = poutput;
-
-	info.size = sizeof(struct cgs_acpi_method_info);
-	info.field = CGS_ACPI_FIELD_METHOD_NAME | CGS_ACPI_FIELD_INPUT_ARGUMENT_COUNT;
-	info.input_count = 2;
-	info.name = acpi_method;
-	info.pinput_argument = acpi_input;
-	info.output_count = output_count;
-	info.poutput_argument = &acpi_output;
-
-	return amdgpu_cgs_acpi_eval_object(cgs_device, &info);
-}
-
 static const struct cgs_ops amdgpu_cgs_ops = {
 static const struct cgs_ops amdgpu_cgs_ops = {
-	.alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
-	.free_gpu_mem = amdgpu_cgs_free_gpu_mem,
-	.gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem,
-	.gunmap_gpu_mem = amdgpu_cgs_gunmap_gpu_mem,
-	.kmap_gpu_mem = amdgpu_cgs_kmap_gpu_mem,
-	.kunmap_gpu_mem = amdgpu_cgs_kunmap_gpu_mem,
 	.read_register = amdgpu_cgs_read_register,
 	.write_register = amdgpu_cgs_write_register,
 	.read_ind_register = amdgpu_cgs_read_ind_register,
@@ -1208,12 +790,9 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	.set_clockgating_state = amdgpu_cgs_set_clockgating_state,
 	.get_active_displays_info = amdgpu_cgs_get_active_displays_info,
 	.notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
-	.call_acpi_method = amdgpu_cgs_call_acpi_method,
-	.query_system_info = amdgpu_cgs_query_system_info,
 	.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
 	.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
 	.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
-	.register_pp_handle = amdgpu_cgs_register_pp_handle,
 };
 
 static const struct cgs_os_ops amdgpu_cgs_os_ops = {

+ 18 - 23
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

@@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
 		/* don't do anything if sink is not display port, i.e.,
 		 * passive dp->(dvi|hdmi) adaptor
 		 */
-		if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
-			int saved_dpms = connector->dpms;
-			/* Only turn off the display if it's physically disconnected */
-			if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
-				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-			} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
-				/* Don't try to start link training before we
-				 * have the dpcd */
-				if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
-					return;
-
-				/* set it to OFF so that drm_helper_connector_dpms()
-				 * won't return immediately since the current state
-				 * is ON at this point.
-				 */
-				connector->dpms = DRM_MODE_DPMS_OFF;
-				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-			}
-			connector->dpms = saved_dpms;
+		if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+		    amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) &&
+		    amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+			/* Don't start link training before we have the DPCD */
+			if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+				return;
+
+			/* Turn the connector off and back on immediately, which
+			 * will trigger link training
+			 */
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
 		}
 	}
 }
@@ -877,7 +870,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
 		ret = connector_status_disconnected;
 
 	if (amdgpu_connector->ddc_bus)
-		dret = amdgpu_ddc_probe(amdgpu_connector, false);
+		dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
 	if (dret) {
 		amdgpu_connector->detected_by_load = false;
 		amdgpu_connector_free_edid(connector);
@@ -998,7 +991,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 	}
 
 	if (amdgpu_connector->ddc_bus)
-		dret = amdgpu_ddc_probe(amdgpu_connector, false);
+		dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
 	if (dret) {
 		amdgpu_connector->detected_by_load = false;
 		amdgpu_connector_free_edid(connector);
@@ -1401,7 +1394,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 			/* setup ddc on the bridge */
 			amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
 			/* bridge chips are always aux */
-			if (amdgpu_ddc_probe(amdgpu_connector, true)) /* try DDC */
+			/* try DDC */
+			if (amdgpu_display_ddc_probe(amdgpu_connector, true))
 				ret = connector_status_connected;
 			else if (amdgpu_connector->dac_load_detect) { /* try load detection */
 				const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
@@ -1421,7 +1415,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 					ret = connector_status_connected;
 			} else {
 				/* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */
-				if (amdgpu_ddc_probe(amdgpu_connector, false))
+				if (amdgpu_display_ddc_probe(amdgpu_connector,
+							     false))
 					ret = connector_status_connected;
 			}
 		}

+ 11 - 11
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -257,7 +257,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 		return;
 	}
 
-	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
+	total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
 	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
 
@@ -302,8 +302,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 
 	/* Do the same for visible VRAM if half of it is free */
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
-		u64 total_vis_vram = adev->mc.visible_vram_size;
+	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 
 
@@ -346,8 +346,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
-		.allow_reserved_eviction = false,
-		.resv = bo->tbo.resv
+		.resv = bo->tbo.resv,
+		.flags = 0
 	};
 	uint32_t domain;
 	int r;
 	int r;
@@ -359,7 +359,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
 	if (p->bytes_moved < p->bytes_moved_threshold) {
-		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
 			 * visible VRAM if we've depleted our allowance to do
 			 * visible VRAM if we've depleted our allowance to do
@@ -381,9 +381,9 @@ retry:
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
 	p->bytes_moved += ctx.bytes_moved;
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		p->bytes_moved_vis += ctx.bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -437,9 +437,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
 		update_bytes_moved_vis =
-			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
+			bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
 		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
@@ -542,7 +542,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	while (1) {

+ 12 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

@@ -767,10 +767,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+
+	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+	return 0;
+}
+
 static const struct drm_info_list amdgpu_debugfs_list[] = {
 	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
 	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
-	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}
+	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
+	{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
 };
 };
 
 
 int amdgpu_debugfs_init(struct amdgpu_device *adev)
 int amdgpu_debugfs_init(struct amdgpu_device *adev)

+ 166 - 123
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -87,6 +87,8 @@ static const char *amdgpu_asic_name[] = {
 	"LAST",
 	"LAST",
 };
 };
 
 
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+
 bool amdgpu_device_is_px(struct drm_device *dev)
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -121,6 +123,32 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 	return ret;
 	return ret;
 }
 }
 
 
+/*
+ * MMIO register read with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ *
+*/
+
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
+	if (offset < adev->rmmio_size)
+		return (readb(adev->rmmio + offset));
+	BUG();
+}
+
+/*
+ * MMIO register write with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ * @value: the value want to be written to the register
+ *
+*/
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
+	if (offset < adev->rmmio_size)
+		writeb(value, adev->rmmio + offset);
+	else
+		BUG();
+}
+
+
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    uint32_t acc_flags)
 		    uint32_t acc_flags)
 {
 {
@@ -492,7 +520,7 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 
 
 		/* clear wb memory */
 		/* clear wb memory */
-		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t));
+		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -530,8 +558,9 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
  */
  */
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 {
 {
+	wb >>= 3;
 	if (wb < adev->wb.num_wb)
 	if (wb < adev->wb.num_wb)
-		__clear_bit(wb >> 3, adev->wb.used);
+		__clear_bit(wb, adev->wb.used);
 }
 }
 
 
 /**
 /**
@@ -544,7 +573,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
  * as parameter.
  * as parameter.
  */
  */
 void amdgpu_device_vram_location(struct amdgpu_device *adev,
 void amdgpu_device_vram_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc, u64 base)
+				 struct amdgpu_gmc *mc, u64 base)
 {
 {
 	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
 	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
 
 
@@ -570,11 +599,11 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
  * FIXME: when reducing GTT size align new size on power of 2.
  * FIXME: when reducing GTT size align new size on power of 2.
  */
  */
 void amdgpu_device_gart_location(struct amdgpu_device *adev,
 void amdgpu_device_gart_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc)
+				 struct amdgpu_gmc *mc)
 {
 {
 	u64 size_af, size_bf;
 	u64 size_af, size_bf;
 
 
-	size_af = adev->mc.mc_mask - mc->vram_end;
+	size_af = adev->gmc.mc_mask - mc->vram_end;
 	size_bf = mc->vram_start;
 	size_bf = mc->vram_start;
 	if (size_bf > size_af) {
 	if (size_bf > size_af) {
 		if (mc->gart_size > size_bf) {
 		if (mc->gart_size > size_bf) {
@@ -608,7 +637,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
  */
  */
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 {
 {
-	u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size);
+	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 	struct pci_bus *root;
 	struct pci_bus *root;
 	struct resource *res;
 	struct resource *res;
@@ -829,6 +858,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 		dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
 		dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
 		amdgpu_lockup_timeout = 10000;
 		amdgpu_lockup_timeout = 10000;
 	}
 	}
+
+	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 }
 }
 
 
 /**
 /**
@@ -1036,7 +1067,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 	if (!ip_block_version)
 	if (!ip_block_version)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks,
+	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
 		  ip_block_version->funcs->name);
 		  ip_block_version->funcs->name);
 
 
 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -1310,6 +1341,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			return r;
 			return r;
 		}
 		}
 		adev->ip_blocks[i].status.sw = true;
 		adev->ip_blocks[i].status.sw = true;
+
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_device_vram_scratch_init(adev);
 			r = amdgpu_device_vram_scratch_init(adev);
@@ -1343,8 +1375,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.sw)
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
 			continue;
-		/* gmc hw init is done early */
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC)
+		if (adev->ip_blocks[i].status.hw)
 			continue;
 			continue;
 		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 		if (r) {
 		if (r) {
@@ -1378,12 +1409,16 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 {
 {
 	int i = 0, r;
 	int i = 0, r;
 
 
+	if (amdgpu_emu_mode == 1)
+		return 0;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
-		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 										     AMD_CG_STATE_GATE);
 										     AMD_CG_STATE_GATE);
@@ -1432,7 +1467,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.hw)
 		if (!adev->ip_blocks[i].status.hw)
 			continue;
 			continue;
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
+			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 										     AMD_CG_STATE_UNGATE);
 										     AMD_CG_STATE_UNGATE);
@@ -1455,11 +1491,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.hw)
 		if (!adev->ip_blocks[i].status.hw)
 			continue;
 			continue;
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
-			amdgpu_free_static_csa(adev);
-			amdgpu_device_wb_fini(adev);
-			amdgpu_device_vram_scratch_fini(adev);
-		}
 
 
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
 			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
@@ -1483,9 +1514,19 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = false;
 		adev->ip_blocks[i].status.hw = false;
 	}
 	}
 
 
+	/* disable all interrupts */
+	amdgpu_irq_disable_all(adev);
+
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.sw)
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
 			continue;
+
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+			amdgpu_free_static_csa(adev);
+			amdgpu_device_wb_fini(adev);
+			amdgpu_device_vram_scratch_fini(adev);
+		}
+
 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
 		/* XXX handle errors */
 		/* XXX handle errors */
 		if (r) {
 		if (r) {
@@ -1536,7 +1577,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.valid)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 			continue;
 		/* ungate blocks so that suspend can properly shut them down */
 		/* ungate blocks so that suspend can properly shut them down */
-		if (i != AMD_IP_BLOCK_TYPE_SMC) {
+		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
+			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 										     AMD_CG_STATE_UNGATE);
 										     AMD_CG_STATE_UNGATE);
 			if (r) {
 			if (r) {
@@ -1582,6 +1624,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
 
 
 			r = block->version->funcs->hw_init(adev);
 			r = block->version->funcs->hw_init(adev);
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+			if (r)
+				return r;
 		}
 		}
 	}
 	}
 
 
@@ -1615,6 +1659,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 
 
 			r = block->version->funcs->hw_init(adev);
 			r = block->version->funcs->hw_init(adev);
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+			if (r)
+				return r;
 		}
 		}
 	}
 	}
 
 
@@ -1701,6 +1747,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_BONAIRE:
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
 	case CHIP_HAWAII:
 	case CHIP_KAVERI:
 	case CHIP_KAVERI:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
 	case CHIP_CARRIZO:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 	case CHIP_STONEY:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS11:
@@ -1711,9 +1759,6 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 #if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
 #if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
 		return amdgpu_dc != 0;
 		return amdgpu_dc != 0;
 #endif
 #endif
-	case CHIP_KABINI:
-	case CHIP_MULLINS:
-		return amdgpu_dc > 0;
 	case CHIP_VEGA10:
 	case CHIP_VEGA10:
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case CHIP_RAVEN:
 	case CHIP_RAVEN:
@@ -1768,14 +1813,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->flags = flags;
 	adev->flags = flags;
 	adev->asic_type = flags & AMD_ASIC_MASK;
 	adev->asic_type = flags & AMD_ASIC_MASK;
 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
-	adev->mc.gart_size = 512 * 1024 * 1024;
+	if (amdgpu_emu_mode == 1)
+		adev->usec_timeout *= 2;
+	adev->gmc.gart_size = 512 * 1024 * 1024;
 	adev->accel_working = false;
 	adev->accel_working = false;
 	adev->num_rings = 0;
 	adev->num_rings = 0;
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->vm_manager.vm_pte_num_rings = 0;
-	adev->gart.gart_funcs = NULL;
+	adev->gmc.gmc_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
 
@@ -1864,6 +1911,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (adev->rio_mem == NULL)
 	if (adev->rio_mem == NULL)
 		DRM_INFO("PCI I/O BAR is not found.\n");
 		DRM_INFO("PCI I/O BAR is not found.\n");
 
 
+	amdgpu_device_get_pcie_info(adev);
+
 	/* early init functions */
 	/* early init functions */
 	r = amdgpu_device_ip_early_init(adev);
 	r = amdgpu_device_ip_early_init(adev);
 	if (r)
 	if (r)
@@ -1882,6 +1931,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (runtime)
 	if (runtime)
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
 
+	if (amdgpu_emu_mode == 1) {
+		/* post the asic on emulation mode */
+		emu_soc_asic_init(adev);
+		goto fence_driver_init;
+	}
+
 	/* Read BIOS */
 	/* Read BIOS */
 	if (!amdgpu_get_bios(adev)) {
 	if (!amdgpu_get_bios(adev)) {
 		r = -EINVAL;
 		r = -EINVAL;
@@ -1934,6 +1989,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 			amdgpu_atombios_i2c_init(adev);
 			amdgpu_atombios_i2c_init(adev);
 	}
 	}
 
 
+fence_driver_init:
 	/* Fence driver */
 	/* Fence driver */
 	r = amdgpu_fence_driver_init(adev);
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 	if (r) {
@@ -2065,6 +2121,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
 
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fence_driver_fini(adev);
+	amdgpu_pm_sysfs_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_device_ip_fini(adev);
 	r = amdgpu_device_ip_fini(adev);
 	if (adev->firmware.gpu_info_fw) {
 	if (adev->firmware.gpu_info_fw) {
@@ -2076,7 +2133,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	/* free i2c buses */
 	/* free i2c buses */
 	if (!amdgpu_device_has_dc_support(adev))
 	if (!amdgpu_device_has_dc_support(adev))
 		amdgpu_i2c_fini(adev);
 		amdgpu_i2c_fini(adev);
-	amdgpu_atombios_fini(adev);
+
+	if (amdgpu_emu_mode != 1)
+		amdgpu_atombios_fini(adev);
+
 	kfree(adev->bios);
 	kfree(adev->bios);
 	adev->bios = NULL;
 	adev->bios = NULL;
 	if (!pci_is_thunderbolt_attached(adev->pdev))
 	if (!pci_is_thunderbolt_attached(adev->pdev))
@@ -2090,7 +2150,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	iounmap(adev->rmmio);
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
 	adev->rmmio = NULL;
 	amdgpu_device_doorbell_fini(adev);
 	amdgpu_device_doorbell_fini(adev);
-	amdgpu_pm_sysfs_fini(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 }
 }
 
 
@@ -2284,14 +2343,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
 				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
 			}
 			}
 			drm_modeset_unlock_all(dev);
 			drm_modeset_unlock_all(dev);
-		} else {
-			/*
-			 * There is no equivalent atomic helper to turn on
-			 * display, so we defined our own function for this,
-			 * once suspend resume is supported by the atomic
-			 * framework this will be reworked
-			 */
-			amdgpu_dm_display_resume(adev);
 		}
 		}
 	}
 	}
 
 
@@ -2458,17 +2509,71 @@ err:
 	return r;
 	return r;
 }
 }
 
 
+static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_bo *bo, *tmp;
+	struct dma_fence *fence = NULL, *next = NULL;
+	long r = 1;
+	int i = 0;
+	long tmo;
+
+	if (amdgpu_sriov_runtime(adev))
+		tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
+	else
+		tmo = msecs_to_jiffies(100);
+
+	DRM_INFO("recover vram bo from shadow start\n");
+	mutex_lock(&adev->shadow_list_lock);
+	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+		next = NULL;
+		amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
+		if (fence) {
+			r = dma_fence_wait_timeout(fence, false, tmo);
+			if (r == 0)
+				pr_err("wait fence %p[%d] timeout\n", fence, i);
+			else if (r < 0)
+				pr_err("wait fence %p[%d] interrupted\n", fence, i);
+			if (r < 1) {
+				dma_fence_put(fence);
+				fence = next;
+				break;
+			}
+			i++;
+		}
+
+		dma_fence_put(fence);
+		fence = next;
+	}
+	mutex_unlock(&adev->shadow_list_lock);
+
+	if (fence) {
+		r = dma_fence_wait_timeout(fence, false, tmo);
+		if (r == 0)
+			pr_err("wait fence %p[%d] timeout\n", fence, i);
+		else if (r < 0)
+			pr_err("wait fence %p[%d] interrupted\n", fence, i);
+
+	}
+	dma_fence_put(fence);
+
+	if (r > 0)
+		DRM_INFO("recover vram bo from shadow done\n");
+	else
+		DRM_ERROR("recover vram bo from shadow failed\n");
+
+	return (r > 0?0:1);
+}
+
 /*
 /*
  * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
  * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
  *
  *
  * @adev: amdgpu device pointer
  * @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
  *
  *
  * attempt to do soft-reset or full-reset and reinitialize Asic
  * attempt to do soft-reset or full-reset and reinitialize Asic
  * return 0 means successed otherwise failed
  * return 0 means successed otherwise failed
 */
 */
-static int amdgpu_device_reset(struct amdgpu_device *adev,
-			       uint64_t* reset_flags)
+static int amdgpu_device_reset(struct amdgpu_device *adev)
 {
 {
 	bool need_full_reset, vram_lost = 0;
 	bool need_full_reset, vram_lost = 0;
 	int r;
 	int r;
@@ -2483,7 +2588,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev,
 			DRM_INFO("soft reset failed, will fallback to full reset!\n");
 			DRM_INFO("soft reset failed, will fallback to full reset!\n");
 			need_full_reset = true;
 			need_full_reset = true;
 		}
 		}
-
 	}
 	}
 
 
 	if (need_full_reset) {
 	if (need_full_reset) {
@@ -2532,13 +2636,8 @@ out:
 		}
 		}
 	}
 	}
 
 
-	if (reset_flags) {
-		if (vram_lost)
-			(*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
-
-		if (need_full_reset)
-			(*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
-	}
+	if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
+		r = amdgpu_device_handle_vram_lost(adev);
 
 
 	return r;
 	return r;
 }
 }
@@ -2547,14 +2646,11 @@ out:
  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
  *
  *
  * @adev: amdgpu device pointer
  * @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
  *
  *
  * do VF FLR and reinitialize Asic
  * do VF FLR and reinitialize Asic
  * return 0 means successed otherwise failed
  * return 0 means successed otherwise failed
 */
 */
-static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
-				     uint64_t *reset_flags,
-				     bool from_hypervisor)
+static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, bool from_hypervisor)
 {
 {
 	int r;
 	int r;
 
 
@@ -2575,28 +2671,20 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 
 
 	/* now we are okay to resume SMC/CP/SDMA */
 	/* now we are okay to resume SMC/CP/SDMA */
 	r = amdgpu_device_ip_reinit_late_sriov(adev);
 	r = amdgpu_device_ip_reinit_late_sriov(adev);
+	amdgpu_virt_release_full_gpu(adev, true);
 	if (r)
 	if (r)
 		goto error;
 		goto error;
 
 
 	amdgpu_irq_gpu_reset_resume_helper(adev);
 	amdgpu_irq_gpu_reset_resume_helper(adev);
 	r = amdgpu_ib_ring_tests(adev);
 	r = amdgpu_ib_ring_tests(adev);
-	if (r)
-		dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
-
-error:
-	/* release full control of GPU after ib test */
-	amdgpu_virt_release_full_gpu(adev, true);
 
 
-	if (reset_flags) {
-		if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-			(*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
-			atomic_inc(&adev->vram_lost_counter);
-		}
-
-		/* VF FLR or hotlink reset is always full-reset */
-		(*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
+	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+		atomic_inc(&adev->vram_lost_counter);
+		r = amdgpu_device_handle_vram_lost(adev);
 	}
 	}
 
 
+error:
+
 	return r;
 	return r;
 }
 }
 
 
@@ -2614,7 +2702,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job, bool force)
 			      struct amdgpu_job *job, bool force)
 {
 {
 	struct drm_atomic_state *state = NULL;
 	struct drm_atomic_state *state = NULL;
-	uint64_t reset_flags = 0;
 	int i, r, resched;
 	int i, r, resched;
 
 
 	if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
 	if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -2636,22 +2723,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 
 	/* block TTM */
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
 	/* store modesetting */
 	/* store modesetting */
 	if (amdgpu_device_has_dc_support(adev))
 	if (amdgpu_device_has_dc_support(adev))
 		state = drm_atomic_helper_suspend(adev->ddev);
 		state = drm_atomic_helper_suspend(adev->ddev);
 
 
-	/* block scheduler */
+	/* block all schedulers and reset given job's ring */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		struct amdgpu_ring *ring = adev->rings[i];
 
 
 		if (!ring || !ring->sched.thread)
 		if (!ring || !ring->sched.thread)
 			continue;
 			continue;
 
 
-		/* only focus on the ring hit timeout if &job not NULL */
+		kthread_park(ring->sched.thread);
+
 		if (job && job->ring->idx != i)
 		if (job && job->ring->idx != i)
 			continue;
 			continue;
 
 
-		kthread_park(ring->sched.thread);
 		drm_sched_hw_job_reset(&ring->sched, &job->base);
 		drm_sched_hw_job_reset(&ring->sched, &job->base);
 
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2659,74 +2747,29 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	}
 	}
 
 
 	if (amdgpu_sriov_vf(adev))
 	if (amdgpu_sriov_vf(adev))
-		r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true);
+		r = amdgpu_device_reset_sriov(adev, job ? false : true);
 	else
 	else
-		r = amdgpu_device_reset(adev, &reset_flags);
+		r = amdgpu_device_reset(adev);
 
 
-	if (!r) {
-		if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
-			(reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) {
-			struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-			struct amdgpu_bo *bo, *tmp;
-			struct dma_fence *fence = NULL, *next = NULL;
-
-			DRM_INFO("recover vram bo from shadow\n");
-			mutex_lock(&adev->shadow_list_lock);
-			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
-				next = NULL;
-				amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
-				if (fence) {
-					r = dma_fence_wait(fence, false);
-					if (r) {
-						WARN(r, "recovery from shadow isn't completed\n");
-						break;
-					}
-				}
-
-				dma_fence_put(fence);
-				fence = next;
-			}
-			mutex_unlock(&adev->shadow_list_lock);
-			if (fence) {
-				r = dma_fence_wait(fence, false);
-				if (r)
-					WARN(r, "recovery from shadow isn't completed\n");
-			}
-			dma_fence_put(fence);
-		}
-
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-
-			if (!ring || !ring->sched.thread)
-				continue;
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
 
 
-			/* only focus on the ring hit timeout if &job not NULL */
-			if (job && job->ring->idx != i)
-				continue;
+		if (!ring || !ring->sched.thread)
+			continue;
 
 
+		/* only recover the scheduler of the given job's ring,
+		 * or of all rings when @job is NULL, after the reset
+		 * above has completed
+		 */
+		if ((!job || job->ring->idx == i) && !r)
 			drm_sched_job_recovery(&ring->sched);
 			drm_sched_job_recovery(&ring->sched);
-			kthread_unpark(ring->sched.thread);
-		}
-	} else {
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-
-			if (!ring || !ring->sched.thread)
-				continue;
 
 
-			/* only focus on the ring hit timeout if &job not NULL */
-			if (job && job->ring->idx != i)
-				continue;
-
-			kthread_unpark(adev->rings[i]->sched.thread);
-		}
+		kthread_unpark(ring->sched.thread);
 	}
 	}
 
 
 	if (amdgpu_device_has_dc_support(adev)) {
 	if (amdgpu_device_has_dc_support(adev)) {
 		if (drm_atomic_helper_resume(adev->ddev, state))
 		if (drm_atomic_helper_resume(adev->ddev, state))
 			dev_info(adev->dev, "drm resume failed:%d\n", r);
 			dev_info(adev->dev, "drm resume failed:%d\n", r);
-		amdgpu_dm_display_resume(adev);
 	} else {
 	} else {
 		drm_helper_resume_force_mode(adev->ddev);
 		drm_helper_resume_force_mode(adev->ddev);
 	}
 	}
@@ -2747,7 +2790,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	return r;
 	return r;
 }
 }
 
 
-void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 {
 {
 	u32 mask;
 	u32 mask;
 	int ret;
 	int ret;

+ 72 - 55
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c

@@ -29,6 +29,7 @@
 #include "amdgpu_i2c.h"
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "atom.h"
 #include "amdgpu_connectors.h"
 #include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
 #include <asm/div64.h>
 #include <asm/div64.h>
 
 
 #include <linux/pm_runtime.h>
 #include <linux/pm_runtime.h>
@@ -36,7 +37,8 @@
 #include <drm/drm_edid.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_fb_helper.h>
 
 
-static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
+static void amdgpu_display_flip_callback(struct dma_fence *f,
+					 struct dma_fence_cb *cb)
 {
 {
 	struct amdgpu_flip_work *work =
 	struct amdgpu_flip_work *work =
 		container_of(cb, struct amdgpu_flip_work, cb);
 		container_of(cb, struct amdgpu_flip_work, cb);
@@ -45,8 +47,8 @@ static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
 	schedule_work(&work->flip_work.work);
 	schedule_work(&work->flip_work.work);
 }
 }
 
 
-static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
-				     struct dma_fence **f)
+static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
+					     struct dma_fence **f)
 {
 {
 	struct dma_fence *fence= *f;
 	struct dma_fence *fence= *f;
 
 
@@ -55,14 +57,15 @@ static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
 
 
 	*f = NULL;
 	*f = NULL;
 
 
-	if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+	if (!dma_fence_add_callback(fence, &work->cb,
+				    amdgpu_display_flip_callback))
 		return true;
 		return true;
 
 
 	dma_fence_put(fence);
 	dma_fence_put(fence);
 	return false;
 	return false;
 }
 }
 
 
-static void amdgpu_flip_work_func(struct work_struct *__work)
+static void amdgpu_display_flip_work_func(struct work_struct *__work)
 {
 {
 	struct delayed_work *delayed_work =
 	struct delayed_work *delayed_work =
 		container_of(__work, struct delayed_work, work);
 		container_of(__work, struct delayed_work, work);
@@ -76,20 +79,20 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 	unsigned i;
 	unsigned i;
 	int vpos, hpos;
 	int vpos, hpos;
 
 
-	if (amdgpu_flip_handle_fence(work, &work->excl))
+	if (amdgpu_display_flip_handle_fence(work, &work->excl))
 		return;
 		return;
 
 
 	for (i = 0; i < work->shared_count; ++i)
 	for (i = 0; i < work->shared_count; ++i)
-		if (amdgpu_flip_handle_fence(work, &work->shared[i]))
+		if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
 			return;
 			return;
 
 
 	/* Wait until we're out of the vertical blank period before the one
 	/* Wait until we're out of the vertical blank period before the one
 	 * targeted by the flip
 	 * targeted by the flip
 	 */
 	 */
 	if (amdgpu_crtc->enabled &&
 	if (amdgpu_crtc->enabled &&
-	    (amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
-					&vpos, &hpos, NULL, NULL,
-					&crtc->hwmode)
+	    (amdgpu_display_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
+						&vpos, &hpos, NULL, NULL,
+						&crtc->hwmode)
 	     & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
 	     & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
 	    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
 	    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
 	    (int)(work->target_vblank -
 	    (int)(work->target_vblank -
@@ -117,7 +120,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 /*
 /*
  * Handle unpin events outside the interrupt handler proper.
  * Handle unpin events outside the interrupt handler proper.
  */
  */
-static void amdgpu_unpin_work_func(struct work_struct *__work)
+static void amdgpu_display_unpin_work_func(struct work_struct *__work)
 {
 {
 	struct amdgpu_flip_work *work =
 	struct amdgpu_flip_work *work =
 		container_of(__work, struct amdgpu_flip_work, unpin_work);
 		container_of(__work, struct amdgpu_flip_work, unpin_work);
@@ -139,11 +142,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
 	kfree(work);
 	kfree(work);
 }
 }
 
 
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags, uint32_t target,
-				 struct drm_modeset_acquire_ctx *ctx)
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb,
+				struct drm_pending_vblank_event *event,
+				uint32_t page_flip_flags, uint32_t target,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -162,8 +165,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 	if (work == NULL)
 	if (work == NULL)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	INIT_DELAYED_WORK(&work->flip_work, amdgpu_flip_work_func);
-	INIT_WORK(&work->unpin_work, amdgpu_unpin_work_func);
+	INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func);
+	INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func);
 
 
 	work->event = event;
 	work->event = event;
 	work->adev = adev;
 	work->adev = adev;
@@ -189,7 +192,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 		goto cleanup;
 		goto cleanup;
 	}
 	}
 
 
-	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
+	r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
 	if (unlikely(r != 0)) {
 	if (unlikely(r != 0)) {
 		DRM_ERROR("failed to pin new abo buffer before flip\n");
 		DRM_ERROR("failed to pin new abo buffer before flip\n");
 		goto unreserve;
 		goto unreserve;
@@ -207,7 +210,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 	amdgpu_bo_unreserve(new_abo);
 	amdgpu_bo_unreserve(new_abo);
 
 
 	work->base = base;
 	work->base = base;
-	work->target_vblank = target - drm_crtc_vblank_count(crtc) +
+	work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
 		amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
 		amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
 
 
 	/* we borrow the event spin lock for protecting flip_wrok */
 	/* we borrow the event spin lock for protecting flip_wrok */
@@ -228,7 +231,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 	/* update crtc fb */
 	/* update crtc fb */
 	crtc->primary->fb = fb;
 	crtc->primary->fb = fb;
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-	amdgpu_flip_work_func(&work->flip_work.work);
+	amdgpu_display_flip_work_func(&work->flip_work.work);
 	return 0;
 	return 0;
 
 
 pflip_cleanup:
 pflip_cleanup:
@@ -254,8 +257,8 @@ cleanup:
 	return r;
 	return r;
 }
 }
 
 
-int amdgpu_crtc_set_config(struct drm_mode_set *set,
-			   struct drm_modeset_acquire_ctx *ctx)
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+				   struct drm_modeset_acquire_ctx *ctx)
 {
 {
 	struct drm_device *dev;
 	struct drm_device *dev;
 	struct amdgpu_device *adev;
 	struct amdgpu_device *adev;
@@ -352,7 +355,7 @@ static const char *hpd_names[6] = {
 	"HPD6",
 	"HPD6",
 };
 };
 
 
-void amdgpu_print_display_setup(struct drm_device *dev)
+void amdgpu_display_print_display_setup(struct drm_device *dev)
 {
 {
 	struct drm_connector *connector;
 	struct drm_connector *connector;
 	struct amdgpu_connector *amdgpu_connector;
 	struct amdgpu_connector *amdgpu_connector;
@@ -429,11 +432,11 @@ void amdgpu_print_display_setup(struct drm_device *dev)
 }
 }
 
 
 /**
 /**
- * amdgpu_ddc_probe
+ * amdgpu_display_ddc_probe
  *
  *
  */
  */
-bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector,
-		       bool use_aux)
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+			      bool use_aux)
 {
 {
 	u8 out = 0x0;
 	u8 out = 0x0;
 	u8 buf[8];
 	u8 buf[8];
@@ -479,7 +482,7 @@ bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector,
 	return true;
 	return true;
 }
 }
 
 
-static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
+static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 {
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 
 
@@ -488,9 +491,10 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
 	kfree(amdgpu_fb);
 	kfree(amdgpu_fb);
 }
 }
 
 
-static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb,
-						  struct drm_file *file_priv,
-						  unsigned int *handle)
+static int amdgpu_display_user_framebuffer_create_handle(
+			struct drm_framebuffer *fb,
+			struct drm_file *file_priv,
+			unsigned int *handle)
 {
 {
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 
 
@@ -498,15 +502,28 @@ static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 }
 }
 
 
 static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
 static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
-	.destroy = amdgpu_user_framebuffer_destroy,
-	.create_handle = amdgpu_user_framebuffer_create_handle,
+	.destroy = amdgpu_display_user_framebuffer_destroy,
+	.create_handle = amdgpu_display_user_framebuffer_create_handle,
 };
 };
 
 
-int
-amdgpu_framebuffer_init(struct drm_device *dev,
-			struct amdgpu_framebuffer *rfb,
-			const struct drm_mode_fb_cmd2 *mode_cmd,
-			struct drm_gem_object *obj)
+uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
+{
+	uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+#if defined(CONFIG_DRM_AMD_DC)
+	if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
+	    adev->flags & AMD_IS_APU &&
+	    amdgpu_device_asic_has_dc_support(adev->asic_type))
+		domain |= AMDGPU_GEM_DOMAIN_GTT;
+#endif
+
+	return domain;
+}
+
+int amdgpu_display_framebuffer_init(struct drm_device *dev,
+				    struct amdgpu_framebuffer *rfb,
+				    const struct drm_mode_fb_cmd2 *mode_cmd,
+				    struct drm_gem_object *obj)
 {
 {
 	int ret;
 	int ret;
 	rfb->obj = obj;
 	rfb->obj = obj;
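The new amdgpu_display_framebuffer_domains() helper above is what scanout paths are expected to query instead of hard-coding VRAM, so APUs in the Carrizo..Raven range with DC enabled can also scan out of GTT. A hedged usage sketch, mirroring the pin-site change earlier in this file (abo and base are placeholders, not names from the patch):

	/* Illustrative only: pin a BO for scanout into whatever domains the
	 * display helper reports for this ASIC.
	 */
	u32 domain = amdgpu_display_framebuffer_domains(adev);
	r = amdgpu_bo_pin(abo, domain, &base);
	if (unlikely(r != 0))
		DRM_ERROR("failed to pin buffer for scanout (%d)\n", r);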
@@ -520,9 +537,9 @@ amdgpu_framebuffer_init(struct drm_device *dev,
 }
 }
 
 
 struct drm_framebuffer *
 struct drm_framebuffer *
-amdgpu_user_framebuffer_create(struct drm_device *dev,
-			       struct drm_file *file_priv,
-			       const struct drm_mode_fb_cmd2 *mode_cmd)
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+				       struct drm_file *file_priv,
+				       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 {
 	struct drm_gem_object *obj;
 	struct drm_gem_object *obj;
 	struct amdgpu_framebuffer *amdgpu_fb;
 	struct amdgpu_framebuffer *amdgpu_fb;
@@ -547,7 +564,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOMEM);
 		return ERR_PTR(-ENOMEM);
 	}
 	}
 
 
-	ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
+	ret = amdgpu_display_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
 	if (ret) {
 	if (ret) {
 		kfree(amdgpu_fb);
 		kfree(amdgpu_fb);
 		drm_gem_object_put_unlocked(obj);
 		drm_gem_object_put_unlocked(obj);
@@ -558,7 +575,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
 }
 }
 
 
 const struct drm_mode_config_funcs amdgpu_mode_funcs = {
 const struct drm_mode_config_funcs amdgpu_mode_funcs = {
-	.fb_create = amdgpu_user_framebuffer_create,
+	.fb_create = amdgpu_display_user_framebuffer_create,
 	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.output_poll_changed = drm_fb_helper_output_poll_changed,
 };
 };
 
 
@@ -580,7 +597,7 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
 	{ AMDGPU_FMT_DITHER_ENABLE, "on" },
 	{ AMDGPU_FMT_DITHER_ENABLE, "on" },
 };
 };
 
 
-int amdgpu_modeset_create_props(struct amdgpu_device *adev)
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
 {
 {
 	int sz;
 	int sz;
 
 
@@ -629,7 +646,7 @@ int amdgpu_modeset_create_props(struct amdgpu_device *adev)
 	return 0;
 	return 0;
 }
 }
 
 
-void amdgpu_update_display_priority(struct amdgpu_device *adev)
+void amdgpu_display_update_priority(struct amdgpu_device *adev)
 {
 {
 	/* adjustment options for the display watermarks */
 	/* adjustment options for the display watermarks */
 	if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2))
 	if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2))
@@ -639,7 +656,7 @@ void amdgpu_update_display_priority(struct amdgpu_device *adev)
 
 
 }
 }
 
 
-static bool is_hdtv_mode(const struct drm_display_mode *mode)
+static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode)
 {
 {
 	/* try and guess if this is a tv or a monitor */
 	/* try and guess if this is a tv or a monitor */
 	if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */
 	if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */
@@ -651,9 +668,9 @@ static bool is_hdtv_mode(const struct drm_display_mode *mode)
 		return false;
 		return false;
 }
 }
 
 
-bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
-				    const struct drm_display_mode *mode,
-				    struct drm_display_mode *adjusted_mode)
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+					const struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
 {
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_device *dev = crtc->dev;
 	struct drm_encoder *encoder;
 	struct drm_encoder *encoder;
@@ -696,7 +713,7 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
 		    ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
 		    ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
 		     ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
 		     ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
 		      drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
 		      drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
-		      is_hdtv_mode(mode)))) {
+		      amdgpu_display_is_hdtv_mode(mode)))) {
 			if (amdgpu_encoder->underscan_hborder != 0)
 			if (amdgpu_encoder->underscan_hborder != 0)
 				amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
 				amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
 			else
 			else
@@ -764,10 +781,10 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
  * unknown small number of scanlines wrt. real scanout position.
  * unknown small number of scanlines wrt. real scanout position.
  *
  *
  */
  */
-int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
-			       unsigned int flags, int *vpos, int *hpos,
-			       ktime_t *stime, ktime_t *etime,
-			       const struct drm_display_mode *mode)
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+			unsigned int pipe, unsigned int flags, int *vpos,
+			int *hpos, ktime_t *stime, ktime_t *etime,
+			const struct drm_display_mode *mode)
 {
 {
 	u32 vbl = 0, position = 0;
 	u32 vbl = 0, position = 0;
 	int vbl_start, vbl_end, vtotal, ret = 0;
 	int vbl_start, vbl_end, vtotal, ret = 0;
@@ -859,7 +876,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 	return ret;
 	return ret;
 }
 }
 
 
-int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
 {
 {
 	if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
 	if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
 		return AMDGPU_CRTC_IRQ_NONE;
 		return AMDGPU_CRTC_IRQ_NONE;

+ 4 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_display.h

@@ -23,9 +23,10 @@
 #ifndef __AMDGPU_DISPLAY_H__
 #ifndef __AMDGPU_DISPLAY_H__
 #define __AMDGPU_DISPLAY_H__
 #define __AMDGPU_DISPLAY_H__
 
 
+uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
 struct drm_framebuffer *
 struct drm_framebuffer *
-amdgpu_user_framebuffer_create(struct drm_device *dev,
-			       struct drm_file *file_priv,
-			       const struct drm_mode_fb_cmd2 *mode_cmd);
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+				       struct drm_file *file_priv,
+				       const struct drm_mode_fb_cmd2 *mode_cmd);
 
 
 #endif
 #endif

+ 20 - 15
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h

@@ -265,9 +265,6 @@ enum amdgpu_pcie_gen {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
 		((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size)))
 		((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size)))
 
 
-#define amdgpu_dpm_get_temperature(adev) \
-		((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle))
-
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
 		((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)))
 		((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)))
 
 
@@ -328,8 +325,8 @@ enum amdgpu_pcie_gen {
 #define amdgpu_dpm_set_mclk_od(adev, value) \
 #define amdgpu_dpm_set_mclk_od(adev, value) \
 		((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
 		((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
 
 
-#define amdgpu_dpm_dispatch_task(adev, task_id, input, output)		\
-		((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output))
+#define amdgpu_dpm_dispatch_task(adev, task_id, user_state)		\
+		((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (user_state))
 
 
 #define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \
 #define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \
 		((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal)))
 		((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal)))
@@ -344,17 +341,9 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->reset_power_profile_state(\
 		((adev)->powerplay.pp_funcs->reset_power_profile_state(\
 			(adev)->powerplay.pp_handle, request))
 			(adev)->powerplay.pp_handle, request))
 
 
-#define amdgpu_dpm_get_power_profile_state(adev, query) \
-		((adev)->powerplay.pp_funcs->get_power_profile_state(\
-			(adev)->powerplay.pp_handle, query))
-
-#define amdgpu_dpm_set_power_profile_state(adev, request) \
-		((adev)->powerplay.pp_funcs->set_power_profile_state(\
-			(adev)->powerplay.pp_handle, request))
-
-#define amdgpu_dpm_switch_power_profile(adev, type) \
+#define amdgpu_dpm_switch_power_profile(adev, type, en) \
 		((adev)->powerplay.pp_funcs->switch_power_profile(\
 		((adev)->powerplay.pp_funcs->switch_power_profile(\
-			(adev)->powerplay.pp_handle, type))
+			(adev)->powerplay.pp_handle, type, en))
 
 
 #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
 #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
 		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
 		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
@@ -366,6 +355,22 @@ enum amdgpu_pcie_gen {
 			(adev)->powerplay.pp_handle, virtual_addr_low, \
 			(adev)->powerplay.pp_handle, virtual_addr_low, \
 			virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
 			virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
 
 
+#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
+		((adev)->powerplay.pp_funcs->get_power_profile_mode(\
+			(adev)->powerplay.pp_handle, buf))
+
+#define amdgpu_dpm_set_power_profile_mode(adev, parameter, size) \
+		((adev)->powerplay.pp_funcs->set_power_profile_mode(\
+			(adev)->powerplay.pp_handle, parameter, size))
+
+#define amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, size) \
+		((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
+			(adev)->powerplay.pp_handle, type, parameter, size))
+
+#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \
+		((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \
+		(adev)->powerplay.pp_handle))
+
 struct amdgpu_dpm {
 struct amdgpu_dpm {
 	struct amdgpu_ps        *ps;
 	struct amdgpu_ps        *ps;
 	/* number of valid power states */
 	/* number of valid power states */
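The new power-profile macros above only make sense when the underlying powerplay callback is implemented for the ASIC, so callers are expected to NULL-check pp_funcs first. A minimal illustrative sketch, not taken from the patch (buf and size are placeholders), of how a sysfs-style handler might use the getter:

	/* Illustrative only: guard the optional callback before dispatching. */
	if (adev->powerplay.pp_funcs->get_power_profile_mode)
		size = amdgpu_dpm_get_power_profile_mode(adev, buf);
	else
		size = snprintf(buf, PAGE_SIZE, "\n");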

+ 24 - 11
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -73,9 +73,11 @@
  * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
  * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
  * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
  * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
  * - 3.23.0 - Add query for VRAM lost counter
  * - 3.23.0 - Add query for VRAM lost counter
+ * - 3.24.0 - Add high priority compute support for gfx9
+ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  */
  */
 #define KMS_DRIVER_MAJOR	3
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	23
+#define KMS_DRIVER_MINOR	25
 #define KMS_DRIVER_PATCHLEVEL	0
 #define KMS_DRIVER_PATCHLEVEL	0
 
 
 int amdgpu_vram_limit = 0;
 int amdgpu_vram_limit = 0;
@@ -119,7 +121,7 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
 char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffffffff;
+uint amdgpu_pp_feature_mask = 0xffffbfff;
 int amdgpu_ngg = 0;
 int amdgpu_ngg = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
@@ -129,6 +131,7 @@ int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
 int amdgpu_lbpw = -1;
 int amdgpu_compute_multipipe = -1;
 int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
 int amdgpu_gpu_recovery = -1; /* auto */
+int amdgpu_emu_mode = 0;
 
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -281,9 +284,12 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
 MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
 module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
 
 
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
 module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
 
 
+MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
+module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
 #ifdef CONFIG_DRM_AMDGPU_SI
 
 
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -576,6 +582,11 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	struct drm_device *dev;
 	struct drm_device *dev;
 	unsigned long flags = ent->driver_data;
 	unsigned long flags = ent->driver_data;
 	int ret, retry = 0;
 	int ret, retry = 0;
+	bool supports_atomic = false;
+
+	if (!amdgpu_virtual_display &&
+	    amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+		supports_atomic = true;
 
 
 	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
 	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
 		DRM_INFO("This hardware requires experimental hardware support.\n"
 		DRM_INFO("This hardware requires experimental hardware support.\n"
@@ -596,6 +607,13 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
+	/* warn the user if they mix atomic and non-atomic capable GPUs */
+	if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
+		DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
+	/* support atomic early so the atomic debugfs stuff gets created */
+	if (supports_atomic)
+		kms_driver.driver_features |= DRIVER_ATOMIC;
+
 	dev = drm_dev_alloc(&kms_driver, &pdev->dev);
 	dev = drm_dev_alloc(&kms_driver, &pdev->dev);
 	if (IS_ERR(dev))
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 		return PTR_ERR(dev);
@@ -833,8 +851,8 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 				 ktime_t *stime, ktime_t *etime,
 				 ktime_t *stime, ktime_t *etime,
 				 const struct drm_display_mode *mode)
 				 const struct drm_display_mode *mode)
 {
 {
-	return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
-					  stime, etime, mode);
+	return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
+						  stime, etime, mode);
 }
 }
 
 
 static struct drm_driver kms_driver = {
 static struct drm_driver kms_driver = {
@@ -852,9 +870,6 @@ static struct drm_driver kms_driver = {
 	.disable_vblank = amdgpu_disable_vblank_kms,
 	.disable_vblank = amdgpu_disable_vblank_kms,
 	.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
 	.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
 	.get_scanout_position = amdgpu_get_crtc_scanout_position,
 	.get_scanout_position = amdgpu_get_crtc_scanout_position,
-	.irq_preinstall = amdgpu_irq_preinstall,
-	.irq_postinstall = amdgpu_irq_postinstall,
-	.irq_uninstall = amdgpu_irq_uninstall,
 	.irq_handler = amdgpu_irq_handler,
 	.irq_handler = amdgpu_irq_handler,
 	.ioctls = amdgpu_ioctls_kms,
 	.ioctls = amdgpu_ioctls_kms,
 	.gem_free_object_unlocked = amdgpu_gem_object_free,
 	.gem_free_object_unlocked = amdgpu_gem_object_free,
@@ -867,9 +882,7 @@ static struct drm_driver kms_driver = {
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = amdgpu_gem_prime_export,
 	.gem_prime_export = amdgpu_gem_prime_export,
-	.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_pin = amdgpu_gem_prime_pin,
-	.gem_prime_unpin = amdgpu_gem_prime_unpin,
+	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
 	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
 	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
 	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
 	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,

+ 13 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c

@@ -38,6 +38,8 @@
 
 
 #include <linux/vga_switcheroo.h>
 #include <linux/vga_switcheroo.h>
 
 
+#include "amdgpu_display.h"
+
 /* object hierarchy -
 /* object hierarchy -
    this contains a helper + a amdgpu fb
    this contains a helper + a amdgpu fb
    the helper contains a pointer to amdgpu framebuffer baseclass.
    the helper contains a pointer to amdgpu framebuffer baseclass.
@@ -124,7 +126,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	struct drm_gem_object *gobj = NULL;
 	struct drm_gem_object *gobj = NULL;
 	struct amdgpu_bo *abo = NULL;
 	struct amdgpu_bo *abo = NULL;
 	bool fb_tiled = false; /* useful for testing */
 	bool fb_tiled = false; /* useful for testing */
-	u32 tiling_flags = 0;
+	u32 tiling_flags = 0, domain;
 	int ret;
 	int ret;
 	int aligned_size, size;
 	int aligned_size, size;
 	int height = mode_cmd->height;
 	int height = mode_cmd->height;
@@ -135,12 +137,12 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	/* need to align pitch with crtc limits */
 	/* need to align pitch with crtc limits */
 	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
 	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
 						  fb_tiled);
 						  fb_tiled);
+	domain = amdgpu_display_framebuffer_domains(adev);
 
 
 	height = ALIGN(mode_cmd->height, 8);
 	height = ALIGN(mode_cmd->height, 8);
 	size = mode_cmd->pitches[0] * height;
 	size = mode_cmd->pitches[0] * height;
 	aligned_size = ALIGN(size, PAGE_SIZE);
 	aligned_size = ALIGN(size, PAGE_SIZE);
-	ret = amdgpu_gem_object_create(adev, aligned_size, 0,
-				       AMDGPU_GEM_DOMAIN_VRAM,
+	ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain,
 				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
 				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
@@ -166,7 +168,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	}
 	}
 
 
 
 
-	ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL);
+	ret = amdgpu_bo_pin(abo, domain, NULL);
 	if (ret) {
 	if (ret) {
 		amdgpu_bo_unreserve(abo);
 		amdgpu_bo_unreserve(abo);
 		goto out_unref;
 		goto out_unref;
@@ -225,7 +227,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 	info->par = rfbdev;
 	info->par = rfbdev;
 	info->skip_vt_switch = true;
 	info->skip_vt_switch = true;
 
 
-	ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
+	ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb,
+					      &mode_cmd, gobj);
 	if (ret) {
 	if (ret) {
 		DRM_ERROR("failed to initialize framebuffer %d\n", ret);
 		DRM_ERROR("failed to initialize framebuffer %d\n", ret);
 		goto out;
 		goto out;
@@ -242,8 +245,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 
 
 	info->fbops = &amdgpufb_ops;
 	info->fbops = &amdgpufb_ops;
 
 
-	tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start;
-	info->fix.smem_start = adev->mc.aper_base + tmp;
+	tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start;
+	info->fix.smem_start = adev->gmc.aper_base + tmp;
 	info->fix.smem_len = amdgpu_bo_size(abo);
 	info->fix.smem_len = amdgpu_bo_size(abo);
 	info->screen_base = amdgpu_bo_kptr(abo);
 	info->screen_base = amdgpu_bo_kptr(abo);
 	info->screen_size = amdgpu_bo_size(abo);
 	info->screen_size = amdgpu_bo_size(abo);
@@ -252,7 +255,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 
 
 	/* setup aperture base/size for vesafb takeover */
 	/* setup aperture base/size for vesafb takeover */
 	info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base;
 	info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base;
-	info->apertures->ranges[0].size = adev->mc.aper_size;
+	info->apertures->ranges[0].size = adev->gmc.aper_size;
 
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 
 
@@ -262,7 +265,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 	}
 	}
 
 
 	DRM_INFO("fb mappable at 0x%lX\n",  info->fix.smem_start);
 	DRM_INFO("fb mappable at 0x%lX\n",  info->fix.smem_start);
-	DRM_INFO("vram apper at 0x%lX\n",  (unsigned long)adev->mc.aper_base);
+	DRM_INFO("vram apper at 0x%lX\n",  (unsigned long)adev->gmc.aper_base);
 	DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
 	DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
 	DRM_INFO("fb depth is %d\n", fb->format->depth);
 	DRM_INFO("fb depth is %d\n", fb->format->depth);
 	DRM_INFO("   pitch is %d\n", fb->pitches[0]);
 	DRM_INFO("   pitch is %d\n", fb->pitches[0]);
@@ -319,7 +322,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
 		return 0;
 		return 0;
 
 
 	/* select 8 bpp console on low vram cards */
 	/* select 8 bpp console on low vram cards */
-	if (adev->mc.real_vram_size <= (32*1024*1024))
+	if (adev->gmc.real_vram_size <= (32*1024*1024))
 		bpp_sel = 8;
 		bpp_sel = 8;
 
 
 	rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL);
 	rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL);

+ 27 - 27
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

@@ -68,17 +68,15 @@
  */
  */
 static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
 static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
 {
 {
-	if (adev->dummy_page.page)
+	struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page;
+
+	if (adev->dummy_page_addr)
 		return 0;
 		return 0;
-	adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
-	if (adev->dummy_page.page == NULL)
-		return -ENOMEM;
-	adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page,
-					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) {
+	adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0,
+					     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) {
 		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
 		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
-		__free_page(adev->dummy_page.page);
-		adev->dummy_page.page = NULL;
+		adev->dummy_page_addr = 0;
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
 	return 0;
 	return 0;
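The dummy-page rework above switches to TTM's global dummy_read_page and only keeps the DMA mapping in adev->dummy_page_addr; the usual streaming-DMA rule still applies: a bus address returned by pci_map_page() must be validated with pci_dma_mapping_error() before use. A minimal illustrative sketch of that pattern (pdev and page are placeholders):

	/* Illustrative only: map a page for bidirectional DMA and verify it. */
	dma_addr_t addr = pci_map_page(pdev, page, 0, PAGE_SIZE,
				       PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(pdev, addr))
		return -ENOMEM;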
@@ -93,12 +91,11 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
  */
  */
 static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
 static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
 {
 {
-	if (adev->dummy_page.page == NULL)
+	if (!adev->dummy_page_addr)
 		return;
 		return;
-	pci_unmap_page(adev->pdev, adev->dummy_page.addr,
-			PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	__free_page(adev->dummy_page.page);
-	adev->dummy_page.page = NULL;
+	pci_unmap_page(adev->pdev, adev->dummy_page_addr,
+		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	adev->dummy_page_addr = 0;
 }
 }
 
 
 /**
 /**
@@ -116,11 +113,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
 	int r;
 	int r;
 
 
 	if (adev->gart.robj == NULL) {
 	if (adev->gart.robj == NULL) {
-		r = amdgpu_bo_create(adev, adev->gart.table_size,
-				     PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
+		r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
+				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     NULL, NULL, 0, &adev->gart.robj);
+				     ttm_bo_type_kernel, NULL,
+				     &adev->gart.robj);
 		if (r) {
 		if (r) {
 			return r;
 			return r;
 		}
 		}
@@ -236,18 +234,19 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 		adev->gart.pages[p] = NULL;
 		adev->gart.pages[p] = NULL;
 #endif
 #endif
-		page_base = adev->dummy_page.addr;
+		page_base = adev->dummy_page_addr;
 		if (!adev->gart.ptr)
 		if (!adev->gart.ptr)
 			continue;
 			continue;
 
 
 		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
 		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
-			amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
-						t, page_base, flags);
+			amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+					       t, page_base, flags);
 			page_base += AMDGPU_GPU_PAGE_SIZE;
 			page_base += AMDGPU_GPU_PAGE_SIZE;
 		}
 		}
 	}
 	}
 	mb();
 	mb();
-	amdgpu_gart_flush_gpu_tlb(adev, 0);
+	amdgpu_asic_flush_hdp(adev, NULL);
+	amdgpu_gmc_flush_gpu_tlb(adev, 0);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -279,7 +278,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 	for (i = 0; i < pages; i++) {
 	for (i = 0; i < pages; i++) {
 		page_base = dma_addr[i];
 		page_base = dma_addr[i];
 		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
 		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
-			amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags);
+			amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
 			page_base += AMDGPU_GPU_PAGE_SIZE;
 			page_base += AMDGPU_GPU_PAGE_SIZE;
 		}
 		}
 	}
 	}
@@ -317,7 +316,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 	t = offset / AMDGPU_GPU_PAGE_SIZE;
 	t = offset / AMDGPU_GPU_PAGE_SIZE;
 	p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
 	p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
 	for (i = 0; i < pages; i++, p++)
 	for (i = 0; i < pages; i++, p++)
-		adev->gart.pages[p] = pagelist[i];
+		adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
 #endif
 #endif
 
 
 	if (!adev->gart.ptr)
 	if (!adev->gart.ptr)
@@ -329,7 +328,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		return r;
 		return r;
 
 
 	mb();
 	mb();
-	amdgpu_gart_flush_gpu_tlb(adev, 0);
+	amdgpu_asic_flush_hdp(adev, NULL);
+	amdgpu_gmc_flush_gpu_tlb(adev, 0);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -345,7 +345,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
 {
 {
 	int r;
 	int r;
 
 
-	if (adev->dummy_page.page)
+	if (adev->dummy_page_addr)
 		return 0;
 		return 0;
 
 
 	/* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
 	/* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
@@ -357,8 +357,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
 	if (r)
 	if (r)
 		return r;
 		return r;
 	/* Compute table size */
 	/* Compute table size */
-	adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE;
-	adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE;
+	adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
+	adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
 	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
 	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
 		 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
 		 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
 
 

+ 0 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

@@ -31,7 +31,6 @@
  */
  */
 struct amdgpu_device;
 struct amdgpu_device;
 struct amdgpu_bo;
 struct amdgpu_bo;
-struct amdgpu_gart_funcs;
 
 
 #define AMDGPU_GPU_PAGE_SIZE 4096
 #define AMDGPU_GPU_PAGE_SIZE 4096
 #define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
 #define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
@@ -52,8 +51,6 @@ struct amdgpu_gart {
 
 
 	/* Asic default pte flags */
 	/* Asic default pte flags */
 	uint64_t			gart_pte_flags;
 	uint64_t			gart_pte_flags;
-
-	const struct amdgpu_gart_funcs *gart_funcs;
 };
 };
 
 
 int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
 int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);

+ 9 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

@@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 	struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
 	struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
 
 
 	if (robj) {
 	if (robj) {
-		if (robj->gem_base.import_attach)
-			drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
 		amdgpu_mn_unregister(robj);
 		amdgpu_mn_unregister(robj);
 		amdgpu_bo_unref(&robj);
 		amdgpu_bo_unref(&robj);
 	}
 	}
@@ -45,7 +43,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
 			     int alignment, u32 initial_domain,
-			     u64 flags, bool kernel,
+			     u64 flags, enum ttm_bo_type type,
 			     struct reservation_object *resv,
 			     struct reservation_object *resv,
 			     struct drm_gem_object **obj)
 			     struct drm_gem_object **obj)
 {
 {
@@ -59,8 +57,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 	}
 	}
 
 
 retry:
 retry:
-	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
-			     flags, NULL, resv, 0, &bo);
+	r = amdgpu_bo_create(adev, size, alignment, initial_domain,
+			     flags, type, resv, &bo);
 	if (r) {
 	if (r) {
 		if (r != -ERESTARTSYS) {
 		if (r != -ERESTARTSYS) {
 			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
 			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -523,12 +521,13 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 		goto error;
 		goto error;
 
 
 	if (operation == AMDGPU_VA_OP_MAP ||
 	if (operation == AMDGPU_VA_OP_MAP ||
-	    operation == AMDGPU_VA_OP_REPLACE)
+	    operation == AMDGPU_VA_OP_REPLACE) {
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			goto error;
+	}
 
 
 	r = amdgpu_vm_update_directories(adev, vm);
 	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		goto error;
 
 
 error:
 error:
 	if (r && r != -ERESTARTSYS)
 	if (r && r != -ERESTARTSYS)
@@ -634,7 +633,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		if (r)
 		if (r)
 			goto error_backoff;
 			goto error_backoff;
 
 
-		va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
+		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 				     args->offset_in_bo, args->map_size,
 				     args->offset_in_bo, args->map_size,
 				     va_flags);
 				     va_flags);
@@ -654,7 +653,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		if (r)
 		if (r)
 			goto error_backoff;
 			goto error_backoff;
 
 
-		va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
+		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
 		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
 					     args->offset_in_bo, args->map_size,
 					     args->offset_in_bo, args->map_size,
 					     va_flags);
 					     va_flags);

+ 112 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

@@ -0,0 +1,112 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef __AMDGPU_GMC_H__
+#define __AMDGPU_GMC_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_irq.h"
+
+struct firmware;
+
+/*
+ * VMHUB structures, functions & helpers
+ */
+struct amdgpu_vmhub {
+	uint32_t	ctx0_ptb_addr_lo32;
+	uint32_t	ctx0_ptb_addr_hi32;
+	uint32_t	vm_inv_eng0_req;
+	uint32_t	vm_inv_eng0_ack;
+	uint32_t	vm_context0_cntl;
+	uint32_t	vm_l2_pro_fault_status;
+	uint32_t	vm_l2_pro_fault_cntl;
+};
+
+/*
+ * GPU MC structures, functions & helpers
+ */
+struct amdgpu_gmc_funcs {
+	/* flush the vm tlb via mmio */
+	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
+			      uint32_t vmid);
+	/* flush the vm tlb via ring */
+	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
+				       uint64_t pd_addr);
+	/* Change the VMID -> PASID mapping */
+	void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid,
+				   unsigned pasid);
+	/* write pte/pde updates using the cpu */
+	int (*set_pte_pde)(struct amdgpu_device *adev,
+			   void *cpu_pt_addr, /* cpu addr of page table */
+			   uint32_t gpu_page_idx, /* pte/pde to update */
+			   uint64_t addr, /* addr to write into pte/pde */
+			   uint64_t flags); /* access flags */
+	/* enable/disable PRT support */
+	void (*set_prt)(struct amdgpu_device *adev, bool enable);
+	/* set pte flags based per asic */
+	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
+				     uint32_t flags);
+	/* get the pde for a given mc addr */
+	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+			   u64 *dst, u64 *flags);
+};
+
+struct amdgpu_gmc {
+	resource_size_t		aper_size;
+	resource_size_t		aper_base;
+	/* for some chips with <= 32MB we need to lie
+	 * about vram size near mc fb location */
+	u64			mc_vram_size;
+	u64			visible_vram_size;
+	u64			gart_size;
+	u64			gart_start;
+	u64			gart_end;
+	u64			vram_start;
+	u64			vram_end;
+	unsigned		vram_width;
+	u64			real_vram_size;
+	int			vram_mtrr;
+	u64                     mc_mask;
+	const struct firmware   *fw;	/* MC firmware */
+	uint32_t                fw_version;
+	struct amdgpu_irq_src	vm_fault;
+	uint32_t		vram_type;
+	uint32_t                srbm_soft_reset;
+	bool			prt_warning;
+	uint64_t		stolen_size;
+	/* apertures */
+	u64			shared_aperture_start;
+	u64			shared_aperture_end;
+	u64			private_aperture_start;
+	u64			private_aperture_end;
+	/* protects concurrent invalidation */
+	spinlock_t		invalidate_lock;
+	bool			translate_further;
+
+	const struct amdgpu_gmc_funcs	*gmc_funcs;
+};
+
+#endif
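The gmc_funcs table above is reached through thin wrappers; other hunks in this series call amdgpu_gmc_flush_gpu_tlb(), amdgpu_gmc_set_pte_pde() and amdgpu_gmc_get_pte_flags(). A hedged sketch of the dispatch shape such wrappers take (the real definitions live elsewhere in the driver and may differ):

	/* Illustrative only: wrappers dispatch through adev->gmc.gmc_funcs. */
	#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) \
		((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)))
	#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) \
		((adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)))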

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c

@@ -56,7 +56,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
 		return -ENOMEM;
 		return -ENOMEM;
 
 
 	start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
 	start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
-	size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
+	size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
 	drm_mm_init(&mgr->mm, start, size);
 	drm_mm_init(&mgr->mm, start, size);
 	spin_lock_init(&mgr->lock);
 	spin_lock_init(&mgr->lock);
 	atomic64_set(&mgr->available, p_size);
 	atomic64_set(&mgr->available, p_size);
@@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
 static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man)
 static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man)
 {
 {
 	struct amdgpu_gtt_mgr *mgr = man->priv;
 	struct amdgpu_gtt_mgr *mgr = man->priv;
-
+	spin_lock(&mgr->lock);
 	drm_mm_takedown(&mgr->mm);
 	drm_mm_takedown(&mgr->mm);
 	spin_unlock(&mgr->lock);
 	spin_unlock(&mgr->lock);
 	kfree(mgr);
 	kfree(mgr);

+ 42 - 16
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

@@ -181,15 +181,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 		}
 	}
 	}
 
 
-	if (ring->funcs->init_cond_exec)
+	if (job && ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
 
-	if (ring->funcs->emit_hdp_flush
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
 #endif
-	   )
-		amdgpu_ring_emit_hdp_flush(ring);
+	{
+		if (ring->funcs->emit_hdp_flush)
+			amdgpu_ring_emit_hdp_flush(ring);
+		else
+			amdgpu_asic_flush_hdp(adev, ring);
+	}
 
 
 	skip_preamble = ring->current_ctx == fence_ctx;
 	skip_preamble = ring->current_ctx == fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -219,12 +222,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_tmz)
 	if (ring->funcs->emit_tmz)
 		amdgpu_ring_emit_tmz(ring, false);
 		amdgpu_ring_emit_tmz(ring, false);
 
 
-	if (ring->funcs->emit_hdp_invalidate
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
 #endif
-	   )
-		amdgpu_ring_emit_hdp_invalidate(ring);
+		amdgpu_asic_invalidate_hdp(adev, ring);
 
 
 	r = amdgpu_fence_emit(ring, f);
 	r = amdgpu_fence_emit(ring, f);
 	if (r) {
 	if (r) {
@@ -278,11 +279,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 		return r;
 		return r;
 	}
 	}
 
 
-	r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
-	if (r) {
-		return r;
-	}
-
 	adev->ib_pool_ready = true;
 	adev->ib_pool_ready = true;
 	if (amdgpu_debugfs_sa_init(adev)) {
 	if (amdgpu_debugfs_sa_init(adev)) {
 		dev_err(adev->dev, "failed to register debugfs file for SA\n");
 		dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +297,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
 {
 	if (adev->ib_pool_ready) {
 	if (adev->ib_pool_ready) {
-		amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
 		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
 		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
 		adev->ib_pool_ready = false;
 		adev->ib_pool_ready = false;
 	}
 	}
@@ -321,14 +316,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
 {
 	unsigned i;
 	unsigned i;
 	int r, ret = 0;
 	int r, ret = 0;
+	long tmo_gfx, tmo_mm;
+
+	tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+	if (amdgpu_sriov_vf(adev)) {
+		/* for MM engines in hypervisor side they are not scheduled together
+		/* On the hypervisor side the MM engines are not scheduled
+		 * together with the CP and SDMA engines, so even in exclusive
+		 * mode an MM engine may still be running on another VF. The IB
+		 * test timeout for MM engines under SR-IOV therefore has to be
+		 * long; 8 seconds should be enough for the MM engine to come
+		 * back to this VF.
+		 */
+	}
+
+	if (amdgpu_sriov_runtime(adev)) {
+		/* CP and SDMA engines are scheduled together, so the timeout
+		 * has to be wide enough to cover the time spent waiting for
+		 * them to come back under RUNTIME only
+		 */
+		tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	}
 
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		struct amdgpu_ring *ring = adev->rings[i];
+		long tmo;
 
 
 		if (!ring || !ring->ready)
 		if (!ring || !ring->ready)
 			continue;
 			continue;
 
 
-		r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+		/* MM engines need more time */
+		if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+			ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+			ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+			ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+			ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+			tmo = tmo_mm;
+		else
+			tmo = tmo_gfx;
+
+		r = amdgpu_ring_test_ib(ring, tmo);
 		if (r) {
 		if (r) {
 			ring->ready = false;
 			ring->ready = false;
 
 

+ 283 - 129
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

@@ -40,6 +40,12 @@
  */
  */
 static DEFINE_IDA(amdgpu_pasid_ida);
 static DEFINE_IDA(amdgpu_pasid_ida);
 
 
+/* Helper to free pasid from a fence callback */
+struct amdgpu_pasid_cb {
+	struct dma_fence_cb cb;
+	unsigned int pasid;
+};
+
 /**
 /**
  * amdgpu_pasid_alloc - Allocate a PASID
  * amdgpu_pasid_alloc - Allocate a PASID
  * @bits: Maximum width of the PASID in bits, must be at least 1
  * @bits: Maximum width of the PASID in bits, must be at least 1
@@ -63,6 +69,9 @@ int amdgpu_pasid_alloc(unsigned int bits)
 			break;
 			break;
 	}
 	}
 
 
+	if (pasid >= 0)
+		trace_amdgpu_pasid_allocated(pasid);
+
 	return pasid;
 	return pasid;
 }
 }
 
 
@@ -72,9 +81,86 @@ int amdgpu_pasid_alloc(unsigned int bits)
  */
  */
 void amdgpu_pasid_free(unsigned int pasid)
 void amdgpu_pasid_free(unsigned int pasid)
 {
 {
+	trace_amdgpu_pasid_freed(pasid);
 	ida_simple_remove(&amdgpu_pasid_ida, pasid);
 	ida_simple_remove(&amdgpu_pasid_ida, pasid);
 }
 }
 
 
+static void amdgpu_pasid_free_cb(struct dma_fence *fence,
+				 struct dma_fence_cb *_cb)
+{
+	struct amdgpu_pasid_cb *cb =
+		container_of(_cb, struct amdgpu_pasid_cb, cb);
+
+	amdgpu_pasid_free(cb->pasid);
+	dma_fence_put(fence);
+	kfree(cb);
+}
+
+/**
+ * amdgpu_pasid_free_delayed - free pasid when fences signal
+ *
+ * @resv: reservation object with the fences to wait for
+ * @pasid: pasid to free
+ *
+ * Free the pasid only after all the fences in resv are signaled.
+ */
+void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+			       unsigned int pasid)
+{
+	struct dma_fence *fence, **fences;
+	struct amdgpu_pasid_cb *cb;
+	unsigned count;
+	int r;
+
+	r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences);
+	if (r)
+		goto fallback;
+
+	if (count == 0) {
+		amdgpu_pasid_free(pasid);
+		return;
+	}
+
+	if (count == 1) {
+		fence = fences[0];
+		kfree(fences);
+	} else {
+		uint64_t context = dma_fence_context_alloc(1);
+		struct dma_fence_array *array;
+
+		array = dma_fence_array_create(count, fences, context,
+					       1, false);
+		if (!array) {
+			kfree(fences);
+			goto fallback;
+		}
+		fence = &array->base;
+	}
+
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb) {
+		/* Last resort when we are OOM */
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+		amdgpu_pasid_free(pasid);
+	} else {
+		cb->pasid = pasid;
+		if (dma_fence_add_callback(fence, &cb->cb,
+					   amdgpu_pasid_free_cb))
+			amdgpu_pasid_free_cb(fence, &cb->cb);
+	}
+
+	return;
+
+fallback:
+	/* Not enough memory for the delayed delete; as a last resort,
+	 * block until all the fences complete.
+	 */
+	reservation_object_wait_timeout_rcu(resv, true, false,
+					    MAX_SCHEDULE_TIMEOUT);
+	amdgpu_pasid_free(pasid);
+}
+
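
The new amdgpu_pasid_free_delayed() defers freeing the PASID until every fence in the reservation object has signaled, attaching a callback to a single fence or a dma_fence_array and falling back to a blocking wait only when allocations fail. Below is an illustrative userspace model of that callback pattern, assuming simplified stand-in fence types rather than the kernel's dma_fence API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fence {
	bool signaled;
	void (*cb)(struct fence *f, void *data);
	void *cb_data;
};

/* Returns false (and never stores the callback) if the fence already signaled. */
static bool fence_add_callback(struct fence *f,
			       void (*cb)(struct fence *, void *), void *data)
{
	if (f->signaled)
		return false;
	f->cb = cb;
	f->cb_data = data;
	return true;
}

static void fence_signal(struct fence *f)
{
	f->signaled = true;
	if (f->cb)
		f->cb(f, f->cb_data);
}

static void pasid_free(unsigned int pasid)
{
	printf("pasid %u freed\n", pasid);
}

static void pasid_free_cb(struct fence *f, void *data)
{
	(void)f;
	pasid_free((unsigned int)(uintptr_t)data);
}

/* Free @pasid once @f signals; free immediately if it already has (or there is no fence). */
static void pasid_free_delayed(struct fence *f, unsigned int pasid)
{
	if (!f || !fence_add_callback(f, pasid_free_cb, (void *)(uintptr_t)pasid))
		pasid_free(pasid);
}

int main(void)
{
	struct fence f = { 0 };

	pasid_free_delayed(&f, 42);	/* deferred until the fence signals */
	fence_signal(&f);		/* "pasid 42 freed" happens here */
	return 0;
}
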
 /*
 /*
  * VMID manager
  * VMID manager
  *
  *
@@ -96,164 +182,185 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 		atomic_read(&adev->gpu_reset_counter);
 		atomic_read(&adev->gpu_reset_counter);
 }
 }
 
 
-/* idr_mgr->lock must be held */
-static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm,
-					    struct amdgpu_ring *ring,
-					    struct amdgpu_sync *sync,
-					    struct dma_fence *fence,
-					    struct amdgpu_job *job)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned vmhub = ring->funcs->vmhub;
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct amdgpu_vmid *id = vm->reserved_vmid[vmhub];
-	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct dma_fence *updates = sync->last_vm_update;
-	int r = 0;
-	struct dma_fence *flushed, *tmp;
-	bool needs_flush = vm->use_cpu_for_update;
-
-	flushed  = id->flushed_updates;
-	if ((amdgpu_vmid_had_gpu_reset(adev, id)) ||
-	    (atomic64_read(&id->owner) != vm->entity.fence_context) ||
-	    (job->vm_pd_addr != id->pd_gpu_addr) ||
-	    (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) ||
-	    (!id->last_flush || (id->last_flush->context != fence_context &&
-				 !dma_fence_is_signaled(id->last_flush)))) {
-		needs_flush = true;
-		/* to prevent one context starved by another context */
-		id->pd_gpu_addr = 0;
-		tmp = amdgpu_sync_peek_fence(&id->active, ring);
-		if (tmp) {
-			r = amdgpu_sync_fence(adev, sync, tmp, false);
-			return r;
-		}
-	}
-
-	/* Good we can use this VMID. Remember this submission as
-	* user of the VMID.
-	*/
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
-	if (r)
-		goto out;
-
-	if (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) {
-		dma_fence_put(id->flushed_updates);
-		id->flushed_updates = dma_fence_get(updates);
-	}
-	id->pd_gpu_addr = job->vm_pd_addr;
-	atomic64_set(&id->owner, vm->entity.fence_context);
-	job->vm_needs_flush = needs_flush;
-	if (needs_flush) {
-		dma_fence_put(id->last_flush);
-		id->last_flush = NULL;
-	}
-	job->vmid = id - id_mgr->ids;
-	trace_amdgpu_vm_grab_id(vm, ring, job);
-out:
-	return r;
-}
-
 /**
 /**
- * amdgpu_vm_grab_id - allocate the next free VMID
+ * amdgpu_vmid_grab_idle - grab idle VMID
  *
  *
  * @vm: vm to allocate id for
  * @vm: vm to allocate id for
  * @ring: ring we want to submit job to
  * @ring: ring we want to submit job to
  * @sync: sync object where we add dependencies
  * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
+ * @idle: resulting idle VMID
  *
  *
- * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ * Try to find an idle VMID; if none is idle, add a fence to the sync object
+ * to wait on. Returns -ENOMEM when we are out of memory.
  */
  */
-int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		     struct amdgpu_sync *sync, struct dma_fence *fence,
-		     struct amdgpu_job *job)
+static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
+				 struct amdgpu_ring *ring,
+				 struct amdgpu_sync *sync,
+				 struct amdgpu_vmid **idle)
 {
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct dma_fence *updates = sync->last_vm_update;
-	struct amdgpu_vmid *id, *idle;
 	struct dma_fence **fences;
 	struct dma_fence **fences;
 	unsigned i;
 	unsigned i;
-	int r = 0;
+	int r;
+
+	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
+		return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
 
 
-	mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub]) {
-		r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job);
-		mutex_unlock(&id_mgr->lock);
-		return r;
-	}
 	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
 	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-	if (!fences) {
-		mutex_unlock(&id_mgr->lock);
+	if (!fences)
 		return -ENOMEM;
 		return -ENOMEM;
-	}
+
 	/* Check if we have an idle VMID */
 	/* Check if we have an idle VMID */
 	i = 0;
 	i = 0;
-	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
-		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+	list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
+		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
 		if (!fences[i])
 		if (!fences[i])
 			break;
 			break;
 		++i;
 		++i;
 	}
 	}
 
 
 	/* If we can't find a idle VMID to use, wait till one becomes available */
 	/* If we can't find a idle VMID to use, wait till one becomes available */
-	if (&idle->list == &id_mgr->ids_lru) {
+	if (&(*idle)->list == &id_mgr->ids_lru) {
 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
 		struct dma_fence_array *array;
 		struct dma_fence_array *array;
 		unsigned j;
 		unsigned j;
 
 
+		*idle = NULL;
 		for (j = 0; j < i; ++j)
 		for (j = 0; j < i; ++j)
 			dma_fence_get(fences[j]);
 			dma_fence_get(fences[j]);
 
 
 		array = dma_fence_array_create(i, fences, fence_context,
 		array = dma_fence_array_create(i, fences, fence_context,
-					   seqno, true);
+					       seqno, true);
 		if (!array) {
 		if (!array) {
 			for (j = 0; j < i; ++j)
 			for (j = 0; j < i; ++j)
 				dma_fence_put(fences[j]);
 				dma_fence_put(fences[j]);
 			kfree(fences);
 			kfree(fences);
-			r = -ENOMEM;
-			goto error;
+			return -ENOMEM;
 		}
 		}
 
 
+		r = amdgpu_sync_fence(adev, sync, &array->base, false);
+		dma_fence_put(ring->vmid_wait);
+		ring->vmid_wait = &array->base;
+		return r;
+	}
+	kfree(fences);
 
 
-		r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
-		dma_fence_put(&array->base);
-		if (r)
-			goto error;
+	return 0;
+}
 
 
-		mutex_unlock(&id_mgr->lock);
-		return 0;
+/**
+ * amdgpu_vmid_grab_reserved - try to assign reserved VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ *
+ * Try to assign a reserved VMID.
+ */
+static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
+				     struct amdgpu_ring *ring,
+				     struct amdgpu_sync *sync,
+				     struct dma_fence *fence,
+				     struct amdgpu_job *job,
+				     struct amdgpu_vmid **id)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	uint64_t fence_context = adev->fence_context + ring->idx;
+	struct dma_fence *updates = sync->last_vm_update;
+	bool needs_flush = vm->use_cpu_for_update;
+	int r = 0;
+
+	*id = vm->reserved_vmid[vmhub];
+	if (updates && (*id)->flushed_updates &&
+	    updates->context == (*id)->flushed_updates->context &&
+	    !dma_fence_is_later(updates, (*id)->flushed_updates))
+	    updates = NULL;
+
+	if ((*id)->owner != vm->entity.fence_context ||
+	    job->vm_pd_addr != (*id)->pd_gpu_addr ||
+	    updates || !(*id)->last_flush ||
+	    ((*id)->last_flush->context != fence_context &&
+	     !dma_fence_is_signaled((*id)->last_flush))) {
+		struct dma_fence *tmp;
 
 
+		/* to prevent one context being starved by another */
+		(*id)->pd_gpu_addr = 0;
+		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+		if (tmp) {
+			*id = NULL;
+			r = amdgpu_sync_fence(adev, sync, tmp, false);
+			return r;
+		}
+		needs_flush = true;
 	}
 	}
-	kfree(fences);
+
+	/* Good, we can use this VMID. Remember this submission as
+	 * user of the VMID.
+	 */
+	r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+	if (r)
+		return r;
+
+	if (updates) {
+		dma_fence_put((*id)->flushed_updates);
+		(*id)->flushed_updates = dma_fence_get(updates);
+	}
+	job->vm_needs_flush = needs_flush;
+	return 0;
+}
+
+/**
+ * amdgpu_vmid_grab_used - try to reuse a VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ *
+ * Try to reuse a VMID for this submission.
+ */
+static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
+				 struct amdgpu_ring *ring,
+				 struct amdgpu_sync *sync,
+				 struct dma_fence *fence,
+				 struct amdgpu_job *job,
+				 struct amdgpu_vmid **id)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	uint64_t fence_context = adev->fence_context + ring->idx;
+	struct dma_fence *updates = sync->last_vm_update;
+	int r;
 
 
 	job->vm_needs_flush = vm->use_cpu_for_update;
 	job->vm_needs_flush = vm->use_cpu_for_update;
+
 	/* Check if we can use a VMID already assigned to this VM */
 	/* Check if we can use a VMID already assigned to this VM */
-	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
-		struct dma_fence *flushed;
+	list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
 		bool needs_flush = vm->use_cpu_for_update;
 		bool needs_flush = vm->use_cpu_for_update;
+		struct dma_fence *flushed;
 
 
 		/* Check all the prerequisites to using this VMID */
 		/* Check all the prerequisites to using this VMID */
-		if (amdgpu_vmid_had_gpu_reset(adev, id))
-			continue;
-
-		if (atomic64_read(&id->owner) != vm->entity.fence_context)
+		if ((*id)->owner != vm->entity.fence_context)
 			continue;
 			continue;
 
 
-		if (job->vm_pd_addr != id->pd_gpu_addr)
+		if ((*id)->pd_gpu_addr != job->vm_pd_addr)
 			continue;
 			continue;
 
 
-		if (!id->last_flush ||
-		    (id->last_flush->context != fence_context &&
-		     !dma_fence_is_signaled(id->last_flush)))
+		if (!(*id)->last_flush ||
+		    ((*id)->last_flush->context != fence_context &&
+		     !dma_fence_is_signaled((*id)->last_flush)))
 			needs_flush = true;
 			needs_flush = true;
 
 
-		flushed  = id->flushed_updates;
+		flushed  = (*id)->flushed_updates;
 		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
 		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
 			needs_flush = true;
 			needs_flush = true;
 
 
@@ -261,47 +368,91 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
 		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
 			continue;
 			continue;
 
 
-		/* Good we can use this VMID. Remember this submission as
+		/* Good, we can use this VMID. Remember this submission as
 		 * user of the VMID.
 		 * user of the VMID.
 		 */
 		 */
-		r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
+		r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
 		if (r)
 		if (r)
-			goto error;
+			return r;
 
 
 		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
 		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
-			dma_fence_put(id->flushed_updates);
-			id->flushed_updates = dma_fence_get(updates);
+			dma_fence_put((*id)->flushed_updates);
+			(*id)->flushed_updates = dma_fence_get(updates);
 		}
 		}
 
 
-		if (needs_flush)
-			goto needs_flush;
-		else
-			goto no_flush_needed;
+		job->vm_needs_flush |= needs_flush;
+		return 0;
+	}
 
 
-	};
+	*id = NULL;
+	return 0;
+}
 
 
-	/* Still no ID to use? Then use the idle one found earlier */
-	id = idle;
+/**
+ * amdgpu_vmid_grab - allocate the next free VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ *
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ */
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		     struct amdgpu_sync *sync, struct dma_fence *fence,
+		     struct amdgpu_job *job)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vmid *idle = NULL;
+	struct amdgpu_vmid *id = NULL;
+	int r = 0;
 
 
-	/* Remember this submission as user of the VMID */
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
-	if (r)
+	mutex_lock(&id_mgr->lock);
+	r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
+	if (r || !idle)
 		goto error;
 		goto error;
 
 
-	id->pd_gpu_addr = job->vm_pd_addr;
-	dma_fence_put(id->flushed_updates);
-	id->flushed_updates = dma_fence_get(updates);
-	atomic64_set(&id->owner, vm->entity.fence_context);
+	if (vm->reserved_vmid[vmhub]) {
+		r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
+		if (r || !id)
+			goto error;
+	} else {
+		r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
+		if (r)
+			goto error;
 
 
-needs_flush:
-	job->vm_needs_flush = true;
-	dma_fence_put(id->last_flush);
-	id->last_flush = NULL;
+		if (!id) {
+			struct dma_fence *updates = sync->last_vm_update;
 
 
-no_flush_needed:
-	list_move_tail(&id->list, &id_mgr->ids_lru);
+			/* Still no ID to use? Then use the idle one found earlier */
+			id = idle;
 
 
+			/* Remember this submission as user of the VMID */
+			r = amdgpu_sync_fence(ring->adev, &id->active,
+					      fence, false);
+			if (r)
+				goto error;
+
+			dma_fence_put(id->flushed_updates);
+			id->flushed_updates = dma_fence_get(updates);
+			job->vm_needs_flush = true;
+		}
+
+		list_move_tail(&id->list, &id_mgr->ids_lru);
+	}
+
+	id->pd_gpu_addr = job->vm_pd_addr;
+	id->owner = vm->entity.fence_context;
+
+	if (job->vm_needs_flush) {
+		dma_fence_put(id->last_flush);
+		id->last_flush = NULL;
+	}
 	job->vmid = id - id_mgr->ids;
 	job->vmid = id - id_mgr->ids;
+	job->pasid = vm->pasid;
 	trace_amdgpu_vm_grab_id(vm, ring, job);
 	trace_amdgpu_vm_grab_id(vm, ring, job);
 
 
 error:
 error:
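
Taken together, the refactored amdgpu_vmid_grab() now works in three stages: grab an idle VMID first, then either use the VM's reserved VMID or try to reuse a VMID this VM already owns, and only fall back to the idle one (forcing a flush) when neither applies. The compact userspace model below assumes simplified stand-in types and matching rules and only mirrors that ordering.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct vmid {
	unsigned long owner;		/* fence context of the owning VM */
	unsigned long pd_addr;		/* last page-directory address used */
};

static struct vmid *vmid_grab(struct vmid *ids, size_t n, struct vmid *reserved,
			      struct vmid *idle, unsigned long owner,
			      unsigned long pd_addr, bool *needs_flush)
{
	struct vmid *id = NULL;
	size_t i;

	*needs_flush = false;
	if (reserved) {				/* reserved VMID wins */
		id = reserved;
		*needs_flush = id->pd_addr != pd_addr;
	} else {
		for (i = 0; i < n; i++) {	/* try to reuse a matching one */
			if (ids[i].owner == owner && ids[i].pd_addr == pd_addr) {
				id = &ids[i];
				break;
			}
		}
		if (!id) {			/* fall back to the idle VMID */
			id = idle;
			*needs_flush = true;
		}
	}
	id->owner = owner;
	id->pd_addr = pd_addr;
	return id;
}

int main(void)
{
	struct vmid ids[4] = { { 0 } };
	bool flush;
	struct vmid *id = vmid_grab(ids, 4, NULL, &ids[0], 0x1, 0x1000, &flush);

	printf("vmid %ld, needs_flush=%d\n", (long)(id - ids), flush);
	return 0;
}
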
@@ -370,13 +521,15 @@ void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vmid *id = &id_mgr->ids[vmid];
 	struct amdgpu_vmid *id = &id_mgr->ids[vmid];
 
 
-	atomic64_set(&id->owner, 0);
+	mutex_lock(&id_mgr->lock);
+	id->owner = 0;
 	id->gds_base = 0;
 	id->gds_base = 0;
 	id->gds_size = 0;
 	id->gds_size = 0;
 	id->gws_base = 0;
 	id->gws_base = 0;
 	id->gws_size = 0;
 	id->gws_size = 0;
 	id->oa_base = 0;
 	id->oa_base = 0;
 	id->oa_size = 0;
 	id->oa_size = 0;
+	mutex_unlock(&id_mgr->lock);
 }
 }
 
 
 /**
 /**
@@ -454,6 +607,7 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
 			amdgpu_sync_free(&id->active);
 			amdgpu_sync_free(&id->active);
 			dma_fence_put(id->flushed_updates);
 			dma_fence_put(id->flushed_updates);
 			dma_fence_put(id->last_flush);
 			dma_fence_put(id->last_flush);
+			dma_fence_put(id->pasid_mapping);
 		}
 		}
 	}
 	}
 }
 }

+ 6 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

@@ -43,7 +43,7 @@ struct amdgpu_vmid {
 	struct list_head	list;
 	struct list_head	list;
 	struct amdgpu_sync	active;
 	struct amdgpu_sync	active;
 	struct dma_fence	*last_flush;
 	struct dma_fence	*last_flush;
-	atomic64_t		owner;
+	uint64_t		owner;
 
 
 	uint64_t		pd_gpu_addr;
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	/* last flushed PD/PT update */
@@ -57,6 +57,9 @@ struct amdgpu_vmid {
 	uint32_t		gws_size;
 	uint32_t		gws_size;
 	uint32_t		oa_base;
 	uint32_t		oa_base;
 	uint32_t		oa_size;
 	uint32_t		oa_size;
+
+	unsigned		pasid;
+	struct dma_fence	*pasid_mapping;
 };
 };
 
 
 struct amdgpu_vmid_mgr {
 struct amdgpu_vmid_mgr {
@@ -69,6 +72,8 @@ struct amdgpu_vmid_mgr {
 
 
 int amdgpu_pasid_alloc(unsigned int bits);
 int amdgpu_pasid_alloc(unsigned int bits);
 void amdgpu_pasid_free(unsigned int pasid);
 void amdgpu_pasid_free(unsigned int pasid);
+void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+			       unsigned int pasid);
 
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 			       struct amdgpu_vmid *id);
 			       struct amdgpu_vmid *id);
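
With the grab path now fully serialized by the ID manager mutex (note the mutex_lock added to amdgpu_vmid_reset() above), the owner field no longer needs to be an atomic64_t and becomes a plain uint64_t. A small pthread sketch of that pattern follows, using stand-in types rather than driver code.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct vmid_mgr {
	pthread_mutex_t lock;
	uint64_t owner;		/* was atomic; now only touched under ->lock */
};

static void vmid_reset(struct vmid_mgr *mgr)
{
	pthread_mutex_lock(&mgr->lock);
	mgr->owner = 0;		/* safe: every reader/writer holds the lock */
	pthread_mutex_unlock(&mgr->lock);
}

int main(void)
{
	struct vmid_mgr mgr = { PTHREAD_MUTEX_INITIALIZER, 7 };

	vmid_reset(&mgr);
	printf("owner=%llu\n", (unsigned long long)mgr.owner);
	return 0;
}
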

+ 3 - 42
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

@@ -25,51 +25,12 @@
 #define __AMDGPU_IH_H__
 #define __AMDGPU_IH_H__
 
 
 #include <linux/chash.h>
 #include <linux/chash.h>
+#include "soc15_ih_clientid.h"
 
 
 struct amdgpu_device;
 struct amdgpu_device;
- /*
-  * vega10+ IH clients
- */
-enum amdgpu_ih_clientid
-{
-    AMDGPU_IH_CLIENTID_IH	    = 0x00,
-    AMDGPU_IH_CLIENTID_ACP	    = 0x01,
-    AMDGPU_IH_CLIENTID_ATHUB	    = 0x02,
-    AMDGPU_IH_CLIENTID_BIF	    = 0x03,
-    AMDGPU_IH_CLIENTID_DCE	    = 0x04,
-    AMDGPU_IH_CLIENTID_ISP	    = 0x05,
-    AMDGPU_IH_CLIENTID_PCIE0	    = 0x06,
-    AMDGPU_IH_CLIENTID_RLC	    = 0x07,
-    AMDGPU_IH_CLIENTID_SDMA0	    = 0x08,
-    AMDGPU_IH_CLIENTID_SDMA1	    = 0x09,
-    AMDGPU_IH_CLIENTID_SE0SH	    = 0x0a,
-    AMDGPU_IH_CLIENTID_SE1SH	    = 0x0b,
-    AMDGPU_IH_CLIENTID_SE2SH	    = 0x0c,
-    AMDGPU_IH_CLIENTID_SE3SH	    = 0x0d,
-    AMDGPU_IH_CLIENTID_SYSHUB	    = 0x0e,
-    AMDGPU_IH_CLIENTID_THM	    = 0x0f,
-    AMDGPU_IH_CLIENTID_UVD	    = 0x10,
-    AMDGPU_IH_CLIENTID_VCE0	    = 0x11,
-    AMDGPU_IH_CLIENTID_VMC	    = 0x12,
-    AMDGPU_IH_CLIENTID_XDMA	    = 0x13,
-    AMDGPU_IH_CLIENTID_GRBM_CP	    = 0x14,
-    AMDGPU_IH_CLIENTID_ATS	    = 0x15,
-    AMDGPU_IH_CLIENTID_ROM_SMUIO    = 0x16,
-    AMDGPU_IH_CLIENTID_DF	    = 0x17,
-    AMDGPU_IH_CLIENTID_VCE1	    = 0x18,
-    AMDGPU_IH_CLIENTID_PWR	    = 0x19,
-    AMDGPU_IH_CLIENTID_UTCL2	    = 0x1b,
-    AMDGPU_IH_CLIENTID_EA	    = 0x1c,
-    AMDGPU_IH_CLIENTID_UTCL2LOG	    = 0x1d,
-    AMDGPU_IH_CLIENTID_MP0	    = 0x1e,
-    AMDGPU_IH_CLIENTID_MP1	    = 0x1f,
-
-    AMDGPU_IH_CLIENTID_MAX,
-
-    AMDGPU_IH_CLIENTID_VCN	    = AMDGPU_IH_CLIENTID_UVD
-};
 
 
 #define AMDGPU_IH_CLIENTID_LEGACY 0
 #define AMDGPU_IH_CLIENTID_LEGACY 0
+#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
 
 
 #define AMDGPU_PAGEFAULT_HASH_BITS 8
 #define AMDGPU_PAGEFAULT_HASH_BITS 8
 struct amdgpu_retryfault_hashtable {
 struct amdgpu_retryfault_hashtable {
@@ -109,7 +70,7 @@ struct amdgpu_iv_entry {
 	unsigned vmid_src;
 	unsigned vmid_src;
 	uint64_t timestamp;
 	uint64_t timestamp;
 	unsigned timestamp_src;
 	unsigned timestamp_src;
-	unsigned pas_id;
+	unsigned pasid;
 	unsigned pasid_src;
 	unsigned pasid_src;
 	unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
 	unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
 	const uint32_t *iv_entry;
 	const uint32_t *iv_entry;

+ 6 - 52
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c

@@ -92,7 +92,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
 }
 }
 
 
 /* Disable *all* interrupts */
 /* Disable *all* interrupts */
-static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
+void amdgpu_irq_disable_all(struct amdgpu_device *adev)
 {
 {
 	unsigned long irqflags;
 	unsigned long irqflags;
 	unsigned i, j, k;
 	unsigned i, j, k;
@@ -122,55 +122,6 @@ static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
 	spin_unlock_irqrestore(&adev->irq.lock, irqflags);
 	spin_unlock_irqrestore(&adev->irq.lock, irqflags);
 }
 }
 
 
-/**
- * amdgpu_irq_preinstall - drm irq preinstall callback
- *
- * @dev: drm dev pointer
- *
- * Gets the hw ready to enable irqs (all asics).
- * This function disables all interrupt sources on the GPU.
- */
-void amdgpu_irq_preinstall(struct drm_device *dev)
-{
-	struct amdgpu_device *adev = dev->dev_private;
-
-	/* Disable *all* interrupts */
-	amdgpu_irq_disable_all(adev);
-	/* Clear bits */
-	amdgpu_ih_process(adev);
-}
-
-/**
- * amdgpu_irq_postinstall - drm irq preinstall callback
- *
- * @dev: drm dev pointer
- *
- * Handles stuff to be done after enabling irqs (all asics).
- * Returns 0 on success.
- */
-int amdgpu_irq_postinstall(struct drm_device *dev)
-{
-	dev->max_vblank_count = 0x00ffffff;
-	return 0;
-}
-
-/**
- * amdgpu_irq_uninstall - drm irq uninstall callback
- *
- * @dev: drm dev pointer
- *
- * This function disables all interrupt sources on the GPU (all asics).
- */
-void amdgpu_irq_uninstall(struct drm_device *dev)
-{
-	struct amdgpu_device *adev = dev->dev_private;
-
-	if (adev == NULL) {
-		return;
-	}
-	amdgpu_irq_disable_all(adev);
-}
-
 /**
 /**
  * amdgpu_irq_handler - irq handler
  * amdgpu_irq_handler - irq handler
  *
  *
@@ -257,10 +208,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 	r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
 	r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
 	if (r) {
 	if (r) {
 		adev->irq.installed = false;
 		adev->irq.installed = false;
-		flush_work(&adev->hotplug_work);
+		if (!amdgpu_device_has_dc_support(adev))
+			flush_work(&adev->hotplug_work);
 		cancel_work_sync(&adev->reset_work);
 		cancel_work_sync(&adev->reset_work);
 		return r;
 		return r;
 	}
 	}
+	adev->ddev->max_vblank_count = 0x00ffffff;
 
 
 	DRM_DEBUG("amdgpu: irq initialized.\n");
 	DRM_DEBUG("amdgpu: irq initialized.\n");
 	return 0;
 	return 0;
@@ -282,7 +235,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
 		adev->irq.installed = false;
 		adev->irq.installed = false;
 		if (adev->irq.msi_enabled)
 		if (adev->irq.msi_enabled)
 			pci_disable_msi(adev->pdev);
 			pci_disable_msi(adev->pdev);
-		flush_work(&adev->hotplug_work);
+		if (!amdgpu_device_has_dc_support(adev))
+			flush_work(&adev->hotplug_work);
 		cancel_work_sync(&adev->reset_work);
 		cancel_work_sync(&adev->reset_work);
 	}
 	}
 
 

+ 1 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h

@@ -78,9 +78,7 @@ struct amdgpu_irq {
 	uint32_t                        srbm_soft_reset;
 	uint32_t                        srbm_soft_reset;
 };
 };
 
 
-void amdgpu_irq_preinstall(struct drm_device *dev);
-int amdgpu_irq_postinstall(struct drm_device *dev);
-void amdgpu_irq_uninstall(struct drm_device *dev);
+void amdgpu_irq_disable_all(struct amdgpu_device *adev);
 irqreturn_t amdgpu_irq_handler(int irq, void *arg);
 irqreturn_t amdgpu_irq_handler(int irq, void *arg);
 
 
 int amdgpu_irq_init(struct amdgpu_device *adev);
 int amdgpu_irq_init(struct amdgpu_device *adev);

+ 68 - 30
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -191,7 +191,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->feature = 0;
 		fw_info->feature = 0;
 		break;
 		break;
 	case AMDGPU_INFO_FW_GMC:
 	case AMDGPU_INFO_FW_GMC:
-		fw_info->ver = adev->mc.fw_version;
+		fw_info->ver = adev->gmc.fw_version;
 		fw_info->feature = 0;
 		fw_info->feature = 0;
 		break;
 		break;
 	case AMDGPU_INFO_FW_GFX_ME:
 	case AMDGPU_INFO_FW_GFX_ME:
@@ -470,9 +470,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	case AMDGPU_INFO_VRAM_GTT: {
 	case AMDGPU_INFO_VRAM_GTT: {
 		struct drm_amdgpu_info_vram_gtt vram_gtt;
 		struct drm_amdgpu_info_vram_gtt vram_gtt;
 
 
-		vram_gtt.vram_size = adev->mc.real_vram_size;
+		vram_gtt.vram_size = adev->gmc.real_vram_size;
 		vram_gtt.vram_size -= adev->vram_pin_size;
 		vram_gtt.vram_size -= adev->vram_pin_size;
-		vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size;
+		vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size;
 		vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
 		vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
 		vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
 		vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
 		vram_gtt.gtt_size *= PAGE_SIZE;
 		vram_gtt.gtt_size *= PAGE_SIZE;
@@ -484,17 +484,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		struct drm_amdgpu_memory_info mem;
 		struct drm_amdgpu_memory_info mem;
 
 
 		memset(&mem, 0, sizeof(mem));
 		memset(&mem, 0, sizeof(mem));
-		mem.vram.total_heap_size = adev->mc.real_vram_size;
+		mem.vram.total_heap_size = adev->gmc.real_vram_size;
 		mem.vram.usable_heap_size =
 		mem.vram.usable_heap_size =
-			adev->mc.real_vram_size - adev->vram_pin_size;
+			adev->gmc.real_vram_size - adev->vram_pin_size;
 		mem.vram.heap_usage =
 		mem.vram.heap_usage =
 			amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 			amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 
 
 		mem.cpu_accessible_vram.total_heap_size =
 		mem.cpu_accessible_vram.total_heap_size =
-			adev->mc.visible_vram_size;
+			adev->gmc.visible_vram_size;
 		mem.cpu_accessible_vram.usable_heap_size =
 		mem.cpu_accessible_vram.usable_heap_size =
-			adev->mc.visible_vram_size -
+			adev->gmc.visible_vram_size -
 			(adev->vram_pin_size - adev->invisible_pin_size);
 			(adev->vram_pin_size - adev->invisible_pin_size);
 		mem.cpu_accessible_vram.heap_usage =
 		mem.cpu_accessible_vram.heap_usage =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -580,11 +580,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
 			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
 
 
 		vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
 		vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
+		vm_size -= AMDGPU_VA_RESERVED_SIZE;
+
+		/* Older VCE FW versions are buggy and can handle only 40 bits */
+		if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
+			vm_size = min(vm_size, 1ULL << 40);
+
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max =
 		dev_info.virtual_address_max =
 			min(vm_size, AMDGPU_VA_HOLE_START);
 			min(vm_size, AMDGPU_VA_HOLE_START);
 
 
-		vm_size -= AMDGPU_VA_RESERVED_SIZE;
 		if (vm_size > AMDGPU_VA_HOLE_START) {
 		if (vm_size > AMDGPU_VA_HOLE_START) {
 			dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
 			dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
 			dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
 			dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
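
The reordering above subtracts the reserved area from vm_size before the low/high range split and clamps the size to 40 bits for older VCE firmware, so both reported ranges shrink consistently. The sketch below models the resulting computation; the reserved size and hole boundaries are made-up example values, not the driver's constants.

#include <stdint.h>
#include <stdio.h>

#define VA_RESERVED_SIZE (1ULL << 20)		/* assumed */
#define VA_HOLE_START	 (1ULL << 47)		/* assumed */
#define VA_HOLE_END	 (~((1ULL << 47) - 1))	/* assumed */

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

static void report_va(uint64_t vm_bytes, int vce_is_40bit_only)
{
	uint64_t vm_size = vm_bytes - VA_RESERVED_SIZE;

	/* Older VCE firmware can only address 40 bits of VA space. */
	if (vce_is_40bit_only)
		vm_size = min_u64(vm_size, 1ULL << 40);

	printf("low range:  %#llx - %#llx\n",
	       (unsigned long long)VA_RESERVED_SIZE,
	       (unsigned long long)min_u64(vm_size, VA_HOLE_START));
	if (vm_size > VA_HOLE_START)
		printf("high range: %#llx - %#llx\n",
		       (unsigned long long)VA_HOLE_END,
		       (unsigned long long)(VA_HOLE_END | vm_size));
}

int main(void)
{
	report_va(1ULL << 48, 0);	/* large VM space, newer VCE firmware */
	report_va(1ULL << 48, 1);	/* same space, 40-bit-limited VCE */
	return 0;
}
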
@@ -599,8 +604,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		       sizeof(adev->gfx.cu_info.ao_cu_bitmap));
 		       sizeof(adev->gfx.cu_info.ao_cu_bitmap));
 		memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
 		memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
 		       sizeof(adev->gfx.cu_info.bitmap));
 		       sizeof(adev->gfx.cu_info.bitmap));
-		dev_info.vram_type = adev->mc.vram_type;
-		dev_info.vram_bit_width = adev->mc.vram_width;
+		dev_info.vram_type = adev->gmc.vram_type;
+		dev_info.vram_bit_width = adev->gmc.vram_width;
 		dev_info.vce_harvest_config = adev->vce.harvest_config;
 		dev_info.vce_harvest_config = adev->vce.harvest_config;
 		dev_info.gc_double_offchip_lds_buf =
 		dev_info.gc_double_offchip_lds_buf =
 			adev->gfx.config.double_offchip_lds_buf;
 			adev->gfx.config.double_offchip_lds_buf;
@@ -758,6 +763,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 				return -EINVAL;
 				return -EINVAL;
 			}
 			}
 			break;
 			break;
+		case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK:
+			/* get stable pstate sclk in MHz */
+			if (amdgpu_dpm_read_sensor(adev,
+						   AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
+						   (void *)&ui32, &ui32_size)) {
+				return -EINVAL;
+			}
+			ui32 /= 100;
+			break;
+		case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK:
+			/* get stable pstate mclk in Mhz */
+			/* get stable pstate mclk in MHz */
+						   AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK,
+						   (void *)&ui32, &ui32_size)) {
+				return -EINVAL;
+			}
+			ui32 /= 100;
+			break;
 		default:
 		default:
 			DRM_DEBUG_KMS("Invalid request %d\n",
 			DRM_DEBUG_KMS("Invalid request %d\n",
 				      info->sensor_info.type);
 				      info->sensor_info.type);
@@ -805,7 +828,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv;
 	struct amdgpu_fpriv *fpriv;
-	int r;
+	int r, pasid;
 
 
 	file_priv->driver_priv = NULL;
 	file_priv->driver_priv = NULL;
 
 
@@ -819,28 +842,25 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		goto out_suspend;
 		goto out_suspend;
 	}
 	}
 
 
-	r = amdgpu_vm_init(adev, &fpriv->vm,
-			   AMDGPU_VM_CONTEXT_GFX, 0);
-	if (r) {
-		kfree(fpriv);
-		goto out_suspend;
+	pasid = amdgpu_pasid_alloc(16);
+	if (pasid < 0) {
+		dev_warn(adev->dev, "No more PASIDs available!");
+		pasid = 0;
 	}
 	}
+	r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
+	if (r)
+		goto error_pasid;
 
 
 	fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
 	fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
 	if (!fpriv->prt_va) {
 	if (!fpriv->prt_va) {
 		r = -ENOMEM;
 		r = -ENOMEM;
-		amdgpu_vm_fini(adev, &fpriv->vm);
-		kfree(fpriv);
-		goto out_suspend;
+		goto error_vm;
 	}
 	}
 
 
 	if (amdgpu_sriov_vf(adev)) {
 	if (amdgpu_sriov_vf(adev)) {
 		r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
 		r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
-		if (r) {
-			amdgpu_vm_fini(adev, &fpriv->vm);
-			kfree(fpriv);
-			goto out_suspend;
-		}
+		if (r)
+			goto error_vm;
 	}
 	}
 
 
 	mutex_init(&fpriv->bo_list_lock);
 	mutex_init(&fpriv->bo_list_lock);
@@ -849,6 +869,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
 
 	file_priv->driver_priv = fpriv;
 	file_priv->driver_priv = fpriv;
+	goto out_suspend;
+
+error_vm:
+	amdgpu_vm_fini(adev, &fpriv->vm);
+
+error_pasid:
+	if (pasid)
+		amdgpu_pasid_free(pasid);
+
+	kfree(fpriv);
 
 
 out_suspend:
 out_suspend:
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_mark_last_busy(dev->dev);
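
Above, every open now allocates a 16-bit PASID, falling back to 0 (meaning "no PASID") when the space is exhausted, and the matching free in postclose below is deferred until the page-directory BO's fences signal. A toy userspace model of that lifecycle follows; the trivial allocator and types are stand-ins, not the kernel's IDA.

#include <stdio.h>

static unsigned int next_pasid = 1;	/* toy allocator, 0 means "none" */

static int pasid_alloc(void)
{
	return next_pasid < 65536 ? (int)next_pasid++ : -1;
}

static void pasid_free(unsigned int pasid)
{
	printf("freed pasid %u\n", pasid);
}

struct fpriv { unsigned int pasid; };

static void driver_open(struct fpriv *fp)
{
	int pasid = pasid_alloc();

	if (pasid < 0) {
		fprintf(stderr, "No more PASIDs available!\n");
		pasid = 0;	/* the VM still works, just without a PASID */
	}
	fp->pasid = (unsigned int)pasid;
}

static void driver_close(struct fpriv *fp)
{
	if (fp->pasid)		/* only free what was really allocated */
		pasid_free(fp->pasid);
}

int main(void)
{
	struct fpriv fp;

	driver_open(&fp);
	driver_close(&fp);
	return 0;
}
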
@@ -871,6 +901,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
 	struct amdgpu_bo_list *list;
+	struct amdgpu_bo *pd;
+	unsigned int pasid;
 	int handle;
 	int handle;
 
 
 	if (!fpriv)
 	if (!fpriv)
@@ -895,7 +927,13 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		amdgpu_bo_unreserve(adev->virt.csa_obj);
 		amdgpu_bo_unreserve(adev->virt.csa_obj);
 	}
 	}
 
 
+	pasid = fpriv->vm.pasid;
+	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
+
 	amdgpu_vm_fini(adev, &fpriv->vm);
 	amdgpu_vm_fini(adev, &fpriv->vm);
+	if (pasid)
+		amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
+	amdgpu_bo_unref(&pd);
 
 
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
 		amdgpu_bo_list_free(list);
 		amdgpu_bo_list_free(list);
@@ -947,11 +985,11 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
 		 */
 		 */
 		do {
 		do {
 			count = amdgpu_display_vblank_get_counter(adev, pipe);
 			count = amdgpu_display_vblank_get_counter(adev, pipe);
-			/* Ask amdgpu_get_crtc_scanoutpos to return vpos as
-			 * distance to start of vblank, instead of regular
-			 * vertical scanout pos.
+			/* Ask amdgpu_display_get_crtc_scanoutpos to return
+			 * vpos as distance to start of vblank, instead of
+			 * regular vertical scanout pos.
 			 */
 			 */
-			stat = amdgpu_get_crtc_scanoutpos(
+			stat = amdgpu_display_get_crtc_scanoutpos(
 				dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
 				dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
 				&vpos, &hpos, NULL, NULL,
 				&vpos, &hpos, NULL, NULL,
 				&adev->mode_info.crtcs[pipe]->base.hwmode);
 				&adev->mode_info.crtcs[pipe]->base.hwmode);
@@ -992,7 +1030,7 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 {
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_device *adev = dev->dev_private;
-	int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
+	int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
 
 
 	return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
 	return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
 }
 }
@@ -1008,7 +1046,7 @@ int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 {
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_device *adev = dev->dev_private;
-	int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
+	int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
 
 
 	amdgpu_irq_put(adev, &adev->crtc_irq, idx);
 	amdgpu_irq_put(adev, &adev->crtc_irq, idx);
 }
 }

+ 25 - 54
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h

@@ -267,8 +267,6 @@ struct amdgpu_display_funcs {
 	void (*bandwidth_update)(struct amdgpu_device *adev);
 	void (*bandwidth_update)(struct amdgpu_device *adev);
 	/* get frame count */
 	/* get frame count */
 	u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
 	u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
-	/* wait for vblank */
-	void (*vblank_wait)(struct amdgpu_device *adev, int crtc);
 	/* set backlight level */
 	/* set backlight level */
 	void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
 	void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
 				    u8 level);
 				    u8 level);
@@ -352,6 +350,7 @@ struct amdgpu_mode_info {
 	u16 firmware_flags;
 	u16 firmware_flags;
 	/* pointer to backlight encoder */
 	/* pointer to backlight encoder */
 	struct amdgpu_encoder *bl_encoder;
 	struct amdgpu_encoder *bl_encoder;
+	u8 bl_level; /* saved backlight level */
 	struct amdgpu_audio	audio; /* audio stuff */
 	struct amdgpu_audio	audio; /* audio stuff */
 	int			num_crtc; /* number of crtcs */
 	int			num_crtc; /* number of crtcs */
 	int			num_hpd; /* number of hpd pins */
 	int			num_hpd; /* number of hpd pins */
@@ -552,14 +551,6 @@ struct amdgpu_connector {
 	/* we need to mind the EDID between detect
 	/* we need to mind the EDID between detect
 	   and get modes due to analog/digital/tvencoder */
 	   and get modes due to analog/digital/tvencoder */
 	struct edid *edid;
 	struct edid *edid;
-	/* number of modes generated from EDID at 'dc_sink' */
-	int num_modes;
-	/* The 'old' sink - before an HPD.
-	 * The 'current' sink is in dc_link->sink. */
-	struct dc_sink *dc_sink;
-	struct dc_link *dc_link;
-	struct dc_sink *dc_em_sink;
-	const struct dc_stream *stream;
 	void *con_priv;
 	void *con_priv;
 	bool dac_load_detect;
 	bool dac_load_detect;
 	bool detected_by_load; /* if the connection status was determined by load */
 	bool detected_by_load; /* if the connection status was determined by load */
@@ -570,27 +561,6 @@ struct amdgpu_connector {
 	enum amdgpu_connector_audio audio;
 	enum amdgpu_connector_audio audio;
 	enum amdgpu_connector_dither dither;
 	enum amdgpu_connector_dither dither;
 	unsigned pixelclock_for_modeset;
 	unsigned pixelclock_for_modeset;
-
-	struct drm_dp_mst_topology_mgr mst_mgr;
-	struct amdgpu_dm_dp_aux dm_dp_aux;
-	struct drm_dp_mst_port *port;
-	struct amdgpu_connector *mst_port;
-	struct amdgpu_encoder *mst_encoder;
-	struct semaphore mst_sem;
-
-	/* TODO see if we can merge with ddc_bus or make a dm_connector */
-	struct amdgpu_i2c_adapter *i2c;
-
-	/* Monitor range limits */
-	int min_vfreq ;
-	int max_vfreq ;
-	int pixel_clock_mhz;
-
-	/*freesync caps*/
-	struct mod_freesync_caps caps;
-
-	struct mutex hpd_lock;
-
 };
 };
 
 
 /* TODO: start to use this struct and remove same field from base one */
 /* TODO: start to use this struct and remove same field from base one */
@@ -608,7 +578,7 @@ struct amdgpu_mst_connector {
 #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
 #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 
 
-/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
+/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */
 #define DRM_SCANOUTPOS_VALID        (1 << 0)
 #define DRM_SCANOUTPOS_VALID        (1 << 0)
 #define DRM_SCANOUTPOS_IN_VBLANK    (1 << 1)
 #define DRM_SCANOUTPOS_IN_VBLANK    (1 << 1)
 #define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
 #define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
@@ -627,30 +597,31 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
 u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder);
 u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder);
 struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder);
 struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder);
 
 
-bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux);
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+			      bool use_aux);
 
 
 void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
 void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
 
 
-int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
-			       unsigned int flags, int *vpos, int *hpos,
-			       ktime_t *stime, ktime_t *etime,
-			       const struct drm_display_mode *mode);
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+			unsigned int pipe, unsigned int flags, int *vpos,
+			int *hpos, ktime_t *stime, ktime_t *etime,
+			const struct drm_display_mode *mode);
 
 
-int amdgpu_framebuffer_init(struct drm_device *dev,
-			     struct amdgpu_framebuffer *rfb,
-			     const struct drm_mode_fb_cmd2 *mode_cmd,
-			     struct drm_gem_object *obj);
+int amdgpu_display_framebuffer_init(struct drm_device *dev,
+				    struct amdgpu_framebuffer *rfb,
+				    const struct drm_mode_fb_cmd2 *mode_cmd,
+				    struct drm_gem_object *obj);
 
 
 int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
 int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
 
 
 void amdgpu_enc_destroy(struct drm_encoder *encoder);
 void amdgpu_enc_destroy(struct drm_encoder *encoder);
 void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
 void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
-bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
-					const struct drm_display_mode *mode,
-					struct drm_display_mode *adjusted_mode);
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+				const struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode);
 void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
 void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
 			     struct drm_display_mode *adjusted_mode);
 			     struct drm_display_mode *adjusted_mode);
-int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
 
 
 /* fbdev layer */
 /* fbdev layer */
 int amdgpu_fbdev_init(struct amdgpu_device *adev);
 int amdgpu_fbdev_init(struct amdgpu_device *adev);
@@ -662,15 +633,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
 int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
 int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
 
 
 /* amdgpu_display.c */
 /* amdgpu_display.c */
-void amdgpu_print_display_setup(struct drm_device *dev);
-int amdgpu_modeset_create_props(struct amdgpu_device *adev);
-int amdgpu_crtc_set_config(struct drm_mode_set *set,
-			   struct drm_modeset_acquire_ctx *ctx);
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags, uint32_t target,
-				 struct drm_modeset_acquire_ctx *ctx);
+void amdgpu_display_print_display_setup(struct drm_device *dev);
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+				   struct drm_modeset_acquire_ctx *ctx);
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb,
+				struct drm_pending_vblank_event *event,
+				uint32_t page_flip_flags, uint32_t target,
+				struct drm_modeset_acquire_ctx *ctx);
 extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
 extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
 
 
 #endif
 #endif

+ 52 - 60
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

@@ -36,6 +36,7 @@
 #include <drm/drm_cache.h>
 #include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 
 static bool amdgpu_need_backup(struct amdgpu_device *adev)
 static bool amdgpu_need_backup(struct amdgpu_device *adev)
 {
 {
@@ -54,8 +55,13 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
 	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 
 
+	if (bo->kfd_bo)
+		amdgpu_amdkfd_unreserve_system_memory_limit(bo);
+
 	amdgpu_bo_kunmap(bo);
 	amdgpu_bo_kunmap(bo);
 
 
+	if (bo->gem_base.import_attach)
+		drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
 	drm_gem_object_release(&bo->gem_base);
 	drm_gem_object_release(&bo->gem_base);
 	amdgpu_bo_unref(&bo->parent);
 	amdgpu_bo_unref(&bo->parent);
 	if (!list_empty(&bo->shadow_list)) {
 	if (!list_empty(&bo->shadow_list)) {
@@ -83,7 +89,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 	u32 c = 0;
 	u32 c = 0;
 
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
-		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+		unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 
 
 		places[c].fpfn = 0;
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
 		places[c].lpfn = 0;
@@ -103,7 +109,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		places[c].fpfn = 0;
 		places[c].fpfn = 0;
 		if (flags & AMDGPU_GEM_CREATE_SHADOW)
 		if (flags & AMDGPU_GEM_CREATE_SHADOW)
-			places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+			places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
 		else
 		else
 			places[c].lpfn = 0;
 			places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_TT;
 		places[c].flags = TTM_PL_FLAG_TT;
@@ -169,13 +175,15 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
  * @size: size for the new BO
  * @size: size for the new BO
  * @align: alignment for the new BO
  * @align: alignment for the new BO
  * @domain: where to place it
  * @domain: where to place it
- * @bo_ptr: resulting BO
+ * @bo_ptr: used to initialize BOs in structures
  * @gpu_addr: GPU addr of the pinned BO
  * @gpu_addr: GPU addr of the pinned BO
  * @cpu_addr: optional CPU address mapping
  * @cpu_addr: optional CPU address mapping
  *
  *
  * Allocates and pins a BO for kernel internal use, and returns it still
  * Allocates and pins a BO for kernel internal use, and returns it still
  * reserved.
  * reserved.
  *
  *
+ * Note: For bo_ptr, a new BO is only created if bo_ptr points to NULL.
+ *
  * Returns 0 on success, negative error code otherwise.
  * Returns 0 on success, negative error code otherwise.
  */
  */
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
@@ -187,10 +195,10 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 	int r;
 	int r;
 
 
 	if (!*bo_ptr) {
 	if (!*bo_ptr) {
-		r = amdgpu_bo_create(adev, size, align, true, domain,
+		r = amdgpu_bo_create(adev, size, align, domain,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     NULL, NULL, 0, bo_ptr);
+				     ttm_bo_type_kernel, NULL, bo_ptr);
 		if (r) {
 		if (r) {
 			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
 			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
 				r);
 				r);
@@ -238,12 +246,14 @@ error_free:
  * @size: size for the new BO
  * @size: size for the new BO
  * @align: alignment for the new BO
  * @align: alignment for the new BO
  * @domain: where to place it
  * @domain: where to place it
- * @bo_ptr: resulting BO
+ * @bo_ptr: used to initialize BOs in structures
  * @gpu_addr: GPU addr of the pinned BO
  * @gpu_addr: GPU addr of the pinned BO
  * @cpu_addr: optional CPU address mapping
  * @cpu_addr: optional CPU address mapping
  *
  *
  * Allocates and pins a BO for kernel internal use.
  * Allocates and pins a BO for kernel internal use.
  *
  *
+ * Note: For bo_ptr, a new BO is only created if bo_ptr points to NULL.
+ *
  * Returns 0 on success, negative error code otherwise.
  * Returns 0 on success, negative error code otherwise.
  */
  */
 int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
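
The "only created if bo_ptr points to NULL" note added to both kernel-BO helpers above describes a reuse convention: callers may pass in an already allocated BO, and the helper then only pins and maps it. A tiny userspace model of that convention, with a stand-in bo type:

#include <stdio.h>
#include <stdlib.h>

struct bo { size_t size; };

static int bo_create_kernel(size_t size, struct bo **bo_ptr)
{
	if (!*bo_ptr) {			/* reuse a caller-provided BO if set */
		*bo_ptr = calloc(1, sizeof(**bo_ptr));
		if (!*bo_ptr)
			return -1;
		(*bo_ptr)->size = size;
	}
	/* ...pinning and mapping would happen here in the real helper... */
	return 0;
}

int main(void)
{
	struct bo *bo = NULL;

	if (!bo_create_kernel(4096, &bo))
		printf("bo of %zu bytes\n", bo->size);
	free(bo);
	return 0;
}
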
@@ -331,22 +341,19 @@ fail:
 	return false;
 	return false;
 }
 }
 
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
-			       unsigned long size, int byte_align,
-			       bool kernel, u32 domain, u64 flags,
-			       struct sg_table *sg,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
+			       int byte_align, u32 domain,
+			       u64 flags, enum ttm_bo_type type,
 			       struct reservation_object *resv,
 			       struct reservation_object *resv,
-			       uint64_t init_value,
 			       struct amdgpu_bo **bo_ptr)
 			       struct amdgpu_bo **bo_ptr)
 {
 {
 	struct ttm_operation_ctx ctx = {
 	struct ttm_operation_ctx ctx = {
-		.interruptible = !kernel,
+		.interruptible = (type != ttm_bo_type_kernel),
 		.no_wait_gpu = false,
 		.no_wait_gpu = false,
-		.allow_reserved_eviction = true,
-		.resv = resv
+		.resv = resv,
+		.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
 	};
 	};
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo *bo;
-	enum ttm_bo_type type;
 	unsigned long page_align;
 	unsigned long page_align;
 	size_t acc_size;
 	size_t acc_size;
 	int r;
 	int r;
@@ -357,13 +364,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	if (!amdgpu_bo_validate_size(adev, size, domain))
 	if (!amdgpu_bo_validate_size(adev, size, domain))
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	if (kernel) {
-		type = ttm_bo_type_kernel;
-	} else if (sg) {
-		type = ttm_bo_type_sg;
-	} else {
-		type = ttm_bo_type_device;
-	}
 	*bo_ptr = NULL;
 	*bo_ptr = NULL;
 
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -372,11 +372,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
 	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
 	if (bo == NULL)
 	if (bo == NULL)
 		return -ENOMEM;
 		return -ENOMEM;
-	r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
-	if (unlikely(r)) {
-		kfree(bo);
-		return r;
-	}
+	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
 	INIT_LIST_HEAD(&bo->shadow_list);
 	INIT_LIST_HEAD(&bo->shadow_list);
 	INIT_LIST_HEAD(&bo->va);
 	INIT_LIST_HEAD(&bo->va);
 	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
 	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
@@ -386,7 +382,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 					 AMDGPU_GEM_DOMAIN_GWS |
 					 AMDGPU_GEM_DOMAIN_GWS |
 					 AMDGPU_GEM_DOMAIN_OA);
 					 AMDGPU_GEM_DOMAIN_OA);
 	bo->allowed_domains = bo->preferred_domains;
 	bo->allowed_domains = bo->preferred_domains;
-	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+	if (type != ttm_bo_type_kernel &&
+	    bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
 
 	bo->flags = flags;
 	bo->flags = flags;
@@ -423,27 +420,27 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	amdgpu_ttm_placement_from_domain(bo, domain);
 
 
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
-				 &bo->placement, page_align, &ctx, NULL,
-				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
+				 &bo->placement, page_align, &ctx, acc_size,
+				 NULL, resv, &amdgpu_ttm_bo_destroy);
 	if (unlikely(r != 0))
 	if (unlikely(r != 0))
 		return r;
 		return r;
 
 
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
 	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
 					     ctx.bytes_moved);
 					     ctx.bytes_moved);
 	else
 	else
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
 
 
-	if (kernel)
+	if (type == ttm_bo_type_kernel)
 		bo->tbo.priority = 1;
 		bo->tbo.priority = 1;
 
 
 	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 		struct dma_fence *fence;
 		struct dma_fence *fence;
 
 
-		r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence);
+		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
 		if (unlikely(r))
 		if (unlikely(r))
 			goto fail_unreserve;
 			goto fail_unreserve;
 
 
@@ -480,12 +477,11 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	if (bo->shadow)
 	if (bo->shadow)
 		return 0;
 		return 0;
 
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, true,
-				AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
 				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 				AMDGPU_GEM_CREATE_SHADOW,
 				AMDGPU_GEM_CREATE_SHADOW,
-				NULL, bo->tbo.resv, 0,
-				&bo->shadow);
+				ttm_bo_type_kernel,
+				bo->tbo.resv, &bo->shadow);
 	if (!r) {
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
 		mutex_lock(&adev->shadow_list_lock);
@@ -496,22 +492,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	return r;
 	return r;
 }
 }
 
 
-/* init_value will only take effect when flags contains
- * AMDGPU_GEM_CREATE_VRAM_CLEARED.
- */
-int amdgpu_bo_create(struct amdgpu_device *adev,
-		     unsigned long size, int byte_align,
-		     bool kernel, u32 domain, u64 flags,
-		     struct sg_table *sg,
+int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
+		     int byte_align, u32 domain,
+		     u64 flags, enum ttm_bo_type type,
 		     struct reservation_object *resv,
 		     struct reservation_object *resv,
-		     uint64_t init_value,
 		     struct amdgpu_bo **bo_ptr)
 		     struct amdgpu_bo **bo_ptr)
 {
 {
 	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
 	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
 	int r;
 	int r;
 
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
-				parent_flags, sg, resv, init_value, bo_ptr);
+	r = amdgpu_bo_do_create(adev, size, byte_align, domain,
+				parent_flags, type, resv, bo_ptr);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
@@ -826,31 +817,32 @@ static const char *amdgpu_vram_names[] = {
 	"GDDR4",
 	"GDDR4",
 	"GDDR5",
 	"GDDR5",
 	"HBM",
 	"HBM",
-	"DDR3"
+	"DDR3",
+	"DDR4",
 };
 };
 
 
 int amdgpu_bo_init(struct amdgpu_device *adev)
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
 {
 	/* reserve PAT memory space to WC for VRAM */
 	/* reserve PAT memory space to WC for VRAM */
-	arch_io_reserve_memtype_wc(adev->mc.aper_base,
-				   adev->mc.aper_size);
+	arch_io_reserve_memtype_wc(adev->gmc.aper_base,
+				   adev->gmc.aper_size);
 
 
 	/* Add an MTRR for the VRAM */
 	/* Add an MTRR for the VRAM */
-	adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
-					      adev->mc.aper_size);
+	adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
+					      adev->gmc.aper_size);
 	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
 	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
-		 adev->mc.mc_vram_size >> 20,
-		 (unsigned long long)adev->mc.aper_size >> 20);
+		 adev->gmc.mc_vram_size >> 20,
+		 (unsigned long long)adev->gmc.aper_size >> 20);
 	DRM_INFO("RAM width %dbits %s\n",
 	DRM_INFO("RAM width %dbits %s\n",
-		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
+		 adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
 	return amdgpu_ttm_init(adev);
 	return amdgpu_ttm_init(adev);
 }
 }
 
 
 void amdgpu_bo_fini(struct amdgpu_device *adev)
 void amdgpu_bo_fini(struct amdgpu_device *adev)
 {
 {
 	amdgpu_ttm_fini(adev);
 	amdgpu_ttm_fini(adev);
-	arch_phys_wc_del(adev->mc.vram_mtrr);
-	arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
+	arch_phys_wc_del(adev->gmc.vram_mtrr);
+	arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
 }

 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
@@ -980,7 +972,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)

 	size = bo->mem.num_pages << PAGE_SHIFT;
 	offset = bo->mem.start << PAGE_SHIFT;
-	if ((offset + size) <= adev->mc.visible_vram_size)
+	if ((offset + size) <= adev->gmc.visible_vram_size)
 		return 0;

 	/* Can't move a pinned BO to visible VRAM */
@@ -1003,7 +995,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	offset = bo->mem.start << PAGE_SHIFT;
 	/* this should never happen */
 	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    (offset + size) > adev->mc.visible_vram_size)
+	    (offset + size) > adev->gmc.visible_vram_size)
 		return -EINVAL;

 	return 0;

+ 7 - 9
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

@@ -92,6 +92,8 @@ struct amdgpu_bo {
 		struct list_head	mn_list;
 		struct list_head	shadow_list;
 	};
+
+	struct kgd_mem                  *kfd_bo;
 };

 static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
@@ -201,13 +203,11 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
 	return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
 }

-int amdgpu_bo_create(struct amdgpu_device *adev,
-			    unsigned long size, int byte_align,
-			    bool kernel, u32 domain, u64 flags,
-			    struct sg_table *sg,
-			    struct reservation_object *resv,
-			    uint64_t init_value,
-			    struct amdgpu_bo **bo_ptr);
+int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
+		     int byte_align, u32 domain,
+		     u64 flags, enum ttm_bo_type type,
+		     struct reservation_object *resv,
+		     struct amdgpu_bo **bo_ptr);
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 			      unsigned long size, int align,
 			      u32 domain, struct amdgpu_bo **bo_ptr,
@@ -282,8 +282,6 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
 				      struct amdgpu_sa_manager *sa_manager);
 int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
 				      struct amdgpu_sa_manager *sa_manager);
-int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
-					struct amdgpu_sa_manager *sa_manager);
 int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align);
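
Illustrative example (not part of this commit): with the reworked prototype above, the old bool kernel, struct sg_table *sg and uint64_t init_value parameters are gone and callers pass an explicit enum ttm_bo_type plus the reservation object directly. A minimal sketch of a caller under the new signature, assuming kernel context; the helper name is hypothetical:

static int example_alloc_gtt_bo(struct amdgpu_device *adev,
				struct amdgpu_bo **bo_ptr)
{
	/* adev, size, byte_align, domain, flags, type, resv, bo_ptr */
	return amdgpu_bo_create(adev, 4096, PAGE_SIZE,
				AMDGPU_GEM_DOMAIN_GTT,
				AMDGPU_GEM_CREATE_CPU_GTT_USWC,
				ttm_bo_type_kernel,
				NULL, bo_ptr);
}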

+ 392 - 174
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

@@ -116,7 +116,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 	}

 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state);
 	} else {
 		mutex_lock(&adev->pm.mutex);
 		adev->pm.dpm.user_state = state;
@@ -316,7 +316,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
 		    state != POWER_STATE_TYPE_DEFAULT) {
 			amdgpu_dpm_dispatch_task(adev,
-					AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
+					AMD_PP_TASK_ENABLE_USER_STATE, &state);
 			adev->pp_force_state_enabled = true;
 		}
 	}
@@ -360,6 +360,90 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 	return count;
 }

+static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	uint32_t parameter_size = 0;
+	long parameter[64];
+	char buf_cpy[128];
+	char *tmp_str;
+	char *sub_str;
+	const char delimiter[3] = {' ', '\n', '\0'};
+	uint32_t type;
+
+	if (count > 127)
+		return -EINVAL;
+
+	if (*buf == 's')
+		type = PP_OD_EDIT_SCLK_VDDC_TABLE;
+	else if (*buf == 'm')
+		type = PP_OD_EDIT_MCLK_VDDC_TABLE;
+	else if(*buf == 'r')
+		type = PP_OD_RESTORE_DEFAULT_TABLE;
+	else if (*buf == 'c')
+		type = PP_OD_COMMIT_DPM_TABLE;
+	else
+		return -EINVAL;
+
+	memcpy(buf_cpy, buf, count+1);
+
+	tmp_str = buf_cpy;
+
+	while (isspace(*++tmp_str));
+
+	while (tmp_str[0]) {
+		sub_str = strsep(&tmp_str, delimiter);
+		ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
+		if (ret)
+			return -EINVAL;
+		parameter_size++;
+
+		while (isspace(*tmp_str))
+			tmp_str++;
+	}
+
+	if (adev->powerplay.pp_funcs->odn_edit_dpm_table)
+		ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
+						parameter, parameter_size);
+
+	if (ret)
+		return -EINVAL;
+
+	if (type == PP_OD_COMMIT_DPM_TABLE) {
+		if (adev->powerplay.pp_funcs->dispatch_tasks) {
+			amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
+			return count;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t size = 0;
+
+	if (adev->powerplay.pp_funcs->print_clock_levels) {
+		size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
+		size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+		return size;
+	} else {
+		return snprintf(buf, PAGE_SIZE, "\n");
+	}
+
+}
+
 static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -530,7 +614,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);

 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
 		amdgpu_pm_compute_clocks(adev);
@@ -574,7 +658,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);

 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
 		amdgpu_pm_compute_clocks(adev);
@@ -584,159 +668,70 @@ fail:
 	return count;
 }

-static ssize_t amdgpu_get_pp_power_profile(struct device *dev,
-		char *buf, struct amd_pp_profile *query)
+static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	int ret = 0xff;

-	if (adev->powerplay.pp_funcs->get_power_profile_state)
-		ret = amdgpu_dpm_get_power_profile_state(
-				adev, query);
+	if (adev->powerplay.pp_funcs->get_power_profile_mode)
+		return amdgpu_dpm_get_power_profile_mode(adev, buf);

-	if (ret)
-		return ret;
-
-	return snprintf(buf, PAGE_SIZE,
-			"%d %d %d %d %d\n",
-			query->min_sclk / 100,
-			query->min_mclk / 100,
-			query->activity_threshold,
-			query->up_hyst,
-			query->down_hyst);
+	return snprintf(buf, PAGE_SIZE, "\n");
 }

-static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev,
-		struct device_attribute *attr,
-		char *buf)
-{
-	struct amd_pp_profile query = {0};
-
-	query.type = AMD_PP_GFX_PROFILE;
-
-	return amdgpu_get_pp_power_profile(dev, buf, &query);
-}

-static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev,
+static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 		struct device_attribute *attr,
-		char *buf)
-{
-	struct amd_pp_profile query = {0};
-
-	query.type = AMD_PP_COMPUTE_PROFILE;
-
-	return amdgpu_get_pp_power_profile(dev, buf, &query);
-}
-
-static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
 		const char *buf,
-		size_t count,
-		struct amd_pp_profile *request)
+		size_t count)
 {
+	int ret = 0xff;
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	uint32_t loop = 0;
-	char *sub_str, buf_cpy[128], *tmp_str;
+	uint32_t parameter_size = 0;
+	long parameter[64];
+	char *sub_str, buf_cpy[128];
+	char *tmp_str;
+	uint32_t i = 0;
+	char tmp[2];
+	long int profile_mode = 0;
 	const char delimiter[3] = {' ', '\n', '\0'};
-	long int value;
-	int ret = 0xff;
-
-	if (strncmp("reset", buf, strlen("reset")) == 0) {
-		if (adev->powerplay.pp_funcs->reset_power_profile_state)
-			ret = amdgpu_dpm_reset_power_profile_state(
-					adev, request);
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		return count;
-	}
-
-	if (strncmp("set", buf, strlen("set")) == 0) {
-		if (adev->powerplay.pp_funcs->set_power_profile_state)
-			ret = amdgpu_dpm_set_power_profile_state(
-					adev, request);
-
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		return count;
-	}

-	if (count + 1 >= 128) {
-		count = -EINVAL;
+	tmp[0] = *(buf);
+	tmp[1] = '\0';
+	ret = kstrtol(tmp, 0, &profile_mode);
+	if (ret)
 		goto fail;
-	}
-
-	memcpy(buf_cpy, buf, count + 1);
-	tmp_str = buf_cpy;
-
-	while (tmp_str[0]) {
-		sub_str = strsep(&tmp_str, delimiter);
-		ret = kstrtol(sub_str, 0, &value);
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}

-		switch (loop) {
-		case 0:
-			/* input unit MHz convert to dpm table unit 10KHz*/
-			request->min_sclk = (uint32_t)value * 100;
-			break;
-		case 1:
-			/* input unit MHz convert to dpm table unit 10KHz*/
-			request->min_mclk = (uint32_t)value * 100;
-			break;
-		case 2:
-			request->activity_threshold = (uint16_t)value;
-			break;
-		case 3:
-			request->up_hyst = (uint8_t)value;
-			break;
-		case 4:
-			request->down_hyst = (uint8_t)value;
-			break;
-		default:
-			break;
+	if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+		if (count < 2 || count > 127)
+			return -EINVAL;
+		while (isspace(*++buf))
+			i++;
+		memcpy(buf_cpy, buf, count-i);
+		tmp_str = buf_cpy;
+		while (tmp_str[0]) {
+			sub_str = strsep(&tmp_str, delimiter);
+			ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			parameter_size++;
+			while (isspace(*tmp_str))
+				tmp_str++;
 		}
-
-		loop++;
 	}
-	if (adev->powerplay.pp_funcs->set_power_profile_state)
-		ret = amdgpu_dpm_set_power_profile_state(adev, request);
-
-	if (ret)
-		count = -EINVAL;
+	parameter[parameter_size] = profile_mode;
+	if (adev->powerplay.pp_funcs->set_power_profile_mode)
+		ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);

+	if (!ret)
+		return count;
 fail:
-	return count;
-}
-
-static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev,
-		struct device_attribute *attr,
-		const char *buf,
-		size_t count)
-{
-	struct amd_pp_profile request = {0};
-
-	request.type = AMD_PP_GFX_PROFILE;
-
-	return amdgpu_set_pp_power_profile(dev, buf, count, &request);
-}
-
-static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev,
-		struct device_attribute *attr,
-		const char *buf,
-		size_t count)
-{
-	struct amd_pp_profile request = {0};
-
-	request.type = AMD_PP_COMPUTE_PROFILE;
-
-	return amdgpu_set_pp_power_profile(dev, buf, count, &request);
+	return -EINVAL;
 }

 static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
@@ -766,12 +761,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
 		amdgpu_get_pp_mclk_od,
 		amdgpu_set_pp_mclk_od);
-static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR,
-		amdgpu_get_pp_gfx_power_profile,
-		amdgpu_set_pp_gfx_power_profile);
-static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR,
-		amdgpu_get_pp_compute_power_profile,
-		amdgpu_set_pp_compute_power_profile);
+static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_power_profile_mode,
+		amdgpu_set_pp_power_profile_mode);
+static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_od_clk_voltage,
+		amdgpu_set_pp_od_clk_voltage);

 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@@ -779,17 +774,23 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	struct drm_device *ddev = adev->ddev;
-	int temp;
+	int r, temp, size = sizeof(temp);

 	/* Can't get temperature when the card is off */
 	if  ((adev->flags & AMD_IS_PX) &&
 	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
 		return -EINVAL;

-	if (!adev->powerplay.pp_funcs->get_temperature)
-		temp = 0;
-	else
-		temp = amdgpu_dpm_get_temperature(adev);
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+		return -EINVAL;
+
+	/* get the temperature */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+				   (void *)&temp, &size);
+	if (r)
+		return r;

 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
@@ -834,6 +835,11 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 	int err;
 	int value;

+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (!adev->powerplay.pp_funcs->set_fan_control_mode)
 		return -EINVAL;

@@ -868,6 +874,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
 	int err;
 	u32 value;

+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	err = kstrtou32(buf, 10, &value);
 	if (err)
 		return err;
@@ -891,6 +902,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 	int err;
 	u32 speed = 0;

+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
 		err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
 		if (err)
@@ -910,6 +926,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	int err;
 	u32 speed = 0;

+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
 		err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
 		if (err)
@@ -919,6 +940,175 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	return sprintf(buf, "%i\n", speed);
 }

+static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
+	u32 vddgfx;
+	int r, size = sizeof(vddgfx);
+
+	/* Can't get voltage when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+	      return -EINVAL;
+
+	/* get the voltage */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
+				   (void *)&vddgfx, &size);
+	if (r)
+		return r;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vddgfx);
+}
+
+static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "vddgfx\n");
+}
+
+static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
+	u32 vddnb;
+	int r, size = sizeof(vddnb);
+
+	/* only APUs have vddnb */
+	if  (adev->flags & AMD_IS_APU)
+		return -EINVAL;
+
+	/* Can't get voltage when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+	      return -EINVAL;
+
+	/* get the voltage */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
+				   (void *)&vddnb, &size);
+	if (r)
+		return r;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vddnb);
+}
+
+static ssize_t amdgpu_hwmon_show_vddnb_label(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "vddnb\n");
+}
+
+static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
+	struct pp_gpu_power query = {0};
+	int r, size = sizeof(query);
+	unsigned uw;
+
+	/* Can't get power when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+	      return -EINVAL;
+
+	/* get the voltage */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
+				   (void *)&query, &size);
+	if (r)
+		return r;
+
+	/* convert to microwatts */
+	uw = (query.average_gpu_power >> 8) * 1000000;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", uw);
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return sprintf(buf, "%i\n", 0);
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	uint32_t limit = 0;
+
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
+		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
+		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+	} else {
+		return snprintf(buf, PAGE_SIZE, "\n");
+	}
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	uint32_t limit = 0;
+
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
+		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
+		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+	} else {
+		return snprintf(buf, PAGE_SIZE, "\n");
+	}
+}
+
+
+static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int err;
+	u32 value;
+
+	err = kstrtou32(buf, 10, &value);
+	if (err)
+		return err;
+
+	value = value / 1000000; /* convert to Watt */
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
+		err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
+		if (err)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -927,6 +1117,14 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_
 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
 static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
 static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);
+static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0);
+static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0);
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0);
+static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0);

 static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
@@ -937,6 +1135,14 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
 	&sensor_dev_attr_pwm1_max.dev_attr.attr,
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
+	&sensor_dev_attr_in0_input.dev_attr.attr,
+	&sensor_dev_attr_in0_label.dev_attr.attr,
+	&sensor_dev_attr_in1_input.dev_attr.attr,
+	&sensor_dev_attr_in1_label.dev_attr.attr,
+	&sensor_dev_attr_power1_average.dev_attr.attr,
+	&sensor_dev_attr_power1_cap_max.dev_attr.attr,
+	&sensor_dev_attr_power1_cap_min.dev_attr.attr,
+	&sensor_dev_attr_power1_cap.dev_attr.attr,
 	NULL
 };

@@ -947,9 +1153,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	umode_t effective_mode = attr->mode;

-	/* no skipping for powerplay */
-	if (adev->powerplay.cgs_device)
-		return effective_mode;
+	/* handle non-powerplay limitations */
+	if (!adev->powerplay.pp_handle) {
+		/* Skip fan attributes if fan is not present */
+		if (adev->pm.no_fan &&
+		    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+		     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+		     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+		     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
+			return 0;
+		/* requires powerplay */
+		if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
+			return 0;
+	}

 	/* Skip limit attributes if DPM is not enabled */
 	if (!adev->pm.dpm_enabled &&
@@ -961,14 +1177,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;

-	/* Skip fan attributes if fan is not present */
-	if (adev->pm.no_fan &&
-	    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
-	     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
-	     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
-	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
-		return 0;
-
 	/* mask fan attributes if we have no bindings for this asic to expose */
 	if ((!adev->powerplay.pp_funcs->get_fan_speed_percent &&
 	     attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */
@@ -982,6 +1190,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */
 		effective_mode &= ~S_IWUSR;

+	if ((adev->flags & AMD_IS_APU) &&
+	    (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr||
+	     attr == &sensor_dev_attr_power1_cap.dev_attr.attr))
+		return 0;
+
 	/* hide max/min values if we can't both query and manage the fan */
 	if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
 	     !adev->powerplay.pp_funcs->get_fan_speed_percent) &&
@@ -989,8 +1203,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;

-	/* requires powerplay */
-	if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
+	/* only APUs have vddnb */
+	if (!(adev->flags & AMD_IS_APU) &&
+	    (attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_in1_label.dev_attr.attr))
 		return 0;

 	return effective_mode;
@@ -1013,13 +1229,15 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
 			     pm.dpm.thermal.work);
 	/* switch to the thermal state */
 	enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL;
+	int temp, size = sizeof(temp);

 	if (!adev->pm.dpm_enabled)
 		return;

-	if (adev->powerplay.pp_funcs->get_temperature) {
-		int temp = amdgpu_dpm_get_temperature(adev);
-
+	if (adev->powerplay.pp_funcs &&
+	    adev->powerplay.pp_funcs->read_sensor &&
+	    !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+				    (void *)&temp, &size)) {
 		if (temp < adev->pm.dpm.thermal.min_temp)
 			/* switch back the user state */
 			dpm_state = adev->pm.dpm.user_state;
@@ -1319,9 +1537,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 	if (adev->pm.dpm_enabled == 0)
 		return 0;

-	if (adev->powerplay.pp_funcs->get_temperature == NULL)
-		return 0;
-
 	adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
 								   DRIVER_NAME, adev,
 								   hwmon_groups);
@@ -1391,20 +1606,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 		return ret;
 	}
 	ret = device_create_file(adev->dev,
-			&dev_attr_pp_gfx_power_profile);
+			&dev_attr_pp_power_profile_mode);
 	if (ret) {
 		DRM_ERROR("failed to create device file	"
-				"pp_gfx_power_profile\n");
+				"pp_power_profile_mode\n");
 		return ret;
 	}
 	ret = device_create_file(adev->dev,
-			&dev_attr_pp_compute_power_profile);
+			&dev_attr_pp_od_clk_voltage);
 	if (ret) {
 		DRM_ERROR("failed to create device file	"
-				"pp_compute_power_profile\n");
+				"pp_od_clk_voltage\n");
 		return ret;
 	}
-
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1437,9 +1651,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 	device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
 	device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
 	device_remove_file(adev->dev,
-			&dev_attr_pp_gfx_power_profile);
+			&dev_attr_pp_power_profile_mode);
 	device_remove_file(adev->dev,
-			&dev_attr_pp_compute_power_profile);
+			&dev_attr_pp_od_clk_voltage);
 }

 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -1462,7 +1676,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 	}

 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
 	} else {
 		mutex_lock(&adev->pm.mutex);
 		adev->pm.dpm.new_active_crtcs = 0;
@@ -1512,6 +1726,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		seq_printf(m, "\t%u MHz (MCLK)\n", value/100);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size))
 		seq_printf(m, "\t%u MHz (SCLK)\n", value/100);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, (void *)&value, &size))
+		seq_printf(m, "\t%u MHz (PSTATE_SCLK)\n", value/100);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, (void *)&value, &size))
+		seq_printf(m, "\t%u MHz (PSTATE_MCLK)\n", value/100);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size))
 		seq_printf(m, "\t%u mV (VDDGFX)\n", value);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
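
Illustrative example (not part of this commit): the hunks above expose the new voltage and power sensors through standard hwmon attributes (in0_input/in1_input, power1_average, power1_cap_min/max and power1_cap), with power values reported in microwatts. A small userspace sketch that reads power1_average; the hwmon directory path is an assumption and must point at the amdgpu device:

/* read_gpu_power.c - print average GPU power from the new power1_average
 * hwmon attribute (reported in microwatts).
 * Build: cc -o read_gpu_power read_gpu_power.c
 * Usage: ./read_gpu_power /sys/class/hwmon/hwmon0
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[256];
	unsigned long long uw;
	FILE *f;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <hwmon dir of the amdgpu device>\n", argv[0]);
		return 1;
	}

	snprintf(path, sizeof(path), "%s/power1_average", argv[1]);
	f = fopen(path, "r");
	if (!f || fscanf(f, "%llu", &uw) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);

	/* the attribute reports microwatts; convert to watts for display */
	printf("average GPU power: %.2f W\n", uw / 1e6);
	return 0;
}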

+ 0 - 290
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -1,290 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#include "atom.h"
-#include "amdgpu.h"
-#include "amd_shared.h"
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include "amdgpu_pm.h"
-#include <drm/amdgpu_drm.h>
-#include "amdgpu_powerplay.h"
-#include "si_dpm.h"
-#include "cik_dpm.h"
-#include "vi_dpm.h"
-
-static int amdgpu_pp_early_init(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amd_powerplay *amd_pp;
-	int ret = 0;
-
-	amd_pp = &(adev->powerplay);
-	amd_pp->pp_handle = (void *)adev;
-
-	switch (adev->asic_type) {
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS12:
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_TOPAZ:
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-	case CHIP_VEGA10:
-	case CHIP_RAVEN:
-		amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
-		amd_pp->ip_funcs = &pp_ip_funcs;
-		amd_pp->pp_funcs = &pp_dpm_funcs;
-		break;
-	/* These chips don't have powerplay implemenations */
-#ifdef CONFIG_DRM_AMDGPU_SI
-	case CHIP_TAHITI:
-	case CHIP_PITCAIRN:
-	case CHIP_VERDE:
-	case CHIP_OLAND:
-	case CHIP_HAINAN:
-		amd_pp->ip_funcs = &si_dpm_ip_funcs;
-		amd_pp->pp_funcs = &si_dpm_funcs;
-	break;
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
-	case CHIP_BONAIRE:
-	case CHIP_HAWAII:
-		if (amdgpu_dpm == -1) {
-			amd_pp->ip_funcs = &ci_dpm_ip_funcs;
-			amd_pp->pp_funcs = &ci_dpm_funcs;
-		} else {
-			amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
-			amd_pp->ip_funcs = &pp_ip_funcs;
-			amd_pp->pp_funcs = &pp_dpm_funcs;
-		}
-		break;
-	case CHIP_KABINI:
-	case CHIP_MULLINS:
-	case CHIP_KAVERI:
-		amd_pp->ip_funcs = &kv_dpm_ip_funcs;
-		amd_pp->pp_funcs = &kv_dpm_funcs;
-		break;
-#endif
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	if (adev->powerplay.ip_funcs->early_init)
-		ret = adev->powerplay.ip_funcs->early_init(
-					amd_pp->cgs_device ? amd_pp->cgs_device :
-					amd_pp->pp_handle);
-
-	return ret;
-}
-
-
-static int amdgpu_pp_late_init(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->late_init)
-		ret = adev->powerplay.ip_funcs->late_init(
-					adev->powerplay.pp_handle);
-
-	return ret;
-}
-
-static int amdgpu_pp_sw_init(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->sw_init)
-		ret = adev->powerplay.ip_funcs->sw_init(
-					adev->powerplay.pp_handle);
-
-	return ret;
-}
-
-static int amdgpu_pp_sw_fini(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->sw_fini)
-		ret = adev->powerplay.ip_funcs->sw_fini(
-					adev->powerplay.pp_handle);
-	if (ret)
-		return ret;
-
-	return ret;
-}
-
-static int amdgpu_pp_hw_init(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_init_bo(adev);
-
-	if (adev->powerplay.ip_funcs->hw_init)
-		ret = adev->powerplay.ip_funcs->hw_init(
-					adev->powerplay.pp_handle);
-
-	return ret;
-}
-
-static int amdgpu_pp_hw_fini(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->hw_fini)
-		ret = adev->powerplay.ip_funcs->hw_fini(
-					adev->powerplay.pp_handle);
-
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_fini_bo(adev);
-
-	return ret;
-}
-
-static void amdgpu_pp_late_fini(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->late_fini)
-		adev->powerplay.ip_funcs->late_fini(
-			  adev->powerplay.pp_handle);
-
-	if (adev->powerplay.cgs_device)
-		amdgpu_cgs_destroy_device(adev->powerplay.cgs_device);
-}
-
-static int amdgpu_pp_suspend(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->suspend)
-		ret = adev->powerplay.ip_funcs->suspend(
-					 adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_resume(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->resume)
-		ret = adev->powerplay.ip_funcs->resume(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_set_clockgating_state(void *handle,
-					enum amd_clockgating_state state)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->set_clockgating_state)
-		ret = adev->powerplay.ip_funcs->set_clockgating_state(
-				adev->powerplay.pp_handle, state);
-	return ret;
-}
-
-static int amdgpu_pp_set_powergating_state(void *handle,
-					enum amd_powergating_state state)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->set_powergating_state)
-		ret = adev->powerplay.ip_funcs->set_powergating_state(
-				 adev->powerplay.pp_handle, state);
-	return ret;
-}
-
-
-static bool amdgpu_pp_is_idle(void *handle)
-{
-	bool ret = true;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->is_idle)
-		ret = adev->powerplay.ip_funcs->is_idle(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_wait_for_idle(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->wait_for_idle)
-		ret = adev->powerplay.ip_funcs->wait_for_idle(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_soft_reset(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->soft_reset)
-		ret = adev->powerplay.ip_funcs->soft_reset(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
-	.name = "amdgpu_powerplay",
-	.early_init = amdgpu_pp_early_init,
-	.late_init = amdgpu_pp_late_init,
-	.sw_init = amdgpu_pp_sw_init,
-	.sw_fini = amdgpu_pp_sw_fini,
-	.hw_init = amdgpu_pp_hw_init,
-	.hw_fini = amdgpu_pp_hw_fini,
-	.late_fini = amdgpu_pp_late_fini,
-	.suspend = amdgpu_pp_suspend,
-	.resume = amdgpu_pp_resume,
-	.is_idle = amdgpu_pp_is_idle,
-	.wait_for_idle = amdgpu_pp_wait_for_idle,
-	.soft_reset = amdgpu_pp_soft_reset,
-	.set_clockgating_state = amdgpu_pp_set_clockgating_state,
-	.set_powergating_state = amdgpu_pp_set_powergating_state,
-};
-
-const struct amdgpu_ip_block_version amdgpu_pp_ip_block =
-{
-	.type = AMD_IP_BLOCK_TYPE_SMC,
-	.major = 1,
-	.minor = 0,
-	.rev = 0,
-	.funcs = &amdgpu_pp_ip_funcs,
-};

+ 133 - 29
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c

@@ -26,9 +26,12 @@
 #include <drm/drmP.h>
 #include <drm/drmP.h>
 
 
 #include "amdgpu.h"
 #include "amdgpu.h"
+#include "amdgpu_display.h"
 #include <drm/amdgpu_drm.h>
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-buf.h>
 
 
+static const struct dma_buf_ops amdgpu_dmabuf_ops;
+
 struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
 struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
 {
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -102,59 +105,93 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	int ret;

 	ww_mutex_lock(&resv->lock, NULL);
-	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
-			       AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo);
-	ww_mutex_unlock(&resv->lock);
+	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
+			       AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
+			       resv, &bo);
 	if (ret)
-		return ERR_PTR(ret);
+		goto error;

-	bo->prime_shared_count = 1;
+	bo->tbo.sg = sg;
+	bo->tbo.ttm->sg = sg;
+	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+		bo->prime_shared_count = 1;
+
+	ww_mutex_unlock(&resv->lock);
 	return &bo->gem_base;
+
+error:
+	ww_mutex_unlock(&resv->lock);
+	return ERR_PTR(ret);
 }

-int amdgpu_gem_prime_pin(struct drm_gem_object *obj)
+static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
+				 struct device *target_dev,
+				 struct dma_buf_attachment *attach)
 {
+	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	long ret = 0;
-
-	ret = amdgpu_bo_reserve(bo, false);
-	if (unlikely(ret != 0))
-		return ret;
-
-	/*
-	 * Wait for all shared fences to complete before we switch to future
-	 * use of exclusive fence on this prime shared bo.
-	 */
-	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
-						  MAX_SCHEDULE_TIMEOUT);
-	if (unlikely(ret < 0)) {
-		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
-		amdgpu_bo_unreserve(bo);
-		return ret;
+	long r;
+
+	r = drm_gem_map_attach(dma_buf, target_dev, attach);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (unlikely(r != 0))
+		goto error_detach;
+
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops) {
+		/*
+		 * Wait for all shared fences to complete before we switch to future
+		 * use of exclusive fence on this prime shared bo.
+		 */
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+							true, false,
+							MAX_SCHEDULE_TIMEOUT);
+		if (unlikely(r < 0)) {
+			DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
+			goto error_unreserve;
+		}
 	}

 	/* pin buffer into GTT */
-	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
-	if (likely(ret == 0))
+	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+	if (r)
+		goto error_unreserve;
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
 		bo->prime_shared_count++;

+error_unreserve:
 	amdgpu_bo_unreserve(bo);
-	return ret;
+
+error_detach:
+	if (r)
+		drm_gem_map_detach(dma_buf, attach);
+	return r;
 }

-void amdgpu_gem_prime_unpin(struct drm_gem_object *obj)
+static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
+				  struct dma_buf_attachment *attach)
 {
+	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	int ret = 0;

 	ret = amdgpu_bo_reserve(bo, true);
 	if (unlikely(ret != 0))
-		return;
+		goto error;

 	amdgpu_bo_unpin(bo);
-	if (bo->prime_shared_count)
+	if (dma_buf->ops != &amdgpu_dmabuf_ops && bo->prime_shared_count)
 		bo->prime_shared_count--;
 	amdgpu_bo_unreserve(bo);
+
+error:
+	drm_gem_map_detach(dma_buf, attach);
 }

 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
@@ -164,6 +201,50 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
 	return bo->tbo.resv;
 }

+static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
+				       enum dma_data_direction direction)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct ttm_operation_ctx ctx = { true, false };
+	u32 domain = amdgpu_display_framebuffer_domains(adev);
+	int ret;
+	bool reads = (direction == DMA_BIDIRECTIONAL ||
+		      direction == DMA_FROM_DEVICE);
+
+	if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
+		return 0;
+
+	/* move to gtt */
+	ret = amdgpu_bo_reserve(bo, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	}
+
+	amdgpu_bo_unreserve(bo);
+	return ret;
+}
+
+static const struct dma_buf_ops amdgpu_dmabuf_ops = {
+	.attach = amdgpu_gem_map_attach,
+	.detach = amdgpu_gem_map_detach,
+	.map_dma_buf = drm_gem_map_dma_buf,
+	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.release = drm_gem_dmabuf_release,
+	.begin_cpu_access = amdgpu_gem_begin_cpu_access,
+	.map = drm_gem_dmabuf_kmap,
+	.map_atomic = drm_gem_dmabuf_kmap_atomic,
+	.unmap = drm_gem_dmabuf_kunmap,
+	.unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags)
@@ -176,7 +257,30 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 		return ERR_PTR(-EPERM);

 	buf = drm_gem_prime_export(dev, gobj, flags);
-	if (!IS_ERR(buf))
+	if (!IS_ERR(buf)) {
 		buf->file->f_mapping = dev->anon_inode->i_mapping;
+		buf->ops = &amdgpu_dmabuf_ops;
+	}
+
 	return buf;
 }
+
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf)
+{
+	struct drm_gem_object *obj;
+
+	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev) {
+			/*
+			 * Importing dmabuf exported from our own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	return drm_gem_prime_import(dev, dma_buf);
+}
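
Illustrative example (not part of this commit): the new amdgpu_gem_begin_cpu_access() callback above is reached from userspace through the standard DMA-BUF synchronization ioctl. A minimal sketch that brackets CPU reads of an exported buffer; it assumes the caller already holds a dma-buf file descriptor and an mmap()ed view of it:

/* Bracket CPU reads of a dma-buf with DMA_BUF_IOCTL_SYNC, which invokes the
 * exporter's begin/end_cpu_access hooks (amdgpu_gem_begin_cpu_access here).
 */
#include <stddef.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>

int read_dmabuf_cpu(int dmabuf_fd, const void *mapped, size_t len)
{
	struct dma_buf_sync sync = {
		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ,
	};

	if (ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync))
		return -1;

	/* ... CPU reads of the mapped buffer go here ... */
	(void)mapped;
	(void)len;

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
	return ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
}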

+ 14 - 35
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -51,29 +51,10 @@ static int psp_sw_init(void *handle)

 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		psp->init_microcode = psp_v3_1_init_microcode;
-		psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv;
-		psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
-		psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
-		psp->ring_init = psp_v3_1_ring_init;
-		psp->ring_create = psp_v3_1_ring_create;
-		psp->ring_stop = psp_v3_1_ring_stop;
-		psp->ring_destroy = psp_v3_1_ring_destroy;
-		psp->cmd_submit = psp_v3_1_cmd_submit;
-		psp->compare_sram_data = psp_v3_1_compare_sram_data;
-		psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
-		psp->mode1_reset = psp_v3_1_mode1_reset;
+		psp_v3_1_set_psp_funcs(psp);
 		break;
 	case CHIP_RAVEN:
-		psp->init_microcode = psp_v10_0_init_microcode;
-		psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
-		psp->ring_init = psp_v10_0_ring_init;
-		psp->ring_create = psp_v10_0_ring_create;
-		psp->ring_stop = psp_v10_0_ring_stop;
-		psp->ring_destroy = psp_v10_0_ring_destroy;
-		psp->cmd_submit = psp_v10_0_cmd_submit;
-		psp->compare_sram_data = psp_v10_0_compare_sram_data;
-		psp->mode1_reset = psp_v10_0_mode1_reset;
+		psp_v10_0_set_psp_funcs(psp);
 		break;
 	default:
 		return -EINVAL;
@@ -81,6 +62,9 @@ static int psp_sw_init(void *handle)

 	psp->adev = adev;

+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
 	ret = psp_init_microcode(psp);
 	if (ret) {
 		DRM_ERROR("Failed to load psp firmware!\n");
@@ -94,6 +78,9 @@ static int psp_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
 	release_firmware(adev->psp.sos_fw);
 	adev->psp.sos_fw = NULL;
 	release_firmware(adev->psp.asd_fw);
@@ -472,6 +459,9 @@ static int psp_suspend(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct psp_context *psp = &adev->psp;

+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
 	ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
 	if (ret) {
 		DRM_ERROR("PSP ring stop failed\n");
@@ -512,19 +502,8 @@ failed:
 	return ret;
 }

-static bool psp_check_reset(void* handle)
+int psp_gpu_reset(struct amdgpu_device *adev)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->flags & AMD_IS_APU)
-		return true;
-
-	return false;
-}
-
-static int psp_reset(void* handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	return psp_mode1_reset(&adev->psp);
 }

@@ -571,9 +550,9 @@ const struct amd_ip_funcs psp_ip_funcs = {
 	.suspend = psp_suspend,
 	.resume = psp_resume,
 	.is_idle = NULL,
-	.check_soft_reset = psp_check_reset,
+	.check_soft_reset = NULL,
 	.wait_for_idle = NULL,
-	.soft_reset = psp_reset,
+	.soft_reset = NULL,
 	.set_clockgating_state = psp_set_clockgating_state,
 	.set_powergating_state = psp_set_powergating_state,
 };

+ 26 - 17
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h

@@ -33,6 +33,8 @@
 #define PSP_ASD_SHARED_MEM_SIZE	0x4000
 #define PSP_1_MEG		0x100000

+struct psp_context;
+
 enum psp_ring_type
 {
 	PSP_RING_TYPE__INVALID = 0,
@@ -53,12 +55,8 @@ struct psp_ring
 	uint32_t			ring_size;
 };

-struct psp_context
+struct psp_funcs
 {
-	struct amdgpu_device            *adev;
-	struct psp_ring                 km_ring;
-	struct psp_gfx_cmd_resp		*cmd;
-
 	int (*init_microcode)(struct psp_context *psp);
 	int (*bootloader_load_sysdrv)(struct psp_context *psp);
 	int (*bootloader_load_sos)(struct psp_context *psp);
@@ -77,6 +75,15 @@ struct psp_context
 				  enum AMDGPU_UCODE_ID ucode_type);
 	bool (*smu_reload_quirk)(struct psp_context *psp);
 	int (*mode1_reset)(struct psp_context *psp);
+};
+
+struct psp_context
+{
+	struct amdgpu_device            *adev;
+	struct psp_ring                 km_ring;
+	struct psp_gfx_cmd_resp		*cmd;
+
+	const struct psp_funcs 		*funcs;

 	/* fence buffer */
 	struct amdgpu_bo 		*fw_pri_bo;
@@ -123,25 +130,25 @@ struct amdgpu_psp_funcs {
 					enum AMDGPU_UCODE_ID);
 };

-#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
-#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
-#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type))
-#define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type))
-#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type)))
+#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
+#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
+#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
+#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
+#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
 #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
-		(psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
+		(psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
 #define psp_compare_sram_data(psp, ucode, type) \
-		(psp)->compare_sram_data((psp), (ucode), (type))
+		(psp)->funcs->compare_sram_data((psp), (ucode), (type))
 #define psp_init_microcode(psp) \
-		((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0)
+		((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
 #define psp_bootloader_load_sysdrv(psp) \
-		((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0)
+		((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
 #define psp_bootloader_load_sos(psp) \
-		((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0)
+		((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
 #define psp_smu_reload_quirk(psp) \
-		((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false)
+		((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
 #define psp_mode1_reset(psp) \
-		((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false)
+		((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)

 extern const struct amd_ip_funcs psp_ip_funcs;

@@ -151,4 +158,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,

 extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;

+int psp_gpu_reset(struct amdgpu_device *adev);
+
 #endif
 #endif
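
The amdgpu_psp.h hunk above moves the PSP callbacks out of struct psp_context into a separate psp_funcs table, and every wrapper macro now dereferences psp->funcs. As a minimal sketch (the names are taken from the hunk; the surrounding call site is hypothetical), an optional hook such as bootloader_load_sos expands to a NULL-guarded indirect call, while mandatory hooks like ring_init are invoked unconditionally:

	/* what psp_bootloader_load_sos(psp) expands to after this change */
	int ret = (psp)->funcs->bootloader_load_sos ?
		  (psp)->funcs->bootloader_load_sos((psp)) : 0;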

+ 4 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -360,6 +360,9 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 
 
 	amdgpu_debugfs_ring_fini(ring);
 	amdgpu_debugfs_ring_fini(ring);
 
 
+	dma_fence_put(ring->vmid_wait);
+	ring->vmid_wait = NULL;
+
 	ring->adev->rings[ring->idx] = NULL;
 	ring->adev->rings[ring->idx] = NULL;
 }
 }
 
 
@@ -481,7 +484,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
 	result = 0;
 	result = 0;
 
 
 	if (*pos < 12) {
 	if (*pos < 12) {
-		early[0] = amdgpu_ring_get_rptr(ring);
+		early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
 		early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
 		early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
 		early[2] = ring->wptr & ring->buf_mask;
 		early[2] = ring->wptr & ring->buf_mask;
 		for (i = *pos / 4; i < 3 && size; i++) {
 		for (i = *pos / 4; i < 3 && size; i++) {
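
In the amdgpu_ring.c debugfs hunk the hardware read pointer is now masked with buf_mask, the same way the write pointers already were, so the reported read pointer is directly comparable to the two masked write pointers. A minimal sketch, assuming the usual amdgpu convention that buf_mask is the ring size in dwords minus one (a power of two):

	/* wrap the monotonically increasing pointer into the ring contents */
	uint32_t idx = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
	uint32_t dw = ring->ring[idx];	/* idx is always a valid dword index */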

+ 7 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -26,6 +26,7 @@
 
 
 #include <drm/amdgpu_drm.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/gpu_scheduler.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/drm_print.h>
 
 
 /* max number of rings */
 /* max number of rings */
 #define AMDGPU_MAX_RINGS		18
 #define AMDGPU_MAX_RINGS		18
@@ -35,8 +36,9 @@
 #define AMDGPU_MAX_UVD_ENC_RINGS	2
 #define AMDGPU_MAX_UVD_ENC_RINGS	2
 
 
 /* some special values for the owner field */
 /* some special values for the owner field */
-#define AMDGPU_FENCE_OWNER_UNDEFINED	((void*)0ul)
-#define AMDGPU_FENCE_OWNER_VM		((void*)1ul)
+#define AMDGPU_FENCE_OWNER_UNDEFINED	((void *)0ul)
+#define AMDGPU_FENCE_OWNER_VM		((void *)1ul)
+#define AMDGPU_FENCE_OWNER_KFD		((void *)2ul)
 
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
@@ -128,7 +130,6 @@ struct amdgpu_ring_funcs {
 	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
 	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
 			      uint64_t pd_addr);
 			      uint64_t pd_addr);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
-	void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
 	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
 	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
 				uint32_t gds_base, uint32_t gds_size,
 				uint32_t gds_base, uint32_t gds_size,
 				uint32_t gws_base, uint32_t gws_size,
 				uint32_t gws_base, uint32_t gws_size,
@@ -151,6 +152,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
+			      uint32_t val, uint32_t mask);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 	/* priority functions */
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
 	void (*set_priority) (struct amdgpu_ring *ring,
@@ -195,6 +198,7 @@ struct amdgpu_ring {
 	u64			cond_exe_gpu_addr;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 	unsigned		vm_inv_eng;
+	struct dma_fence	*vmid_wait;
 	bool			has_compute_vm_bug;
 	bool			has_compute_vm_bug;
 
 
 	atomic_t		num_jobs[DRM_SCHED_PRIORITY_MAX];
 	atomic_t		num_jobs[DRM_SCHED_PRIORITY_MAX];
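
The new emit_reg_wait hook in amdgpu_ring_funcs lets common code ask a ring to emit a "wait until this register matches a value under a mask" packet; the WAIT_REG_MEM-style semantics and the NULL check below are assumptions, only the callback signature is shown above. A caller sketch:

	/* wait in the ring until (reg value & mask) == val */
	if (ring->funcs->emit_reg_wait)
		ring->funcs->emit_reg_wait(ring, reg, val, mask);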

+ 11 - 51
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c

@@ -63,21 +63,27 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
 	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 		INIT_LIST_HEAD(&sa_manager->flist[i]);
 		INIT_LIST_HEAD(&sa_manager->flist[i]);
 
 
-	r = amdgpu_bo_create(adev, size, align, true, domain,
-			     0, NULL, NULL, 0, &sa_manager->bo);
+	r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
+				&sa_manager->gpu_addr, &sa_manager->cpu_ptr);
 	if (r) {
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
 		dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
 		return r;
 		return r;
 	}
 	}
 
 
+	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
 	return r;
 	return r;
 }
 }
 
 
 void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
 void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
-			       struct amdgpu_sa_manager *sa_manager)
+                              struct amdgpu_sa_manager *sa_manager)
 {
 {
 	struct amdgpu_sa_bo *sa_bo, *tmp;
 	struct amdgpu_sa_bo *sa_bo, *tmp;
 
 
+	if (sa_manager->bo == NULL) {
+		dev_err(adev->dev, "no bo for sa manager\n");
+		return;
+	}
+
 	if (!list_empty(&sa_manager->olist)) {
 	if (!list_empty(&sa_manager->olist)) {
 		sa_manager->hole = &sa_manager->olist,
 		sa_manager->hole = &sa_manager->olist,
 		amdgpu_sa_bo_try_free(sa_manager);
 		amdgpu_sa_bo_try_free(sa_manager);
@@ -88,55 +94,9 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
 	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
 	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
 		amdgpu_sa_bo_remove_locked(sa_bo);
 		amdgpu_sa_bo_remove_locked(sa_bo);
 	}
 	}
-	amdgpu_bo_unref(&sa_manager->bo);
-	sa_manager->size = 0;
-}
-
-int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
-			       struct amdgpu_sa_manager *sa_manager)
-{
-	int r;
-
-	if (sa_manager->bo == NULL) {
-		dev_err(adev->dev, "no bo for sa manager\n");
-		return -EINVAL;
-	}
 
 
-	/* map the buffer */
-	r = amdgpu_bo_reserve(sa_manager->bo, false);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r);
-		return r;
-	}
-	r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
-	if (r) {
-		amdgpu_bo_unreserve(sa_manager->bo);
-		dev_err(adev->dev, "(%d) failed to pin manager bo\n", r);
-		return r;
-	}
-	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
-	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
-	amdgpu_bo_unreserve(sa_manager->bo);
-	return r;
-}
-
-int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
-				 struct amdgpu_sa_manager *sa_manager)
-{
-	int r;
-
-	if (sa_manager->bo == NULL) {
-		dev_err(adev->dev, "no bo for sa manager\n");
-		return -EINVAL;
-	}
-
-	r = amdgpu_bo_reserve(sa_manager->bo, true);
-	if (!r) {
-		amdgpu_bo_kunmap(sa_manager->bo);
-		amdgpu_bo_unpin(sa_manager->bo);
-		amdgpu_bo_unreserve(sa_manager->bo);
-	}
-	return r;
+	amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+	sa_manager->size = 0;
 }
 }
 
 
 static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
 static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
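
With amdgpu_sa.c switched to amdgpu_bo_create_kernel()/amdgpu_bo_free_kernel(), the manager BO stays pinned and CPU-mapped for its whole lifetime, which is why the separate _start()/_suspend() helpers that used to pin, kmap and unpin it can be deleted. A condensed sketch of the new setup/teardown pairing, taken from the hunk with error handling trimmed:

	r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
				    &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
	if (!r)
		memset(sa_manager->cpu_ptr, 0, sa_manager->size);

	/* ... later, in amdgpu_sa_bo_manager_fini() ... */
	amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr,
			      &sa_manager->cpu_ptr);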

+ 54 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

@@ -31,6 +31,7 @@
 #include <drm/drmP.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 
 struct amdgpu_sync_entry {
 struct amdgpu_sync_entry {
 	struct hlist_node	node;
 	struct hlist_node	node;
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
  */
  */
 static void *amdgpu_sync_get_owner(struct dma_fence *f)
 static void *amdgpu_sync_get_owner(struct dma_fence *f)
 {
 {
-	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+	struct drm_sched_fence *s_fence;
+	struct amdgpu_amdkfd_fence *kfd_fence;
+
+	if (!f)
+		return AMDGPU_FENCE_OWNER_UNDEFINED;
 
 
+	s_fence = to_drm_sched_fence(f);
 	if (s_fence)
 	if (s_fence)
 		return s_fence->owner;
 		return s_fence->owner;
 
 
+	kfd_fence = to_amdgpu_amdkfd_fence(f);
+	if (kfd_fence)
+		return AMDGPU_FENCE_OWNER_KFD;
+
 	return AMDGPU_FENCE_OWNER_UNDEFINED;
 	return AMDGPU_FENCE_OWNER_UNDEFINED;
 }
 }
 
 
@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 	for (i = 0; i < flist->shared_count; ++i) {
 	for (i = 0; i < flist->shared_count; ++i) {
 		f = rcu_dereference_protected(flist->shared[i],
 		f = rcu_dereference_protected(flist->shared[i],
 					      reservation_object_held(resv));
 					      reservation_object_held(resv));
+		/* We only want to trigger KFD eviction fences on
+		 * evict or move jobs. Skip KFD fences otherwise.
+		 */
+		fence_owner = amdgpu_sync_get_owner(f);
+		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+			continue;
+
 		if (amdgpu_sync_same_dev(adev, f)) {
 		if (amdgpu_sync_same_dev(adev, f)) {
 			/* VM updates are only interesting
 			/* VM updates are only interesting
 			 * for other VM updates and moves.
 			 * for other VM updates and moves.
 			 */
 			 */
-			fence_owner = amdgpu_sync_get_owner(f);
 			if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
 			if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
 			    (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
 			    (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
 			    ((owner == AMDGPU_FENCE_OWNER_VM) !=
 			    ((owner == AMDGPU_FENCE_OWNER_VM) !=
@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
 	return NULL;
 	return NULL;
 }
 }
 
 
+/**
+ * amdgpu_sync_clone - clone a sync object
+ *
+ * @source: sync object to clone
+ * @clone: pointer to destination sync object
+ *
+ * Adds references to all unsignaled fences in @source to @clone. Also
+ * removes signaled fences from @source while at it.
+ */
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
+{
+	struct amdgpu_sync_entry *e;
+	struct hlist_node *tmp;
+	struct dma_fence *f;
+	int i, r;
+
+	hash_for_each_safe(source->fences, i, tmp, e, node) {
+		f = e->fence;
+		if (!dma_fence_is_signaled(f)) {
+			r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+			if (r)
+				return r;
+		} else {
+			hash_del(&e->node);
+			dma_fence_put(f);
+			kmem_cache_free(amdgpu_sync_slab, e);
+		}
+	}
+
+	dma_fence_put(clone->last_vm_update);
+	clone->last_vm_update = dma_fence_get(source->last_vm_update);
+
+	return 0;
+}
+
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
 {
 {
 	struct amdgpu_sync_entry *e;
 	struct amdgpu_sync_entry *e;

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h

@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
 				     struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 int amdgpu_sync_init(void);
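
amdgpu_sync_clone(), added in the amdgpu_sync.c hunk above, copies references to every still-unsignaled fence (plus last_vm_update) from one sync object into another and prunes already-signaled entries from the source as a side effect. A minimal usage sketch, assuming the pre-existing amdgpu_sync_create()/amdgpu_sync_free() helpers:

	struct amdgpu_sync clone;
	int r;

	amdgpu_sync_create(&clone);
	r = amdgpu_sync_clone(&source, &clone);
	if (r) {
		amdgpu_sync_free(&clone);
		return r;
	}
	/* clone now holds its own references; source keeps its unsignaled fences */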

+ 11 - 12
drivers/gpu/drm/amd/amdgpu/amdgpu_test.c

@@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 	/* Number of tests =
 	/* Number of tests =
 	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
 	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
 	 */
 	 */
-	n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
+	n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		if (adev->rings[i])
 		if (adev->rings[i])
 			n -= adev->rings[i]->ring_size;
 			n -= adev->rings[i]->ring_size;
@@ -59,9 +59,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		goto out_cleanup;
 		goto out_cleanup;
 	}
 	}
 
 
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM, 0,
-			     NULL, NULL, 0, &vram_obj);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
+			     ttm_bo_type_kernel, NULL, &vram_obj);
 	if (r) {
 	if (r) {
 		DRM_ERROR("Failed to create VRAM object\n");
 		DRM_ERROR("Failed to create VRAM object\n");
 		goto out_cleanup;
 		goto out_cleanup;
@@ -80,9 +79,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		void **vram_start, **vram_end;
 		void **vram_start, **vram_end;
 		struct dma_fence *fence = NULL;
 		struct dma_fence *fence = NULL;
 
 
-		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
-				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-				     NULL, 0, gtt_obj + i);
+		r = amdgpu_bo_create(adev, size, PAGE_SIZE,
+				     AMDGPU_GEM_DOMAIN_GTT, 0,
+				     ttm_bo_type_kernel, NULL, gtt_obj + i);
 		if (r) {
 		if (r) {
 			DRM_ERROR("Failed to create GTT object %d\n", i);
 			DRM_ERROR("Failed to create GTT object %d\n", i);
 			goto out_lclean;
 			goto out_lclean;
@@ -142,10 +141,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 					  "0x%16llx/0x%16llx)\n",
 					  "0x%16llx/0x%16llx)\n",
 					  i, *vram_start, gart_start,
 					  i, *vram_start, gart_start,
 					  (unsigned long long)
 					  (unsigned long long)
-					  (gart_addr - adev->mc.gart_start +
+					  (gart_addr - adev->gmc.gart_start +
 					   (void*)gart_start - gtt_map),
 					   (void*)gart_start - gtt_map),
 					  (unsigned long long)
 					  (unsigned long long)
-					  (vram_addr - adev->mc.vram_start +
+					  (vram_addr - adev->gmc.vram_start +
 					   (void*)gart_start - gtt_map));
 					   (void*)gart_start - gtt_map));
 				amdgpu_bo_kunmap(vram_obj);
 				amdgpu_bo_kunmap(vram_obj);
 				goto out_lclean_unpin;
 				goto out_lclean_unpin;
@@ -187,10 +186,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 					  "0x%16llx/0x%16llx)\n",
 					  "0x%16llx/0x%16llx)\n",
 					  i, *gart_start, vram_start,
 					  i, *gart_start, vram_start,
 					  (unsigned long long)
 					  (unsigned long long)
-					  (vram_addr - adev->mc.vram_start +
+					  (vram_addr - adev->gmc.vram_start +
 					   (void*)vram_start - vram_map),
 					   (void*)vram_start - vram_map),
 					  (unsigned long long)
 					  (unsigned long long)
-					  (gart_addr - adev->mc.gart_start +
+					  (gart_addr - adev->gmc.gart_start +
 					   (void*)vram_start - vram_map));
 					   (void*)vram_start - vram_map));
 				amdgpu_bo_kunmap(gtt_obj[i]);
 				amdgpu_bo_kunmap(gtt_obj[i]);
 				goto out_lclean_unpin;
 				goto out_lclean_unpin;
@@ -200,7 +199,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		amdgpu_bo_kunmap(gtt_obj[i]);
 		amdgpu_bo_kunmap(gtt_obj[i]);
 
 
 		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
 		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
-			 gart_addr - adev->mc.gart_start);
+			 gart_addr - adev->gmc.gart_start);
 		continue;
 		continue;
 
 
 out_lclean_unpin:
 out_lclean_unpin:
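
The amdgpu_test.c changes track two interface updates visible throughout this merge: the mc -> gmc rename of the memory-controller state, and a reworked amdgpu_bo_create() that takes an explicit ttm_bo_type instead of the old "kernel" bool and drops two of the old NULL/0 arguments. Side by side, as in the hunks above:

	/* before */
	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM, 0,
			     NULL, NULL, 0, &vram_obj);
	/* after */
	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
			     ttm_bo_type_kernel, NULL, &vram_obj);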

+ 30 - 8
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

@@ -86,7 +86,7 @@ TRACE_EVENT(amdgpu_iv,
 			     __field(unsigned, vmid_src)
 			     __field(unsigned, vmid_src)
 			     __field(uint64_t, timestamp)
 			     __field(uint64_t, timestamp)
 			     __field(unsigned, timestamp_src)
 			     __field(unsigned, timestamp_src)
-			     __field(unsigned, pas_id)
+			     __field(unsigned, pasid)
 			     __array(unsigned, src_data, 4)
 			     __array(unsigned, src_data, 4)
 			    ),
 			    ),
 	    TP_fast_assign(
 	    TP_fast_assign(
@@ -97,16 +97,16 @@ TRACE_EVENT(amdgpu_iv,
 			   __entry->vmid_src = iv->vmid_src;
 			   __entry->vmid_src = iv->vmid_src;
 			   __entry->timestamp = iv->timestamp;
 			   __entry->timestamp = iv->timestamp;
 			   __entry->timestamp_src = iv->timestamp_src;
 			   __entry->timestamp_src = iv->timestamp_src;
-			   __entry->pas_id = iv->pas_id;
+			   __entry->pasid = iv->pasid;
 			   __entry->src_data[0] = iv->src_data[0];
 			   __entry->src_data[0] = iv->src_data[0];
 			   __entry->src_data[1] = iv->src_data[1];
 			   __entry->src_data[1] = iv->src_data[1];
 			   __entry->src_data[2] = iv->src_data[2];
 			   __entry->src_data[2] = iv->src_data[2];
 			   __entry->src_data[3] = iv->src_data[3];
 			   __entry->src_data[3] = iv->src_data[3];
 			   ),
 			   ),
-	    TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
+	    TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
 		      __entry->client_id, __entry->src_id,
 		      __entry->client_id, __entry->src_id,
 		      __entry->ring_id, __entry->vmid,
 		      __entry->ring_id, __entry->vmid,
-		      __entry->timestamp, __entry->pas_id,
+		      __entry->timestamp, __entry->pasid,
 		      __entry->src_data[0], __entry->src_data[1],
 		      __entry->src_data[0], __entry->src_data[1],
 		      __entry->src_data[2], __entry->src_data[3])
 		      __entry->src_data[2], __entry->src_data[3])
 );
 );
@@ -217,7 +217,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 		     struct amdgpu_job *job),
 		     struct amdgpu_job *job),
 	    TP_ARGS(vm, ring, job),
 	    TP_ARGS(vm, ring, job),
 	    TP_STRUCT__entry(
 	    TP_STRUCT__entry(
-			     __field(struct amdgpu_vm *, vm)
+			     __field(u32, pasid)
 			     __field(u32, ring)
 			     __field(u32, ring)
 			     __field(u32, vmid)
 			     __field(u32, vmid)
 			     __field(u32, vm_hub)
 			     __field(u32, vm_hub)
@@ -226,15 +226,15 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 			     ),
 			     ),
 
 
 	    TP_fast_assign(
 	    TP_fast_assign(
-			   __entry->vm = vm;
+			   __entry->pasid = vm->pasid;
 			   __entry->ring = ring->idx;
 			   __entry->ring = ring->idx;
 			   __entry->vmid = job->vmid;
 			   __entry->vmid = job->vmid;
 			   __entry->vm_hub = ring->funcs->vmhub,
 			   __entry->vm_hub = ring->funcs->vmhub,
 			   __entry->pd_addr = job->vm_pd_addr;
 			   __entry->pd_addr = job->vm_pd_addr;
 			   __entry->needs_flush = job->vm_needs_flush;
 			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
 			   ),
-	    TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
-		      __entry->vm, __entry->ring, __entry->vmid,
+	    TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
+		      __entry->pasid, __entry->ring, __entry->vmid,
 		      __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
 		      __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
 );
 );
 
 
@@ -378,6 +378,28 @@ TRACE_EVENT(amdgpu_vm_flush,
 		      __entry->vm_hub,__entry->pd_addr)
 		      __entry->vm_hub,__entry->pd_addr)
 );
 );
 
 
+DECLARE_EVENT_CLASS(amdgpu_pasid,
+	    TP_PROTO(unsigned pasid),
+	    TP_ARGS(pasid),
+	    TP_STRUCT__entry(
+			     __field(unsigned, pasid)
+			     ),
+	    TP_fast_assign(
+			   __entry->pasid = pasid;
+			   ),
+	    TP_printk("pasid=%u", __entry->pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated,
+	    TP_PROTO(unsigned pasid),
+	    TP_ARGS(pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
+	    TP_PROTO(unsigned pasid),
+	    TP_ARGS(pasid)
+);
+
 TRACE_EVENT(amdgpu_bo_list_set,
 TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
 	    TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
 	    TP_ARGS(list, bo),
 	    TP_ARGS(list, bo),
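
amdgpu_trace.h gains an amdgpu_pasid event class with allocated/freed instances, and the iv and vm_grab_id events switch from the pas_id spelling and raw vm pointers to the PASID. Following the standard tracepoint convention that DEFINE_EVENT generates trace_<name>() call sites, the PASID manager (presumably amdgpu_ids.c, not shown in this excerpt) would emit them roughly as:

	trace_amdgpu_pasid_allocated(pasid);	/* right after a PASID is handed out */
	trace_amdgpu_pasid_freed(pasid);	/* right before it goes back to the pool */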

+ 206 - 91
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

@@ -46,6 +46,7 @@
 #include "amdgpu.h"
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_d.h"
 
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -161,7 +162,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		break;
 		break;
 	case TTM_PL_TT:
 	case TTM_PL_TT:
 		man->func = &amdgpu_gtt_mgr_func;
 		man->func = &amdgpu_gtt_mgr_func;
-		man->gpu_offset = adev->mc.gart_start;
+		man->gpu_offset = adev->gmc.gart_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
@@ -169,7 +170,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 	case TTM_PL_VRAM:
 	case TTM_PL_VRAM:
 		/* "On-card" video ram */
 		/* "On-card" video ram */
 		man->func = &amdgpu_vram_mgr_func;
 		man->func = &amdgpu_vram_mgr_func;
-		man->gpu_offset = adev->mc.vram_start;
+		man->gpu_offset = adev->gmc.vram_start;
 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
 			     TTM_MEMTYPE_FLAG_MAPPABLE;
 			     TTM_MEMTYPE_FLAG_MAPPABLE;
 		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
 		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
@@ -203,6 +204,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 	};
 	};
 
 
+	if (bo->type == ttm_bo_type_sg) {
+		placement->num_placement = 0;
+		placement->num_busy_placement = 0;
+		return;
+	}
+
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 		placement->placement = &placements;
 		placement->placement = &placements;
 		placement->busy_placement = &placements;
 		placement->busy_placement = &placements;
@@ -213,13 +220,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	abo = ttm_to_amdgpu_bo(bo);
 	abo = ttm_to_amdgpu_bo(bo);
 	switch (bo->mem.mem_type) {
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 	case TTM_PL_VRAM:
-		if (adev->mman.buffer_funcs &&
-		    adev->mman.buffer_funcs_ring &&
-		    adev->mman.buffer_funcs_ring->ready == false) {
+		if (!adev->mman.buffer_funcs_enabled) {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
-		} else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-			unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+			unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 			struct drm_mm_node *node = bo->mem.mm_node;
 			struct drm_mm_node *node = bo->mem.mm_node;
 			unsigned long pages_left;
 			unsigned long pages_left;
 
 
@@ -260,6 +265,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
 
+	/*
+	 * Don't verify access for KFD BOs. They don't have a GEM
+	 * object associated with them.
+	 */
+	if (abo->kfd_bo)
+		return 0;
+
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 		return -EPERM;
 		return -EPERM;
 	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
 	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
@@ -331,7 +343,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
 	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
 					AMDGPU_GPU_PAGE_SIZE);
 					AMDGPU_GPU_PAGE_SIZE);
 
 
-	if (!ring->ready) {
+	if (!adev->mman.buffer_funcs_enabled) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
@@ -577,12 +589,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 		amdgpu_move_null(bo, new_mem);
 		amdgpu_move_null(bo, new_mem);
 		return 0;
 		return 0;
 	}
 	}
-	if (adev->mman.buffer_funcs == NULL ||
-	    adev->mman.buffer_funcs_ring == NULL ||
-	    !adev->mman.buffer_funcs_ring->ready) {
-		/* use memcpy */
+
+	if (!adev->mman.buffer_funcs_enabled)
 		goto memcpy;
 		goto memcpy;
-	}
 
 
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -621,6 +630,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 {
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+	struct drm_mm_node *mm_node = mem->mm_node;
 
 
 	mem->bus.addr = NULL;
 	mem->bus.addr = NULL;
 	mem->bus.offset = 0;
 	mem->bus.offset = 0;
@@ -638,9 +648,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 	case TTM_PL_VRAM:
 	case TTM_PL_VRAM:
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 		/* check if it's visible */
 		/* check if it's visible */
-		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
+		if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
 			return -EINVAL;
 			return -EINVAL;
-		mem->bus.base = adev->mc.aper_base;
+		/* Only physically contiguous buffers apply. In a contiguous
+		 * buffer, size of the first mm_node would match the number of
+		 * pages in ttm_mem_reg.
+		 */
+		if (adev->mman.aper_base_kaddr &&
+		    (mm_node->size == mem->num_pages))
+			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+					mem->bus.offset;
+
+		mem->bus.base = adev->gmc.aper_base;
 		mem->bus.is_iomem = true;
 		mem->bus.is_iomem = true;
 		break;
 		break;
 	default:
 	default:
@@ -674,7 +693,6 @@ struct amdgpu_ttm_gup_task_list {
 
 
 struct amdgpu_ttm_tt {
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	struct ttm_dma_tt	ttm;
-	struct amdgpu_device	*adev;
 	u64			offset;
 	u64			offset;
 	uint64_t		userptr;
 	uint64_t		userptr;
 	struct mm_struct	*usermm;
 	struct mm_struct	*usermm;
@@ -832,6 +850,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 				   struct ttm_mem_reg *bo_mem)
 {
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
 	uint64_t flags;
 	uint64_t flags;
 	int r = 0;
 	int r = 0;
@@ -858,9 +877,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 		return 0;
 		return 0;
 	}
 	}
 
 
-	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
-	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
 		ttm->pages, gtt->ttm.dma_address, flags);
 
 
 	if (r)
 	if (r)
@@ -891,7 +910,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 	placement.num_busy_placement = 1;
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
 	placements.fpfn = 0;
-	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
 	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
 	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
 		TTM_PL_FLAG_TT;
 		TTM_PL_FLAG_TT;
 
 
@@ -937,6 +956,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
 
 
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	int r;
 	int r;
 
 
@@ -947,7 +967,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 		return 0;
 		return 0;
 
 
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
-	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
+	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
 	if (r)
 	if (r)
 		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
 		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
 			  gtt->ttm.ttm.num_pages, gtt->offset);
 			  gtt->ttm.ttm.num_pages, gtt->offset);
@@ -968,22 +988,20 @@ static struct ttm_backend_func amdgpu_backend_func = {
 	.destroy = &amdgpu_ttm_backend_destroy,
 	.destroy = &amdgpu_ttm_backend_destroy,
 };
 };
 
 
-static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
-				    unsigned long size, uint32_t page_flags,
-				    struct page *dummy_read_page)
+static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
+					   uint32_t page_flags)
 {
 {
 	struct amdgpu_device *adev;
 	struct amdgpu_device *adev;
 	struct amdgpu_ttm_tt *gtt;
 	struct amdgpu_ttm_tt *gtt;
 
 
-	adev = amdgpu_ttm_adev(bdev);
+	adev = amdgpu_ttm_adev(bo->bdev);
 
 
 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
 	if (gtt == NULL) {
 	if (gtt == NULL) {
 		return NULL;
 		return NULL;
 	}
 	}
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
-	gtt->adev = adev;
-	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
+	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
 		kfree(gtt);
 		kfree(gtt);
 		return NULL;
 		return NULL;
 	}
 	}
@@ -997,9 +1015,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
 
-	if (ttm->state != tt_unpopulated)
-		return 0;
-
 	if (gtt && gtt->userptr) {
 	if (gtt && gtt->userptr) {
 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
 		if (!ttm->sg)
 		if (!ttm->sg)
@@ -1012,7 +1027,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 
 
 	if (slave && ttm->sg) {
 	if (slave && ttm->sg) {
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
-						 gtt->ttm.dma_address, ttm->num_pages);
+						 gtt->ttm.dma_address,
+						 ttm->num_pages);
 		ttm->state = tt_unbound;
 		ttm->state = tt_unbound;
 		return 0;
 		return 0;
 	}
 	}
@@ -1170,6 +1186,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 {
 {
 	unsigned long num_pages = bo->mem.num_pages;
 	unsigned long num_pages = bo->mem.num_pages;
 	struct drm_mm_node *node = bo->mem.mm_node;
 	struct drm_mm_node *node = bo->mem.mm_node;
+	struct reservation_object_list *flist;
+	struct dma_fence *f;
+	int i;
+
+	/* If bo is a KFD BO, check if the bo belongs to the current process.
+	 * If true, then return false as any KFD process needs all its BOs to
+	 * be resident to run successfully
+	 */
+	flist = reservation_object_get_list(bo->resv);
+	if (flist) {
+		for (i = 0; i < flist->shared_count; ++i) {
+			f = rcu_dereference_protected(flist->shared[i],
+				reservation_object_held(bo->resv));
+			if (amdkfd_fence_check_mm(f, current->mm))
+				return false;
+		}
+	}
 
 
 	switch (bo->mem.mem_type) {
 	switch (bo->mem.mem_type) {
 	case TTM_PL_TT:
 	case TTM_PL_TT:
@@ -1212,7 +1245,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
 	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
 	pos = (nodes->start << PAGE_SHIFT) + offset;
 	pos = (nodes->start << PAGE_SHIFT) + offset;
 
 
-	while (len && pos < adev->mc.mc_vram_size) {
+	while (len && pos < adev->gmc.mc_vram_size) {
 		uint64_t aligned_pos = pos & ~(uint64_t)3;
 		uint64_t aligned_pos = pos & ~(uint64_t)3;
 		uint32_t bytes = 4 - (pos & 3);
 		uint32_t bytes = 4 - (pos & 3);
 		uint32_t shift = (pos & 3) * 8;
 		uint32_t shift = (pos & 3) * 8;
@@ -1298,7 +1331,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 	struct ttm_operation_ctx ctx = { false, false };
 	struct ttm_operation_ctx ctx = { false, false };
 	int r = 0;
 	int r = 0;
 	int i;
 	int i;
-	u64 vram_size = adev->mc.visible_vram_size;
+	u64 vram_size = adev->gmc.visible_vram_size;
 	u64 offset = adev->fw_vram_usage.start_offset;
 	u64 offset = adev->fw_vram_usage.start_offset;
 	u64 size = adev->fw_vram_usage.size;
 	u64 size = adev->fw_vram_usage.size;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo *bo;
@@ -1309,11 +1342,12 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 	if (adev->fw_vram_usage.size > 0 &&
 	if (adev->fw_vram_usage.size > 0 &&
 		adev->fw_vram_usage.size <= vram_size) {
 		adev->fw_vram_usage.size <= vram_size) {
 
 
-		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
-			PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
-			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
-			&adev->fw_vram_usage.reserved_bo);
+		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
+				     AMDGPU_GEM_DOMAIN_VRAM,
+				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+				     ttm_bo_type_kernel, NULL,
+				     &adev->fw_vram_usage.reserved_bo);
 		if (r)
 		if (r)
 			goto error_create;
 			goto error_create;
 
 
@@ -1387,8 +1421,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 		return r;
 	}
 	}
 	adev->mman.initialized = true;
 	adev->mman.initialized = true;
+
+	/* We opt to avoid OOM on system pages allocations */
+	adev->mman.bdev.no_retry = true;
+
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
-				adev->mc.real_vram_size >> PAGE_SHIFT);
+				adev->gmc.real_vram_size >> PAGE_SHIFT);
 	if (r) {
 	if (r) {
 		DRM_ERROR("Failed initializing VRAM heap.\n");
 		DRM_ERROR("Failed initializing VRAM heap.\n");
 		return r;
 		return r;
@@ -1397,11 +1435,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* Reduce size of CPU-visible VRAM if requested */
 	/* Reduce size of CPU-visible VRAM if requested */
 	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
 	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
 	if (amdgpu_vis_vram_limit > 0 &&
 	if (amdgpu_vis_vram_limit > 0 &&
-	    vis_vram_limit <= adev->mc.visible_vram_size)
-		adev->mc.visible_vram_size = vis_vram_limit;
+	    vis_vram_limit <= adev->gmc.visible_vram_size)
+		adev->gmc.visible_vram_size = vis_vram_limit;
 
 
 	/* Change the size here instead of the init above so only lpfn is affected */
 	/* Change the size here instead of the init above so only lpfn is affected */
-	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+	amdgpu_ttm_set_buffer_funcs_status(adev, false);
+#ifdef CONFIG_64BIT
+	adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+						adev->gmc.visible_vram_size);
+#endif
 
 
 	/*
 	/*
 	 *The reserved vram for firmware must be pinned to the specified
 	 *The reserved vram for firmware must be pinned to the specified
@@ -1412,21 +1454,21 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 		return r;
 	}
 	}
 
 
-	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
+	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_VRAM,
 				    AMDGPU_GEM_DOMAIN_VRAM,
 				    &adev->stolen_vga_memory,
 				    &adev->stolen_vga_memory,
 				    NULL, NULL);
 				    NULL, NULL);
 	if (r)
 	if (r)
 		return r;
 		return r;
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
-		 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
+		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
 
 	if (amdgpu_gtt_size == -1) {
 	if (amdgpu_gtt_size == -1) {
 		struct sysinfo si;
 		struct sysinfo si;
 
 
 		si_meminfo(&si);
 		si_meminfo(&si);
 		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
 		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-			       adev->mc.mc_vram_size),
+			       adev->gmc.mc_vram_size),
 			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
 			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
 	}
 	}
 	else
 	else
@@ -1494,6 +1536,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	amdgpu_ttm_debugfs_fini(adev);
 	amdgpu_ttm_debugfs_fini(adev);
 	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
 	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
+	if (adev->mman.aper_base_kaddr)
+		iounmap(adev->mman.aper_base_kaddr);
+	adev->mman.aper_base_kaddr = NULL;
 
 
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
@@ -1509,18 +1554,30 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	DRM_INFO("amdgpu: ttm finalized\n");
 	DRM_INFO("amdgpu: ttm finalized\n");
 }
 }
 
 
-/* this should only be called at bootup or when userspace
- * isn't running */
-void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
+/**
+ * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true when we can use buffer functions.
+ *
+ * Enable/disable use of buffer functions during suspend/resume. This should
+ * only be called at bootup or when userspace isn't running.
+ */
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
 {
-	struct ttm_mem_type_manager *man;
+	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
+	uint64_t size;
 
 
-	if (!adev->mman.initialized)
+	if (!adev->mman.initialized || adev->in_gpu_reset)
 		return;
 		return;
 
 
-	man = &adev->mman.bdev.man[TTM_PL_VRAM];
 	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
 	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
+	if (enable)
+		size = adev->gmc.real_vram_size;
+	else
+		size = adev->gmc.visible_vram_size;
 	man->size = size >> PAGE_SHIFT;
 	man->size = size >> PAGE_SHIFT;
+	adev->mman.buffer_funcs_enabled = enable;
 }
 }
 
 
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
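
amdgpu_ttm_set_buffer_funcs_status() replaces the old set_active_vram_size(): one switch now both resizes the TTM VRAM manager (full real_vram_size when SDMA copies are usable, only the CPU-visible size otherwise) and records buffer_funcs_enabled, which the move, fill and evict paths above test instead of poking at buffer_funcs_ring->ready. The init path above disables it; one would expect the SDMA/DMA IP blocks (not shown in this excerpt) to flip it around their ring bring-up and teardown, roughly:

	amdgpu_ttm_set_buffer_funcs_status(adev, true);	/* copy engine ready */
	/* ... suspend / hw_fini ... */
	amdgpu_ttm_set_buffer_funcs_status(adev, false);	/* fall back to CPU copies */
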
@@ -1559,7 +1616,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
 
 
-	*addr = adev->mc.gart_start;
+	*addr = adev->gmc.gart_start;
 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
 		AMDGPU_GPU_PAGE_SIZE;
 		AMDGPU_GPU_PAGE_SIZE;
 
 
@@ -1619,6 +1676,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	unsigned i;
 	unsigned i;
 	int r;
 	int r;
 
 
+	if (direct_submit && !ring->ready) {
+		DRM_ERROR("Trying to move memory with ring turned off.\n");
+		return -EINVAL;
+	}
+
 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
 	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
 	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
@@ -1677,13 +1739,12 @@ error_free:
 }
 }
 
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-		       uint64_t src_data,
+		       uint32_t src_data,
 		       struct reservation_object *resv,
 		       struct reservation_object *resv,
 		       struct dma_fence **fence)
 		       struct dma_fence **fence)
 {
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	uint32_t max_bytes = 8 *
-			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
+	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
 
 	struct drm_mm_node *mm_node;
 	struct drm_mm_node *mm_node;
@@ -1693,7 +1754,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 	struct amdgpu_job *job;
 	struct amdgpu_job *job;
 	int r;
 	int r;
 
 
-	if (!ring->ready) {
+	if (!adev->mman.buffer_funcs_enabled) {
 		DRM_ERROR("Trying to clear memory with ring turned off.\n");
 		DRM_ERROR("Trying to clear memory with ring turned off.\n");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
@@ -1714,9 +1775,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		num_pages -= mm_node->size;
 		num_pages -= mm_node->size;
 		++mm_node;
 		++mm_node;
 	}
 	}
-
-	/* num of dwords for each SDMA_OP_PTEPDE cmd */
-	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
 
 
 	/* for IB padding */
 	/* for IB padding */
 	num_dw += 64;
 	num_dw += 64;
@@ -1741,16 +1800,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
 		uint64_t dst_addr;
 		uint64_t dst_addr;
 
 
-		WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
-
 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
 		while (byte_count) {
 		while (byte_count) {
 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
 
-			amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
-					dst_addr, 0,
-					cur_size_in_bytes >> 3, 0,
-					src_data);
+			amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
+						dst_addr, cur_size_in_bytes);
 
 
 			dst_addr += cur_size_in_bytes;
 			dst_addr += cur_size_in_bytes;
 			byte_count -= cur_size_in_bytes;
 			byte_count -= cur_size_in_bytes;
@@ -1811,14 +1866,14 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	if (*pos >= adev->mc.mc_vram_size)
+	if (*pos >= adev->gmc.mc_vram_size)
 		return -ENXIO;
 		return -ENXIO;
 
 
 	while (size) {
 	while (size) {
 		unsigned long flags;
 		unsigned long flags;
 		uint32_t value;
 		uint32_t value;
 
 
-		if (*pos >= adev->mc.mc_vram_size)
+		if (*pos >= adev->gmc.mc_vram_size)
 			return result;
 			return result;
 
 
 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
@@ -1850,14 +1905,14 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	if (*pos >= adev->mc.mc_vram_size)
+	if (*pos >= adev->gmc.mc_vram_size)
 		return -ENXIO;
 		return -ENXIO;
 
 
 	while (size) {
 	while (size) {
 		unsigned long flags;
 		unsigned long flags;
 		uint32_t value;
 		uint32_t value;
 
 
-		if (*pos >= adev->mc.mc_vram_size)
+		if (*pos >= adev->gmc.mc_vram_size)
 			return result;
 			return result;
 
 
 		r = get_user(value, (uint32_t *)buf);
 		r = get_user(value, (uint32_t *)buf);
@@ -1935,38 +1990,98 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
 
 
 #endif
 #endif
 
 
-static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
-				   size_t size, loff_t *pos)
+static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
+				 size_t size, loff_t *pos)
 {
 {
 	struct amdgpu_device *adev = file_inode(f)->i_private;
 	struct amdgpu_device *adev = file_inode(f)->i_private;
-	int r;
-	uint64_t phys;
 	struct iommu_domain *dom;
 	struct iommu_domain *dom;
+	ssize_t result = 0;
+	int r;
 
 
-	// always return 8 bytes
-	if (size != 8)
-		return -EINVAL;
+	dom = iommu_get_domain_for_dev(adev->dev);
 
 
-	// only accept page addresses
-	if (*pos & 0xFFF)
-		return -EINVAL;
+	while (size) {
+		phys_addr_t addr = *pos & PAGE_MASK;
+		loff_t off = *pos & ~PAGE_MASK;
+		size_t bytes = PAGE_SIZE - off;
+		unsigned long pfn;
+		struct page *p;
+		void *ptr;
+
+		bytes = bytes < size ? bytes : size;
+
+		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
+
+		pfn = addr >> PAGE_SHIFT;
+		if (!pfn_valid(pfn))
+			return -EPERM;
+
+		p = pfn_to_page(pfn);
+		if (p->mapping != adev->mman.bdev.dev_mapping)
+			return -EPERM;
+
+		ptr = kmap(p);
+		r = copy_to_user(buf, ptr, bytes);
+		kunmap(p);
+		if (r)
+			return -EFAULT;
+
+		size -= bytes;
+		*pos += bytes;
+		result += bytes;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
+				 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	struct iommu_domain *dom;
+	ssize_t result = 0;
+	int r;
 
 
 	dom = iommu_get_domain_for_dev(adev->dev);
 	dom = iommu_get_domain_for_dev(adev->dev);
-	if (dom)
-		phys = iommu_iova_to_phys(dom, *pos);
-	else
-		phys = *pos;
 
 
-	r = copy_to_user(buf, &phys, 8);
-	if (r)
-		return -EFAULT;
+	while (size) {
+		phys_addr_t addr = *pos & PAGE_MASK;
+		loff_t off = *pos & ~PAGE_MASK;
+		size_t bytes = PAGE_SIZE - off;
+		unsigned long pfn;
+		struct page *p;
+		void *ptr;
+
+		bytes = bytes < size ? bytes : size;
+
+		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
 
 
-	return 8;
+		pfn = addr >> PAGE_SHIFT;
+		if (!pfn_valid(pfn))
+			return -EPERM;
+
+		p = pfn_to_page(pfn);
+		if (p->mapping != adev->mman.bdev.dev_mapping)
+			return -EPERM;
+
+		ptr = kmap(p);
+		r = copy_from_user(ptr, buf, bytes);
+		kunmap(p);
+		if (r)
+			return -EFAULT;
+
+		size -= bytes;
+		*pos += bytes;
+		result += bytes;
+	}
+
+	return result;
 }
 }
 
 
-static const struct file_operations amdgpu_ttm_iova_fops = {
+static const struct file_operations amdgpu_ttm_iomem_fops = {
 	.owner = THIS_MODULE,
 	.owner = THIS_MODULE,
-	.read = amdgpu_iova_to_phys_read,
+	.read = amdgpu_iomem_read,
+	.write = amdgpu_iomem_write,
 	.llseek = default_llseek
 	.llseek = default_llseek
 };
 };
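
The debugfs rework above turns the old amdgpu_iova file, which only returned the physical address behind a page-aligned IOVA, into an amdgpu_iomem window that reads and writes the actual page contents: each page is translated through the IOMMU domain (if any), rejected unless it belongs to the TTM device mapping, then temporarily kmap'ed for the copy. A userspace sketch (the DRI minor number and the iova value are illustrative):

	/* peek at a dword of the system memory backing a GTT mapping */
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_iomem", O_RDONLY);
	uint32_t word;

	pread(fd, &word, sizeof(word), (off_t)iova);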
 
 
@@ -1979,7 +2094,7 @@ static const struct {
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
 #endif
 #endif
-	{ "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
+	{ "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
 };
 };
 
 
 #endif
 #endif
@@ -2001,9 +2116,9 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
 		if (IS_ERR(ent))
 		if (IS_ERR(ent))
 			return PTR_ERR(ent);
 			return PTR_ERR(ent);
 		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
 		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
-			i_size_write(ent->d_inode, adev->mc.mc_vram_size);
+			i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
 		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
 		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
-			i_size_write(ent->d_inode, adev->mc.gart_size);
+			i_size_write(ent->d_inode, adev->gmc.gart_size);
 		adev->mman.debugfs_entries[count] = ent;
 		adev->mman.debugfs_entries[count] = ent;
 	}
 	}
 
 

+ 8 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

@@ -44,6 +44,7 @@ struct amdgpu_mman {
 	struct ttm_bo_device		bdev;
 	struct ttm_bo_device		bdev;
 	bool				mem_global_referenced;
 	bool				mem_global_referenced;
 	bool				initialized;
 	bool				initialized;
+	void __iomem			*aper_base_kaddr;
 
 
 #if defined(CONFIG_DEBUG_FS)
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry			*debugfs_entries[8];
 	struct dentry			*debugfs_entries[8];
@@ -52,6 +53,7 @@ struct amdgpu_mman {
 	/* buffer handling */
 	/* buffer handling */
 	const struct amdgpu_buffer_funcs	*buffer_funcs;
 	const struct amdgpu_buffer_funcs	*buffer_funcs;
 	struct amdgpu_ring			*buffer_funcs_ring;
 	struct amdgpu_ring			*buffer_funcs_ring;
+	bool					buffer_funcs_enabled;
 
 
 	struct mutex				gtt_window_lock;
 	struct mutex				gtt_window_lock;
 	/* Scheduler entity for buffer moves */
 	/* Scheduler entity for buffer moves */
@@ -74,6 +76,11 @@ int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
 
+int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_fini(struct amdgpu_device *adev);
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+					bool enable);
+
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       uint64_t dst_offset, uint32_t byte_count,
 		       uint64_t dst_offset, uint32_t byte_count,
 		       struct reservation_object *resv,
 		       struct reservation_object *resv,
@@ -86,7 +93,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			       struct reservation_object *resv,
 			       struct reservation_object *resv,
 			       struct dma_fence **f);
 			       struct dma_fence **f);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-			uint64_t src_data,
+			uint32_t src_data,
 			struct reservation_object *resv,
 			struct reservation_object *resv,
 			struct dma_fence **fence);
 			struct dma_fence **fence);
 
 

+ 50 - 78
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c

@@ -299,12 +299,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 
 
 	cancel_delayed_work_sync(&adev->uvd.idle_work);
 	cancel_delayed_work_sync(&adev->uvd.idle_work);
 
 
-	for (i = 0; i < adev->uvd.max_handles; ++i)
-		if (atomic_read(&adev->uvd.handles[i]))
-			break;
+	/* only valid for physical mode */
+	if (adev->asic_type < CHIP_POLARIS10) {
+		for (i = 0; i < adev->uvd.max_handles; ++i)
+			if (atomic_read(&adev->uvd.handles[i]))
+				break;
 
 
-	if (i == AMDGPU_MAX_UVD_HANDLES)
-		return 0;
+		if (i == adev->uvd.max_handles)
+			return 0;
+	}
 
 
 	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
 	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
 	ptr = adev->uvd.cpu_addr;
 	ptr = adev->uvd.cpu_addr;
@@ -952,37 +955,28 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 			       bool direct, struct dma_fence **fence)
 			       bool direct, struct dma_fence **fence)
 {
 {
-	struct ttm_operation_ctx ctx = { true, false };
-	struct ttm_validate_buffer tv;
-	struct ww_acquire_ctx ticket;
-	struct list_head head;
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *f = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
 	struct amdgpu_ib *ib;
-	struct dma_fence *f = NULL;
-	struct amdgpu_device *adev = ring->adev;
-	uint64_t addr;
 	uint32_t data[4];
 	uint32_t data[4];
-	int i, r;
-
-	memset(&tv, 0, sizeof(tv));
-	tv.bo = &bo->tbo;
-
-	INIT_LIST_HEAD(&head);
-	list_add(&tv.head, &head);
+	uint64_t addr;
+	long r;
+	int i;
 
 
-	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
-	if (r)
-		return r;
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unpin(bo);
 
 
 	if (!ring->adev->uvd.address_64_bit) {
 	if (!ring->adev->uvd.address_64_bit) {
+		struct ttm_operation_ctx ctx = { true, false };
+
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
 		amdgpu_uvd_force_into_uvd_segment(bo);
 		amdgpu_uvd_force_into_uvd_segment(bo);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		if (r)
+			goto err;
 	}
 	}
 
 
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-	if (r)
-		goto err;
-
 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	if (r)
 	if (r)
 		goto err;
 		goto err;
@@ -1014,6 +1008,14 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	ib->length_dw = 16;
 	ib->length_dw = 16;
 
 
 	if (direct) {
 	if (direct) {
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+							true, false,
+							msecs_to_jiffies(10));
+		if (r == 0)
+			r = -ETIMEDOUT;
+		if (r < 0)
+			goto err_free;
+
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
 		job->fence = dma_fence_get(f);
 		job->fence = dma_fence_get(f);
 		if (r)
 		if (r)
@@ -1021,17 +1023,23 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 
 		amdgpu_job_free(job);
 		amdgpu_job_free(job);
 	} else {
 	} else {
+		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
+		if (r)
+			goto err_free;
+
 		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
 		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 		if (r)
 		if (r)
 			goto err_free;
 			goto err_free;
 	}
 	}
 
 
-	ttm_eu_fence_buffer_objects(&ticket, &head, f);
+	amdgpu_bo_fence(bo, f, false);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 
 
 	if (fence)
 	if (fence)
 		*fence = dma_fence_get(f);
 		*fence = dma_fence_get(f);
-	amdgpu_bo_unref(&bo);
 	dma_fence_put(f);
 	dma_fence_put(f);
 
 
 	return 0;
 	return 0;
@@ -1040,7 +1048,8 @@ err_free:
 	amdgpu_job_free(job);
 	amdgpu_job_free(job);
 
 
 err:
 err:
-	ttm_eu_backoff_reservation(&ticket, &head);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 	return r;
 	return r;
 }
 }
 
 
@@ -1051,31 +1060,16 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct dma_fence **fence)
 			      struct dma_fence **fence)
 {
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	uint32_t *msg;
 	int r, i;
 	int r, i;
 
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	/* stitch together an UVD create msg */
 	/* stitch together an UVD create msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000000);
 	msg[1] = cpu_to_le32(0x00000000);
@@ -1091,9 +1085,6 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 11; i < 1024; ++i)
 	for (i = 11; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 		msg[i] = cpu_to_le32(0x0);
 
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_uvd_send_msg(ring, bo, true, fence);
 	return amdgpu_uvd_send_msg(ring, bo, true, fence);
 }
 }
 
 
@@ -1101,31 +1092,16 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 			       bool direct, struct dma_fence **fence)
 			       bool direct, struct dma_fence **fence)
 {
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	uint32_t *msg;
 	int r, i;
 	int r, i;
 
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	/* stitch together an UVD destroy msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000002);
@@ -1134,9 +1110,6 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 4; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
 }
 
@@ -1146,9 +1119,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 		container_of(work, struct amdgpu_device, uvd.idle_work.work);
 	unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
 
-	if (amdgpu_sriov_vf(adev))
-		return;
-
 	if (fences == 0) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, false);
@@ -1168,11 +1138,12 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+	bool set_clocks;
 
 	if (amdgpu_sriov_vf(adev))
 		return;
 
+	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, true);
@@ -1188,7 +1159,8 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 {
-	schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+	if (!amdgpu_sriov_vf(ring->adev))
+		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 }
 
 /**
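
The direct-submission path above waits on the BO's reservation object with a 10 ms budget; reservation_object_wait_timeout_rcu() returns the remaining time on success, 0 on timeout and a negative error code on failure, so the patch maps 0 to -ETIMEDOUT before the error check. A minimal user-space sketch of that return-code convention (wait_for_fences() and its behaviour are illustrative stand-ins, not the kernel API):

#include <errno.h>
#include <stdio.h>

/* Illustrative stand-in: returns remaining time on success, 0 on timeout,
 * or a negative errno on error -- the same convention as the RCU wait. */
static long wait_for_fences(long timeout_ms)
{
	(void)timeout_ms;
	return 0; /* pretend the 10 ms budget expired */
}

int main(void)
{
	long r = wait_for_fences(10);

	if (r == 0)
		r = -ETIMEDOUT;	/* timeout is not an error code by itself */
	if (r < 0) {
		printf("wait failed: %ld\n", r);
		return 1;
	}
	return 0;
}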

+ 2 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c

@@ -300,9 +300,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
 		container_of(work, struct amdgpu_device, vce.idle_work.work);
 	unsigned i, count = 0;
 
-	if (amdgpu_sriov_vf(adev))
-		return;
-
 	for (i = 0; i < adev->vce.num_rings; i++)
 		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
 
@@ -362,7 +359,8 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
  */
 void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
 {
-	schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
+	if (!amdgpu_sriov_vf(ring->adev))
+		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 }
 
 /**

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h

@@ -30,6 +30,8 @@
 #define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
 #define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
 
+#define AMDGPU_VCE_FW_53_45	((53 << 24) | (45 << 16))
+
 struct amdgpu_vce {
 	struct amdgpu_bo	*vcpu_bo;
 	uint64_t		gpu_addr;

+ 18 - 69
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

@@ -270,34 +270,17 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
-static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
-			       bool direct, struct dma_fence **fence)
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
+				   struct amdgpu_bo *bo, bool direct,
+				   struct dma_fence **fence)
 {
-	struct ttm_operation_ctx ctx = { true, false };
-	struct ttm_validate_buffer tv;
-	struct ww_acquire_ctx ticket;
-	struct list_head head;
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *f = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct dma_fence *f = NULL;
-	struct amdgpu_device *adev = ring->adev;
 	uint64_t addr;
 	int i, r;
 
-	memset(&tv, 0, sizeof(tv));
-	tv.bo = &bo->tbo;
-
-	INIT_LIST_HEAD(&head);
-	list_add(&tv.head, &head);
-
-	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
-	if (r)
-		return r;
-
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-	if (r)
-		goto err;
-
 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	if (r)
 		goto err;
@@ -330,11 +313,12 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *b
 			goto err_free;
 	}
 
-	ttm_eu_fence_buffer_objects(&ticket, &head, f);
+	amdgpu_bo_fence(bo, f, false);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 
 	if (fence)
 		*fence = dma_fence_get(f);
-	amdgpu_bo_unref(&bo);
 	dma_fence_put(f);
 
 	return 0;
@@ -343,7 +327,8 @@ err_free:
 	amdgpu_job_free(job);
 
 err:
-	ttm_eu_backoff_reservation(&ticket, &head);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 	return r;
 }
 
@@ -351,31 +336,16 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 			      struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	msg[0] = cpu_to_le32(0x00000028);
 	msg[1] = cpu_to_le32(0x00000038);
 	msg[2] = cpu_to_le32(0x00000001);
@@ -393,9 +363,6 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 	for (i = 14; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
 }
 
@@ -403,31 +370,16 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	msg[0] = cpu_to_le32(0x00000028);
 	msg[1] = cpu_to_le32(0x00000018);
 	msg[2] = cpu_to_le32(0x00000000);
@@ -437,9 +389,6 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 	for (i = 6; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
 }
 

+ 78 - 17
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

@@ -22,7 +22,21 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
+#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
+{
+	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+
+	addr -= AMDGPU_VA_RESERVED_SIZE;
+
+	if (addr >= AMDGPU_VA_HOLE_START)
+		addr |= AMDGPU_VA_HOLE_END;
+
+	return addr;
+}
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
 {
@@ -55,14 +69,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
 
 /*
  * amdgpu_map_static_csa should be called during amdgpu_vm_init
- * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE"
- * to this VM, and each command submission of GFX should use this virtual
- * address within META_DATA init package to support SRIOV gfx preemption.
+ * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
+ * submission of GFX should use this virtual address within META_DATA init
+ * package to support SRIOV gfx preemption.
  */
-
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			  struct amdgpu_bo_va **bo_va)
 {
+	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
 	struct ww_acquire_ctx ticket;
 	struct list_head list;
 	struct amdgpu_bo_list_entry pd;
@@ -90,7 +104,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return -ENOMEM;
 	}
 
-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
+	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
 				AMDGPU_CSA_SIZE);
 	if (r) {
 		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
@@ -99,7 +113,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return r;
 	}
 
-	r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
+	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 			     AMDGPU_PTE_EXECUTABLE);
 
@@ -125,9 +139,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
-	signed long r;
+	signed long r, cnt = 0;
 	unsigned long flags;
-	uint32_t val, seq;
+	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
 
@@ -141,18 +155,39 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld\n", r);
-		return ~0;
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which causes
+	 * gpu_recover() to hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq_read;
+
+	if (in_interrupt())
+		might_sleep();
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 	}
-	val = adev->wb.wb[adev->virt.reg_val_offs];
 
-	return val;
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_read;
+
+	return adev->wb.wb[adev->virt.reg_val_offs];
+
+failed_kiq_read:
+	pr_err("failed to read reg:%x\n", reg);
+	return ~0;
 }
 
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 {
-	signed long r;
+	signed long r, cnt = 0;
 	unsigned long flags;
 	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -168,8 +203,34 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-	if (r < 1)
-		DRM_ERROR("wait for kiq fence error: %ld\n", r);
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which causes
+	 * gpu_recover() to hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq_write;
+
+	if (in_interrupt())
+		might_sleep();
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_write;
+
+	return;
+
+failed_kiq_write:
+	pr_err("failed to write reg:%x\n", reg);
 }
 
 /**

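
Instead of a single very long poll, the reworked KIQ register accessors above wait MAX_KIQ_REG_WAIT microseconds at a time and retry up to MAX_KIQ_REG_TRY times with MAX_KIQ_REG_BAILOUT_INTERVAL millisecond sleeps in between, bailing out early during GPU reset or in IRQ context. A minimal user-space sketch of that retry shape (poll_fence() and its signalling behaviour are illustrative stand-ins, not the kernel API):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_KIQ_REG_WAIT		5000	/* usecs per poll, as in the patch */
#define MAX_KIQ_REG_BAILOUT_INTERVAL	5	/* msecs between retries */
#define MAX_KIQ_REG_TRY			20

/* Illustrative stand-in for the fence poll: returns >= 1 once signalled. */
static long poll_fence(unsigned usecs)
{
	static int calls;

	(void)usecs;
	return ++calls >= 3 ? 1 : 0;	/* pretend the fence signals on the 3rd poll */
}

static bool kiq_wait_with_retries(void)
{
	long r = poll_fence(MAX_KIQ_REG_WAIT);
	int cnt = 0;

	/* same loop shape as the patched amdgpu_virt_kiq_rreg()/wreg() */
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		usleep(MAX_KIQ_REG_BAILOUT_INTERVAL * 1000);
		r = poll_fence(MAX_KIQ_REG_WAIT);
	}
	return cnt <= MAX_KIQ_REG_TRY && r >= 1;
}

int main(void)
{
	printf("kiq wait %s\n", kiq_wait_with_retries() ? "succeeded" : "failed");
	return 0;
}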
+ 3 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

@@ -251,8 +251,7 @@ struct amdgpu_virt {
 	uint32_t gim_feature;
 };
 
-#define AMDGPU_CSA_SIZE    (8 * 1024)
-#define AMDGPU_CSA_VADDR   (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE)
+#define AMDGPU_CSA_SIZE		(8 * 1024)
 
 #define amdgpu_sriov_enabled(adev) \
 ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void)
 }
 
 struct amdgpu_vm;
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

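
With the fixed AMDGPU_CSA_VADDR define gone, the CSA virtual address is now computed at run time by amdgpu_csa_vaddr(): start at the top of the VM space (max_pfn pages), subtract the reserved window, and sign-extend the result into the upper half when it lands above the 48-bit VA hole. A stand-alone sketch of that arithmetic (the max_pfn value and the AMDGPU_VA_HOLE_END constant are assumptions for illustration; the other constants mirror the patch):

#include <inttypes.h>
#include <stdio.h>

#define AMDGPU_GPU_PAGE_SHIFT	12
#define AMDGPU_VA_RESERVED_SIZE	(1ULL << 20)		/* 1 MB, as in amdgpu_vm.h */
#define AMDGPU_VA_HOLE_START	0x0000800000000000ULL
#define AMDGPU_VA_HOLE_END	0xffff800000000000ULL	/* assumed value for this sketch */

static uint64_t csa_vaddr(uint64_t max_pfn)
{
	uint64_t addr = max_pfn << AMDGPU_GPU_PAGE_SHIFT;

	addr -= AMDGPU_VA_RESERVED_SIZE;
	if (addr >= AMDGPU_VA_HOLE_START)
		addr |= AMDGPU_VA_HOLE_END;	/* sign-extend into the upper half */
	return addr;
}

int main(void)
{
	/* e.g. a 48-bit VM: 2^48 bytes of address space is 2^36 GPU pages */
	printf("CSA at 0x%" PRIx64 "\n", csa_vaddr(1ULL << 36));
	return 0;
}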
+ 212 - 114
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -75,7 +75,8 @@ struct amdgpu_pte_update_params {
 	/* indirect buffer to fill with commands */
 	struct amdgpu_ib *ib;
 	/* Function which actually does the update */
-	void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
+	void (*func)(struct amdgpu_pte_update_params *params,
+		     struct amdgpu_bo *bo, uint64_t pe,
 		     uint64_t addr, unsigned count, uint32_t incr,
 		     uint64_t flags);
 	/* The next two are used during VM update by CPU
@@ -256,6 +257,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 	return ready;
 }
 
+/**
+ * amdgpu_vm_clear_bo - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: BO to clear
+ * @level: level this BO is at
+ *
+ * Root PD needs to be reserved when calling this.
+ */
+static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
+			      unsigned level, bool pte_support_ats)
+{
+	struct ttm_operation_ctx ctx = { true, false };
+	struct dma_fence *fence = NULL;
+	unsigned entries, ats_entries;
+	struct amdgpu_ring *ring;
+	struct amdgpu_job *job;
+	uint64_t addr;
+	int r;
+
+	addr = amdgpu_bo_gpu_offset(bo);
+	entries = amdgpu_bo_size(bo) / 8;
+
+	if (pte_support_ats) {
+		if (level == adev->vm_manager.root_level) {
+			ats_entries = amdgpu_vm_level_shift(adev, level);
+			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+			ats_entries = min(ats_entries, entries);
+			entries -= ats_entries;
+		} else {
+			ats_entries = entries;
+			entries = 0;
+		}
+	} else {
+		ats_entries = 0;
+	}
+
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+	r = reservation_object_reserve_shared(bo->tbo.resv);
+	if (r)
+		return r;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r)
+		goto error;
+
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	if (r)
+		goto error;
+
+	if (ats_entries) {
+		uint64_t ats_value;
+
+		ats_value = AMDGPU_PTE_DEFAULT_ATC;
+		if (level != AMDGPU_VM_PTB)
+			ats_value |= AMDGPU_PDE_PTE;
+
+		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+				      ats_entries, 0, ats_value);
+		addr += ats_entries * 8;
+	}
+
+	if (entries)
+		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+				      entries, 0, 0);
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+	WARN_ON(job->ibs[0].length_dw > 64);
+	r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
+	if (r)
+		goto error_free;
+
+	r = amdgpu_job_submit(job, ring, &vm->entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r)
+		goto error_free;
+
+	amdgpu_bo_fence(bo, fence, true);
+	dma_fence_put(fence);
+
+	if (bo->shadow)
+		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
+					  level, pte_support_ats);
+
+	return 0;
+
+error_free:
+	amdgpu_job_free(job);
+
+error:
+	return r;
+}
+
 /**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
@@ -270,13 +369,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
 				  struct amdgpu_vm_pt *parent,
 				  uint64_t saddr, uint64_t eaddr,
-				  unsigned level)
+				  unsigned level, bool ats)
 {
 	unsigned shift = amdgpu_vm_level_shift(adev, level);
 	unsigned pt_idx, from, to;
-	int r;
 	u64 flags;
-	uint64_t init_value = 0;
+	int r;
 
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -299,21 +397,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				AMDGPU_GEM_CREATE_SHADOW);
 
-	if (vm->pte_support_ats) {
-		init_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != AMDGPU_VM_PTB)
-			init_value |= AMDGPU_PDE_PTE;
-
-	}
-
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -323,16 +413,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level),
-					     AMDGPU_GPU_PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_VRAM,
-					     flags,
-					     NULL, resv, init_value, &pt);
+					     AMDGPU_GPU_PAGE_SIZE,
+					     AMDGPU_GEM_DOMAIN_VRAM, flags,
+					     ttm_bo_type_kernel, resv, &pt);
 			if (r)
 				return r;
 
+			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
+			if (r) {
+				amdgpu_bo_unref(&pt->shadow);
+				amdgpu_bo_unref(&pt);
+				return r;
+			}
+
 			if (vm->use_cpu_for_update) {
 				r = amdgpu_bo_kmap(pt, NULL);
 				if (r) {
+					amdgpu_bo_unref(&pt->shadow);
 					amdgpu_bo_unref(&pt);
 					return r;
 				}
@@ -356,7 +453,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 				((1 << shift) - 1);
 			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
-						   sub_eaddr, level);
+						   sub_eaddr, level, ats);
 			if (r)
 				return r;
 		}
@@ -379,26 +476,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			struct amdgpu_vm *vm,
 			uint64_t saddr, uint64_t size)
 {
-	uint64_t last_pfn;
 	uint64_t eaddr;
+	bool ats = false;
 
 	/* validate the parameters */
 	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
 		return -EINVAL;
 
 	eaddr = saddr + size - 1;
-	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
-	if (last_pfn >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-			last_pfn, adev->vm_manager.max_pfn);
-		return -EINVAL;
-	}
+
+	if (vm->pte_support_ats)
+		ats = saddr < AMDGPU_VA_HOLE_START;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
+	if (eaddr >= adev->vm_manager.max_pfn) {
+		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+			eaddr, adev->vm_manager.max_pfn);
+		return -EINVAL;
+	}
+
 	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
-				      adev->vm_manager.root_level);
+				      adev->vm_manager.root_level, ats);
 }
 
 /**
@@ -465,7 +565,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 
 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 {
-	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+	return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 }
 
 /**
@@ -491,14 +591,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 		id->oa_base != job->oa_base ||
 		id->oa_size != job->oa_size);
 	bool vm_flush_needed = job->vm_needs_flush;
+	bool pasid_mapping_needed = id->pasid != job->pasid ||
+		!id->pasid_mapping ||
+		!dma_fence_is_signaled(id->pasid_mapping);
+	struct dma_fence *fence = NULL;
 	unsigned patch_offset = 0;
 	int r;
 
 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 		gds_switch_needed = true;
 		vm_flush_needed = true;
+		pasid_mapping_needed = true;
 	}
 
+	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+	vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+		ring->funcs->emit_wreg;
+
 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 		return 0;
 
@@ -508,23 +618,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	if (need_pipe_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
-		struct dma_fence *fence;
-
+	if (vm_flush_needed) {
 		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+	}
 
+	if (pasid_mapping_needed)
+		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
+
+	if (vm_flush_needed || pasid_mapping_needed) {
 		r = amdgpu_fence_emit(ring, &fence);
 		if (r)
 			return r;
+	}
 
+	if (vm_flush_needed) {
 		mutex_lock(&id_mgr->lock);
 		dma_fence_put(id->last_flush);
-		id->last_flush = fence;
-		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+		id->last_flush = dma_fence_get(fence);
+		id->current_gpu_reset_count =
+			atomic_read(&adev->gpu_reset_counter);
 		mutex_unlock(&id_mgr->lock);
 	}
 
+	if (pasid_mapping_needed) {
+		id->pasid = job->pasid;
+		dma_fence_put(id->pasid_mapping);
+		id->pasid_mapping = dma_fence_get(fence);
+	}
+	dma_fence_put(fence);
+
 	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 		id->gds_base = job->gds_base;
 		id->gds_size = job->gds_size;
@@ -578,6 +701,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_do_set_ptes - helper to call the right asic function
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -588,10 +712,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+				  struct amdgpu_bo *bo,
 				  uint64_t pe, uint64_t addr,
 				  unsigned count, uint32_t incr,
 				  uint64_t flags)
 {
+	pe += amdgpu_bo_gpu_offset(bo);
 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
 	if (count < 3) {
@@ -608,6 +734,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -617,13 +744,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * Traces the parameters and calls the DMA function to copy the PTEs.
  */
 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+				   struct amdgpu_bo *bo,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint64_t flags)
 {
 	uint64_t src = (params->src + (addr >> 12) * 8);
 
-
+	pe += amdgpu_bo_gpu_offset(bo);
 	trace_amdgpu_vm_copy_ptes(pe, src, count);
 
 	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
@@ -657,6 +785,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: kmap addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -666,6 +795,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * Write count number of PT/PD entries directly.
  */
 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+				   struct amdgpu_bo *bo,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint64_t flags)
@@ -673,14 +803,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 	unsigned int i;
 	uint64_t value;
 
+	pe += (unsigned long)amdgpu_bo_kptr(bo);
+
 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
 	for (i = 0; i < count; i++) {
 		value = params->pages_addr ?
 			amdgpu_vm_map_gart(params->pages_addr, addr) :
 			addr;
-		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
-					i, value, flags);
+		amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
+				       i, value, flags);
 		addr += incr;
 	}
 }
@@ -714,8 +846,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 				 struct amdgpu_vm_pt *parent,
 				 struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
-	uint64_t pd_addr, shadow_addr = 0;
+	struct amdgpu_bo *bo = parent->base.bo, *pbo;
 	uint64_t pde, pt, flags;
 	unsigned level;
 
@@ -723,29 +854,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 	if (entry->huge)
 		return;
 
-	if (vm->use_cpu_for_update) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-	} else {
-		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-		shadow = parent->base.bo->shadow;
-		if (shadow)
-			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-	}
-
-	for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+	for (level = 0, pbo = bo->parent; pbo; ++level)
 		pbo = pbo->parent;
 
 	level += params->adev->vm_manager.root_level;
-	pt = amdgpu_bo_gpu_offset(bo);
+	pt = amdgpu_bo_gpu_offset(entry->base.bo);
 	flags = AMDGPU_PTE_VALID;
-	amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
-	if (shadow) {
-		pde = shadow_addr + (entry - parent->entries) * 8;
-		params->func(params, pde, pt, 1, 0, flags);
-	}
-
-	pde = pd_addr + (entry - parent->entries) * 8;
-	params->func(params, pde, pt, 1, 0, flags);
+	amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+	pde = (entry - parent->entries) * 8;
+	if (bo->shadow)
+		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
+	params->func(params, bo, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -856,7 +975,7 @@ restart:
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+		amdgpu_asic_flush_hdp(adev, NULL);
 	} else if (params.ib->length_dw == 0) {
 		amdgpu_job_free(job);
 	} else {
@@ -870,11 +989,6 @@ restart:
 		amdgpu_ring_pad_ib(ring, params.ib);
 		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
 				 AMDGPU_FENCE_OWNER_VM, false);
-		if (root->shadow)
-			amdgpu_sync_resv(adev, &job->sync,
-					 root->shadow->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-
 		WARN_ON(params.ib->length_dw > ndw);
 		r = amdgpu_job_submit(job, ring, &vm->entity,
 				      AMDGPU_FENCE_OWNER_VM, &fence);
@@ -946,7 +1060,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 					unsigned nptes, uint64_t dst,
 					uint64_t flags)
 {
-	uint64_t pd_addr, pde;
+	uint64_t pde;
 
 	/* In the case of a mixed PT the PDE must point to it*/
 	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
@@ -967,21 +1081,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	}
 
 	entry->huge = true;
-	amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
-			       &dst, &flags);
+	amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
 
-	if (p->func == amdgpu_vm_cpu_set_ptes) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-	} else {
-		if (parent->base.bo->shadow) {
-			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
-			pde = pd_addr + (entry - parent->entries) * 8;
-			p->func(p, pde, dst, 1, 0, flags);
-		}
-		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-	}
-	pde = pd_addr + (entry - parent->entries) * 8;
-	p->func(p, pde, dst, 1, 0, flags);
+	pde = (entry - parent->entries) * 8;
+	if (parent->base.bo->shadow)
+		p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
+	p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
 }
 
 /**
@@ -1007,7 +1112,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	unsigned nptes;
-	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; addr += nptes,
@@ -1030,20 +1134,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			continue;
 
 		pt = entry->base.bo;
-		if (use_cpu_update) {
-			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
-		} else {
-			if (pt->shadow) {
-				pe_start = amdgpu_bo_gpu_offset(pt->shadow);
-				pe_start += (addr & mask) * 8;
-				params->func(params, pe_start, dst, nptes,
-					     AMDGPU_GPU_PAGE_SIZE, flags);
-			}
-			pe_start = amdgpu_bo_gpu_offset(pt);
-		}
-
-		pe_start += (addr & mask) * 8;
-		params->func(params, pe_start, dst, nptes,
+		pe_start = (addr & mask) * 8;
+		if (pt->shadow)
+			params->func(params, pt->shadow, pe_start, dst, nptes,
+				     AMDGPU_GPU_PAGE_SIZE, flags);
+		params->func(params, pt, pe_start, dst, nptes,
 			     AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 
@@ -1204,11 +1299,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	} else {
 		/* set page commands needed */
-		ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+		ndw += ncmds * 10;
 
 		/* extra commands for begin/end fragments */
-		ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
-				* adev->vm_manager.fragment_size;
+		ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
@@ -1457,7 +1551,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+		amdgpu_asic_flush_hdp(adev, NULL);
 	}
 
 	spin_lock(&vm->status_lock);
@@ -1485,7 +1579,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
 
 	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
 	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
-	adev->gart.gart_funcs->set_prt(adev, enable);
+	adev->gmc.gmc_funcs->set_prt(adev, enable);
 	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
 }
 
@@ -1494,7 +1588,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
  */
 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
 {
-	if (!adev->gart.gart_funcs->set_prt)
+	if (!adev->gmc.gmc_funcs->set_prt)
 		return;
 
 	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
@@ -1529,7 +1623,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
 {
 	struct amdgpu_prt_cb *cb;
 
-	if (!adev->gart.gart_funcs->set_prt)
+	if (!adev->gmc.gmc_funcs->set_prt)
 		return;
 
 	cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
@@ -1623,16 +1717,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct dma_fence **fence)
 {
 	struct amdgpu_bo_va_mapping *mapping;
+	uint64_t init_pte_value = 0;
 	struct dma_fence *f = NULL;
 	int r;
-	uint64_t init_pte_value = 0;
 
 	while (!list_empty(&vm->freed)) {
 		mapping = list_first_entry(&vm->freed,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		if (vm->pte_support_ats)
+		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -2262,11 +2356,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
-	uint64_t init_pde_value = 0, flags;
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct drm_sched_rq *rq;
 	unsigned long size;
+	uint64_t flags;
 	int r, i;
 
 	vm->va = RB_ROOT_CACHED;
@@ -2295,23 +2389,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
 
-		if (adev->asic_type == CHIP_RAVEN) {
+		if (adev->asic_type == CHIP_RAVEN)
 			vm->pte_support_ats = true;
-			init_pde_value = AMDGPU_PTE_DEFAULT_ATC
-					| AMDGPU_PDE_PTE;
-
-		}
-	} else
+	} else {
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 						AMDGPU_VM_USE_CPU_FOR_GFX);
+	}
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
 		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_update = NULL;
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
@@ -2319,9 +2409,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 				AMDGPU_GEM_CREATE_SHADOW);
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-	r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
-			     flags, NULL, NULL, init_pde_value,
-			     &vm->root.base.bo);
+	r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
+			     ttm_bo_type_kernel, NULL, &vm->root.base.bo);
 	if (r)
 		goto error_free_sched_entity;
 
@@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_free_root;
 
+	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level,
+			       vm->pte_support_ats);
+	if (r)
+		goto error_unreserve;
+
 	vm->root.base.vm = vm;
 	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
 	list_add_tail(&vm->root.base.vm_status, &vm->evicted);
@@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return 0;
 
+error_unreserve:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+
 error_free_root:
 	amdgpu_bo_unref(&vm->root.base.bo->shadow);
 	amdgpu_bo_unref(&vm->root.base.bo);
@@ -2405,7 +2503,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
-	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
 	struct amdgpu_bo *root;
 	u64 fault;
 	int i, r;

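
The new amdgpu_vm_clear_bo() above splits a freshly created page directory into ATS entries (covering the address range below AMDGPU_VA_HOLE_START) and regular entries before filling them with default values. A stand-alone sketch of that split for the root level (the 9-bit-per-level, 4-level layout is an assumption for illustration, not queried from hardware):

#include <stdio.h>

#define AMDGPU_GPU_PAGE_SHIFT	12
#define AMDGPU_VA_HOLE_START	0x0000800000000000ULL

int main(void)
{
	unsigned level_shift = 9 * 3;	/* root level covers three lower 9-bit levels */
	unsigned entries = 512;		/* 512 PDEs of 8 bytes in a 4K root PD */
	unsigned ats_entries;

	/* same computation as amdgpu_vm_clear_bo() does for the root level */
	ats_entries = level_shift + AMDGPU_GPU_PAGE_SHIFT;
	ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
	if (ats_entries > entries)
		ats_entries = entries;

	printf("%u ATS entries, %u regular entries\n",
	       ats_entries, entries - ats_entries);
	return 0;
}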
+ 2 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -28,6 +28,7 @@
 #include <linux/kfifo.h>
 #include <linux/rbtree.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -99,7 +100,7 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_MMHUB				1
 
 /* hardcode that limit for now */
-#define AMDGPU_VA_RESERVED_SIZE			(8ULL << 20)
+#define AMDGPU_VA_RESERVED_SIZE			(1ULL << 20)
 
 /* VA hole for 48bit addresses on Vega10 */
 #define AMDGPU_VA_HOLE_START			0x0000800000000000ULL

+ 3 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

@@ -89,11 +89,11 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
 	uint64_t start = node->start << PAGE_SHIFT;
 	uint64_t end = (node->size + node->start) << PAGE_SHIFT;
 
-	if (start >= adev->mc.visible_vram_size)
+	if (start >= adev->gmc.visible_vram_size)
 		return 0;
 
-	return (end > adev->mc.visible_vram_size ?
-		adev->mc.visible_vram_size : end) - start;
+	return (end > adev->gmc.visible_vram_size ?
+		adev->gmc.visible_vram_size : end) - start;
 }
 
 /**

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/atombios_encoders.c

@@ -34,7 +34,7 @@
 #include <linux/backlight.h>
 #include "bif/bif_4_1_d.h"
 
-static u8
+u8
 amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
 {
 	u8 backlight_level;
@@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
 	return backlight_level;
 }
 
-static void
+void
 amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
 					    u8 backlight_level)
 {

+ 5 - 0
drivers/gpu/drm/amd/amdgpu/atombios_encoders.h

@@ -24,6 +24,11 @@
 #ifndef __ATOMBIOS_ENCODER_H__
 #define __ATOMBIOS_ENCODER_H__
 
+u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev);
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+						   u8 backlight_level);
 u8
 amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder);
 void

+ 31 - 275
drivers/gpu/drm/amd/amdgpu/ci_dpm.c

@@ -65,6 +65,8 @@ MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 #define VOLTAGE_VID_OFFSET_SCALE1    625
 #define VOLTAGE_VID_OFFSET_SCALE2    100
 
+static const struct amd_pm_funcs ci_dpm_funcs;
+
 static const struct ci_pt_defaults defaults_hawaii_xt =
 {
 	1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000,
@@ -905,7 +907,7 @@ static bool ci_dpm_vblank_too_short(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
-	u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
+	u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
 
 	/* disable mclk switching if the refresh is >120Hz, even if the
 	 * blanking period would allow it
@@ -2954,7 +2956,7 @@ static int ci_calculate_mclk_params(struct amdgpu_device *adev,
 	mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK;
 	mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT);
 
-	if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
 		mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK |
 				MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK);
 		mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) |
@@ -3077,7 +3079,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 	    (memory_clock <= pi->mclk_strobe_mode_threshold))
 		memory_level->StrobeEnable = 1;
 
-	if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
 		memory_level->StrobeRatio =
 			ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable);
 		if (pi->mclk_edc_enable_threshold &&
@@ -3695,40 +3697,6 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table,
 	return ret;
 }
 
-static void ci_save_default_power_profile(struct amdgpu_device *adev)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct SMU7_Discrete_GraphicsLevel *levels =
-				pi->smc_state_table.GraphicsLevel;
-	uint32_t min_level = 0;
-
-	pi->default_gfx_power_profile.activity_threshold =
-			be16_to_cpu(levels[0].ActivityLevel);
-	pi->default_gfx_power_profile.up_hyst = levels[0].UpH;
-	pi->default_gfx_power_profile.down_hyst = levels[0].DownH;
-	pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE;
-
-	pi->default_compute_power_profile = pi->default_gfx_power_profile;
-	pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE;
-
-	/* Optimize compute power profile: Use only highest
-	 * 2 power levels (if more than 2 are available), Hysteresis:
-	 * 0ms up, 5ms down
-	 */
-	if (pi->smc_state_table.GraphicsDpmLevelCount > 2)
-		min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2;
-	else if (pi->smc_state_table.GraphicsDpmLevelCount == 2)
-		min_level = 1;
-	pi->default_compute_power_profile.min_sclk =
-			be32_to_cpu(levels[min_level].SclkFrequency);
-
-	pi->default_compute_power_profile.up_hyst = 0;
-	pi->default_compute_power_profile.down_hyst = 5;
-
-	pi->gfx_power_profile = pi->default_gfx_power_profile;
-	pi->compute_power_profile = pi->default_compute_power_profile;
-}
-
 static int ci_init_smc_table(struct amdgpu_device *adev)
 {
 	struct ci_power_info *pi = ci_get_pi(adev);
@@ -3752,7 +3720,7 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
 	if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC)
 		table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
 
-	if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
+	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
 		table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
 
 	if (ulv->supported) {
@@ -3874,8 +3842,6 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
 	if (ret)
 		return ret;
 
-	ci_save_default_power_profile(adev);
-
 	return 0;
 }
 
@@ -4549,12 +4515,12 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev,
 			for (k = 0; k < table->num_entries; k++) {
 				table->mc_reg_table_entry[k].mc_data[j] =
 					(temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
-				if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
+				if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
 					table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
 			}
 			j++;
 
-			if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
+			if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
 				if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
 					return -EINVAL;
 				table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
@@ -6277,6 +6243,7 @@ static int ci_dpm_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->powerplay.pp_funcs = &ci_dpm_funcs;
 	ci_dpm_set_irq_funcs(adev);
 
 	return 0;
@@ -6639,9 +6606,10 @@ static int ci_dpm_force_clock_level(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct ci_power_info *pi = ci_get_pi(adev);
 	struct ci_power_info *pi = ci_get_pi(adev);
 
 
-	if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO |
-				AMD_DPM_FORCED_LEVEL_LOW |
-				AMD_DPM_FORCED_LEVEL_HIGH))
+	if (adev->pm.dpm.forced_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	if (mask == 0)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	switch (type) {
 	switch (type) {
@@ -6662,15 +6630,15 @@ static int ci_dpm_force_clock_level(void *handle,
 	case PP_PCIE:
 	{
 		uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
-		uint32_t level = 0;
 
-		while (tmp >>= 1)
-			level++;
-
-		if (!pi->pcie_dpm_key_disabled)
-			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+		if (!pi->pcie_dpm_key_disabled) {
+			if (fls(tmp) != ffs(tmp))
+				amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_UnForceLevel);
+			else
+				amdgpu_ci_send_msg_to_smc_with_parameter(adev,
 					PPSMC_MSG_PCIeDPM_ForceLevel,
-					level);
+					fls(tmp) - 1);
+		}
 		break;
 	}
 	default:
@@ -6752,222 +6720,6 @@ static int ci_dpm_set_mclk_od(void *handle, uint32_t value)
 	return 0;
 }
 
-static int ci_dpm_get_power_profile_state(void *handle,
-		struct amd_pp_profile *query)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-
-	if (!pi || !query)
-		return -EINVAL;
-
-	if (query->type == AMD_PP_GFX_PROFILE)
-		memcpy(query, &pi->gfx_power_profile,
-				sizeof(struct amd_pp_profile));
-	else if (query->type == AMD_PP_COMPUTE_PROFILE)
-		memcpy(query, &pi->compute_power_profile,
-				sizeof(struct amd_pp_profile));
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev,
-		struct amd_pp_profile *request)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct ci_dpm_table *dpm_table = &(pi->dpm_table);
-	struct SMU7_Discrete_GraphicsLevel *levels =
-			pi->smc_state_table.GraphicsLevel;
-	uint32_t array = pi->dpm_table_start +
-			offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
-	uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) *
-			SMU7_MAX_LEVELS_GRAPHICS;
-	uint32_t i;
-
-	for (i = 0; i < dpm_table->sclk_table.count; i++) {
-		levels[i].ActivityLevel =
-				cpu_to_be16(request->activity_threshold);
-		levels[i].EnabledForActivity = 1;
-		levels[i].UpH = request->up_hyst;
-		levels[i].DownH = request->down_hyst;
-	}
-
-	return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels,
-				array_size, pi->sram_end);
-}
-
-static void ci_find_min_clock_masks(struct amdgpu_device *adev,
-		uint32_t *sclk_mask, uint32_t *mclk_mask,
-		uint32_t min_sclk, uint32_t min_mclk)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct ci_dpm_table *dpm_table = &(pi->dpm_table);
-	uint32_t i;
-
-	for (i = 0; i < dpm_table->sclk_table.count; i++) {
-		if (dpm_table->sclk_table.dpm_levels[i].enabled &&
-			dpm_table->sclk_table.dpm_levels[i].value >= min_sclk)
-			*sclk_mask |= 1 << i;
-	}
-
-	for (i = 0; i < dpm_table->mclk_table.count; i++) {
-		if (dpm_table->mclk_table.dpm_levels[i].enabled &&
-			dpm_table->mclk_table.dpm_levels[i].value >= min_mclk)
-			*mclk_mask |= 1 << i;
-	}
-}
-
-static int ci_set_power_profile_state(struct amdgpu_device *adev,
-		struct amd_pp_profile *request)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	int tmp_result, result = 0;
-	uint32_t sclk_mask = 0, mclk_mask = 0;
-
-	tmp_result = ci_freeze_sclk_mclk_dpm(adev);
-	if (tmp_result) {
-		DRM_ERROR("Failed to freeze SCLK MCLK DPM!");
-		result = tmp_result;
-	}
-
-	tmp_result = ci_populate_requested_graphic_levels(adev,
-			request);
-	if (tmp_result) {
-		DRM_ERROR("Failed to populate requested graphic levels!");
-		result = tmp_result;
-	}
-
-	tmp_result = ci_unfreeze_sclk_mclk_dpm(adev);
-	if (tmp_result) {
-		DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!");
-		result = tmp_result;
-	}
-
-	ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask,
-			request->min_sclk, request->min_mclk);
-
-	if (sclk_mask) {
-		if (!pi->sclk_dpm_key_disabled)
-			amdgpu_ci_send_msg_to_smc_with_parameter(
-				adev,
-				PPSMC_MSG_SCLKDPM_SetEnabledMask,
-				pi->dpm_level_enable_mask.
-				sclk_dpm_enable_mask &
-				sclk_mask);
-	}
-
-	if (mclk_mask) {
-		if (!pi->mclk_dpm_key_disabled)
-			amdgpu_ci_send_msg_to_smc_with_parameter(
-				adev,
-				PPSMC_MSG_MCLKDPM_SetEnabledMask,
-				pi->dpm_level_enable_mask.
-				mclk_dpm_enable_mask &
-				mclk_mask);
-	}
-
-
-	return result;
-}
-
-static int ci_dpm_set_power_profile_state(void *handle,
-		struct amd_pp_profile *request)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-	int ret = -1;
-
-	if (!pi || !request)
-		return -EINVAL;
-
-	if (adev->pm.dpm.forced_level !=
-			AMD_DPM_FORCED_LEVEL_AUTO)
-		return -EINVAL;
-
-	if (request->min_sclk ||
-		request->min_mclk ||
-		request->activity_threshold ||
-		request->up_hyst ||
-		request->down_hyst) {
-		if (request->type == AMD_PP_GFX_PROFILE)
-			memcpy(&pi->gfx_power_profile, request,
-					sizeof(struct amd_pp_profile));
-		else if (request->type == AMD_PP_COMPUTE_PROFILE)
-			memcpy(&pi->compute_power_profile, request,
-					sizeof(struct amd_pp_profile));
-		else
-			return -EINVAL;
-
-		if (request->type == pi->current_power_profile)
-			ret = ci_set_power_profile_state(
-					adev,
-					request);
-	} else {
-		/* set power profile if it exists */
-		switch (request->type) {
-		case AMD_PP_GFX_PROFILE:
-			ret = ci_set_power_profile_state(
-				adev,
-				&pi->gfx_power_profile);
-			break;
-		case AMD_PP_COMPUTE_PROFILE:
-			ret = ci_set_power_profile_state(
-				adev,
-				&pi->compute_power_profile);
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-
-	if (!ret)
-		pi->current_power_profile = request->type;
-
-	return 0;
-}
-
-static int ci_dpm_reset_power_profile_state(void *handle,
-		struct amd_pp_profile *request)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-
-	if (!pi || !request)
-		return -EINVAL;
-
-	if (request->type == AMD_PP_GFX_PROFILE) {
-		pi->gfx_power_profile = pi->default_gfx_power_profile;
-		return ci_dpm_set_power_profile_state(adev,
-					  &pi->gfx_power_profile);
-	} else if (request->type == AMD_PP_COMPUTE_PROFILE) {
-		pi->compute_power_profile =
-			pi->default_compute_power_profile;
-		return ci_dpm_set_power_profile_state(adev,
-					  &pi->compute_power_profile);
-	} else
-		return -EINVAL;
-}
-
-static int ci_dpm_switch_power_profile(void *handle,
-		enum amd_pp_profile_type type)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct amd_pp_profile request = {0};
-
-	if (!pi)
-		return -EINVAL;
-
-	if (pi->current_power_profile != type) {
-		request.type = type;
-		return ci_dpm_set_power_profile_state(adev, &request);
-	}
-
-	return 0;
-}
-
 static int ci_dpm_read_sensor(void *handle, int idx,
 			      void *value, int *size)
 {
@@ -7011,7 +6763,7 @@ static int ci_dpm_read_sensor(void *handle, int idx,
 	}
 }
 
-const struct amd_ip_funcs ci_dpm_ip_funcs = {
+static const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.name = "ci_dpm",
 	.early_init = ci_dpm_early_init,
 	.late_init = ci_dpm_late_init,
@@ -7028,8 +6780,16 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.set_powergating_state = ci_dpm_set_powergating_state,
 };
 
-const struct amd_pm_funcs ci_dpm_funcs = {
-	.get_temperature = &ci_dpm_get_temp,
+const struct amdgpu_ip_block_version ci_smu_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 7,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &ci_dpm_ip_funcs,
+};
+
+static const struct amd_pm_funcs ci_dpm_funcs = {
 	.pre_set_power_state = &ci_dpm_pre_set_power_state,
 	.set_power_state = &ci_dpm_set_power_state,
 	.post_set_power_state = &ci_dpm_post_set_power_state,
@@ -7053,10 +6813,6 @@ const struct amd_pm_funcs ci_dpm_funcs = {
 	.set_mclk_od = ci_dpm_set_mclk_od,
 	.check_state_equal = ci_check_state_equal,
 	.get_vce_clock_state = amdgpu_get_vce_clock_state,
-	.get_power_profile_state = ci_dpm_get_power_profile_state,
-	.set_power_profile_state = ci_dpm_set_power_profile_state,
-	.reset_power_profile_state = ci_dpm_reset_power_profile_state,
-	.switch_power_profile = ci_dpm_switch_power_profile,
 	.read_sensor = ci_dpm_read_sensor,
 };
 

+ 0 - 7
drivers/gpu/drm/amd/amdgpu/ci_dpm.h

@@ -295,13 +295,6 @@ struct ci_power_info {
 	bool fan_is_controlled_by_smc;
 	u32 t_min;
 	u32 fan_ctrl_default_mode;
-
-	/* power profile */
-	struct amd_pp_profile gfx_power_profile;
-	struct amd_pp_profile compute_power_profile;
-	struct amd_pp_profile default_gfx_power_profile;
-	struct amd_pp_profile default_compute_power_profile;
-	enum amd_pp_profile_type current_power_profile;
 };
 
 #define CISLANDS_VOLTAGE_CONTROL_NONE                   0x0

+ 33 - 9
drivers/gpu/drm/amd/amdgpu/cik.c

@@ -67,7 +67,6 @@
 
 #include "amdgpu_dm.h"
 #include "amdgpu_amdkfd.h"
-#include "amdgpu_powerplay.h"
 #include "dce_virtual.h"
 
 /*
@@ -1715,6 +1714,27 @@ static void cik_detect_hw_virtualization(struct amdgpu_device *adev)
 		adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
 }
 
+static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	if (!ring || !ring->funcs->emit_wreg) {
+		WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+		RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+	} else {
+		amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+	}
+}
+
+static void cik_invalidate_hdp(struct amdgpu_device *adev,
+			       struct amdgpu_ring *ring)
+{
+	if (!ring || !ring->funcs->emit_wreg) {
+		WREG32(mmHDP_DEBUG0, 1);
+		RREG32(mmHDP_DEBUG0);
+	} else {
+		amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+	}
+}
+
 static const struct amdgpu_asic_funcs cik_asic_funcs =
 {
 	.read_disabled_bios = &cik_read_disabled_bios,
@@ -1726,6 +1746,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.set_uvd_clocks = &cik_set_uvd_clocks,
 	.set_vce_clocks = &cik_set_vce_clocks,
 	.get_config_memsize = &cik_get_config_memsize,
+	.flush_hdp = &cik_flush_hdp,
+	.invalidate_hdp = &cik_invalidate_hdp,
 };
 
 static int cik_common_early_init(void *handle)
@@ -1864,10 +1886,6 @@ static int cik_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
-	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
-	amdgpu_device_get_pcie_info(adev);
-
 	return 0;
 }
 
@@ -1977,7 +1995,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (amdgpu_dpm == -1)
+			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
+		else
+			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1995,7 +2016,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (amdgpu_dpm == -1)
+			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
+		else
+			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -2013,7 +2037,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -2032,7 +2056,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/cik.h

@@ -24,6 +24,8 @@
 #ifndef __CIK_H__
 #define __CIK_H__
 
+#define CIK_FLUSH_GPU_TLB_NUM_WREG	3
+
 void cik_srbm_select(struct amdgpu_device *adev,
 		     u32 me, u32 pipe, u32 queue, u32 vmid);
 int cik_set_ip_blocks(struct amdgpu_device *adev);

+ 3 - 4
drivers/gpu/drm/amd/amdgpu/cik_dpm.h

@@ -24,8 +24,7 @@
 #ifndef __CIK_DPM_H__
 #define __CIK_DPM_H__
 
-extern const struct amd_ip_funcs ci_dpm_ip_funcs;
-extern const struct amd_ip_funcs kv_dpm_ip_funcs;
-extern const struct amd_pm_funcs ci_dpm_funcs;
-extern const struct amd_pm_funcs kv_dpm_funcs;
+extern const struct amdgpu_ip_block_version ci_smu_ip_block;
+extern const struct amdgpu_ip_block_version kv_smu_ip_block;
+
 #endif

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/cik_ih.c

@@ -111,7 +111,7 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
 	cik_ih_disable_interrupts(adev);
 
 	/* setup interrupt control */
-	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
 	interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
 	/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
 	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -281,7 +281,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
 	entry->vmid = (dw[2] >> 8) & 0xff;
-	entry->pas_id = (dw[2] >> 16) & 0xffff;
+	entry->pasid = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
 	adev->irq.ih.rptr += 16;

+ 14 - 27
drivers/gpu/drm/amd/amdgpu/cik_sdma.c

@@ -261,13 +261,6 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }
 
-static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	amdgpu_ring_write(ring, mmHDP_DEBUG0);
-	amdgpu_ring_write(ring, 1);
-}
-
 /**
  * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -317,7 +310,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
 
 	if ((adev->mman.buffer_funcs_ring == sdma0) ||
 	    (adev->mman.buffer_funcs_ring == sdma1))
-		amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+			amdgpu_ttm_set_buffer_funcs_status(adev, false);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -517,7 +510,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		}
 
 		if (adev->mman.buffer_funcs_ring == ring)
-			amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+			amdgpu_ttm_set_buffer_funcs_status(adev, true);
 	}
 
 	return 0;
@@ -885,18 +878,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
 
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	if (vmid < 8) {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
-	} else {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
-	}
-	amdgpu_ring_write(ring, pd_addr >> 12);
-
-	/* flush TLB */
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
@@ -906,6 +888,14 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }
 
+static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring,
+				    uint32_t reg, uint32_t val)
+{
+	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, val);
+}
+
 static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
 				 bool enable)
 {
@@ -1279,9 +1269,9 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.set_wptr = cik_sdma_ring_set_wptr,
 	.emit_frame_size =
 		6 + /* cik_sdma_ring_emit_hdp_flush */
-		3 + /* cik_sdma_ring_emit_hdp_invalidate */
+		3 + /* hdp invalidate */
 		6 + /* cik_sdma_ring_emit_pipeline_sync */
-		12 + /* cik_sdma_ring_emit_vm_flush */
+		CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */
 		9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */
 	.emit_ib = cik_sdma_ring_emit_ib,
@@ -1289,11 +1279,11 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
 	.emit_vm_flush = cik_sdma_ring_emit_vm_flush,
 	.emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
 	.test_ring = cik_sdma_ring_test_ring,
 	.test_ib = cik_sdma_ring_test_ib,
 	.insert_nop = cik_sdma_ring_insert_nop,
 	.pad_ib = cik_sdma_ring_pad_ib,
+	.emit_wreg = cik_sdma_ring_emit_wreg,
 };
 
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1391,9 +1381,6 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 	.copy_pte = cik_sdma_vm_copy_pte,
 
 	.write_pte = cik_sdma_vm_write_pte,
-
-	.set_max_nums_pte_pde = 0x1fffff >> 3,
-	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = cik_sdma_vm_set_pte_pde,
 };
 

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/cz_ih.c

@@ -111,7 +111,7 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
 	cz_ih_disable_interrupts(adev);
 
 	/* setup interrupt control */
-	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
 	interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
 	/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
 	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -260,7 +260,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
 	entry->vmid = (dw[2] >> 8) & 0xff;
-	entry->pas_id = (dw[2] >> 16) & 0xffff;
+	entry->pasid = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
 	adev->irq.ih.rptr += 16;

+ 18 - 70
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c

@@ -190,66 +190,6 @@ static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
 }
 
-static bool dce_v10_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
-	if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
-			CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
-		return true;
-	else
-		return false;
-}
-
-static bool dce_v10_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
-	u32 pos1, pos2;
-
-	pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-	pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
-	if (pos1 != pos2)
-		return true;
-	else
-		return false;
-}
-
-/**
- * dce_v10_0_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v10_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
-	unsigned i = 100;
-
-	if (crtc >= adev->mode_info.num_crtc)
-		return;
-
-	if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
-		return;
-
-	/* depending on when we hit vblank, we may be close to active; if so,
-	 * wait for another frame.
-	 */
-	while (dce_v10_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v10_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-
-	while (!dce_v10_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v10_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-}
-
 static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
 {
 	if (crtc >= adev->mode_info.num_crtc)
@@ -1205,7 +1145,7 @@ static void dce_v10_0_bandwidth_update(struct amdgpu_device *adev)
 	u32 num_heads = 0, lb_size;
 	int i;
 
-	amdgpu_update_display_priority(adev);
+	amdgpu_display_update_priority(adev);
 
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2517,9 +2457,9 @@ static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = {
 	.cursor_set2 = dce_v10_0_crtc_cursor_set2,
 	.cursor_move = dce_v10_0_crtc_cursor_move,
 	.gamma_set = dce_v10_0_crtc_gamma_set,
-	.set_config = amdgpu_crtc_set_config,
+	.set_config = amdgpu_display_crtc_set_config,
 	.destroy = dce_v10_0_crtc_destroy,
-	.page_flip_target = amdgpu_crtc_page_flip_target,
+	.page_flip_target = amdgpu_display_crtc_page_flip_target,
 };
 
 static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2537,7 +2477,8 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
 		dce_v10_0_vga_enable(crtc, false);
 		/* Make sure VBLANK and PFLIP interrupts are still enabled */
-		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+		type = amdgpu_display_crtc_idx_to_irq_type(adev,
+						amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
 		drm_crtc_vblank_on(crtc);
@@ -2676,7 +2617,7 @@ static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc,
 		amdgpu_crtc->connector = NULL;
 		return false;
 	}
-	if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+	if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
 		return false;
 	if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
 		return false;
@@ -2824,9 +2765,9 @@ static int dce_v10_0_sw_init(void *handle)
 	adev->ddev->mode_config.preferred_depth = 24;
 	adev->ddev->mode_config.prefer_shadow = 1;
 
-	adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+	adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
 
-	r = amdgpu_modeset_create_props(adev);
+	r = amdgpu_display_modeset_create_props(adev);
 	if (r)
 		return r;
 
@@ -2841,7 +2782,7 @@ static int dce_v10_0_sw_init(void *handle)
 	}
 
 	if (amdgpu_atombios_get_connector_info_from_object_table(adev))
-		amdgpu_print_display_setup(adev->ddev);
+		amdgpu_display_print_display_setup(adev->ddev);
 	else
 		return -EINVAL;
 
@@ -2921,6 +2862,11 @@ static int dce_v10_0_hw_fini(void *handle)
 
 static int dce_v10_0_suspend(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->mode_info.bl_level =
+		amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
 	return dce_v10_0_hw_fini(handle);
 }
 
@@ -2929,6 +2875,9 @@ static int dce_v10_0_resume(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+							   adev->mode_info.bl_level);
+
 	ret = dce_v10_0_hw_init(handle);
 
 	/* turn on the BL */
@@ -3249,7 +3198,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev,
 {
 	unsigned crtc = entry->src_id - 1;
 	uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
-	unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+	unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, crtc);
 
 	switch (entry->src_data[0]) {
 	case 0: /* vblank */
@@ -3601,7 +3550,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev,
 static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
 	.bandwidth_update = &dce_v10_0_bandwidth_update,
 	.vblank_get_counter = &dce_v10_0_vblank_get_counter,
-	.vblank_wait = &dce_v10_0_vblank_wait,
 	.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
 	.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
 	.hpd_sense = &dce_v10_0_hpd_sense,

+ 19 - 70
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c

@@ -207,66 +207,6 @@ static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
 }
 
-static bool dce_v11_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
-	if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
-			CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
-		return true;
-	else
-		return false;
-}
-
-static bool dce_v11_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
-	u32 pos1, pos2;
-
-	pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-	pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
-	if (pos1 != pos2)
-		return true;
-	else
-		return false;
-}
-
-/**
- * dce_v11_0_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v11_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
-	unsigned i = 100;
-
-	if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
-		return;
-
-	if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
-		return;
-
-	/* depending on when we hit vblank, we may be close to active; if so,
-	 * wait for another frame.
-	 */
-	while (dce_v11_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v11_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-
-	while (!dce_v11_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v11_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-}
-
 static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
 {
 	if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
@@ -1229,7 +1169,7 @@ static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
 	u32 num_heads = 0, lb_size;
 	int i;
 
-	amdgpu_update_display_priority(adev);
+	amdgpu_display_update_priority(adev);
 
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2592,9 +2532,9 @@ static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
 	.cursor_set2 = dce_v11_0_crtc_cursor_set2,
 	.cursor_move = dce_v11_0_crtc_cursor_move,
 	.gamma_set = dce_v11_0_crtc_gamma_set,
-	.set_config = amdgpu_crtc_set_config,
+	.set_config = amdgpu_display_crtc_set_config,
 	.destroy = dce_v11_0_crtc_destroy,
-	.page_flip_target = amdgpu_crtc_page_flip_target,
+	.page_flip_target = amdgpu_display_crtc_page_flip_target,
 };
 
 static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2612,7 +2552,8 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
 		dce_v11_0_vga_enable(crtc, false);
 		/* Make sure VBLANK and PFLIP interrupts are still enabled */
-		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+		type = amdgpu_display_crtc_idx_to_irq_type(adev,
+						amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
 		drm_crtc_vblank_on(crtc);
@@ -2779,7 +2720,7 @@ static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
 		amdgpu_crtc->connector = NULL;
 		return false;
 	}
-	if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+	if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
 		return false;
 	if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
 		return false;
@@ -2939,9 +2880,9 @@ static int dce_v11_0_sw_init(void *handle)
 	adev->ddev->mode_config.preferred_depth = 24;
 	adev->ddev->mode_config.prefer_shadow = 1;
 
-	adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+	adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
 
-	r = amdgpu_modeset_create_props(adev);
+	r = amdgpu_display_modeset_create_props(adev);
 	if (r)
 		return r;
 
@@ -2957,7 +2898,7 @@ static int dce_v11_0_sw_init(void *handle)
 	}
 
 	if (amdgpu_atombios_get_connector_info_from_object_table(adev))
-		amdgpu_print_display_setup(adev->ddev);
+		amdgpu_display_print_display_setup(adev->ddev);
 	else
 		return -EINVAL;
 
@@ -3047,6 +2988,11 @@ static int dce_v11_0_hw_fini(void *handle)
 
 static int dce_v11_0_suspend(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->mode_info.bl_level =
+		amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
 	return dce_v11_0_hw_fini(handle);
 }
 
@@ -3055,6 +3001,9 @@ static int dce_v11_0_resume(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+							   adev->mode_info.bl_level);
+
 	ret = dce_v11_0_hw_init(handle);
 
 	/* turn on the BL */
@@ -3368,7 +3317,8 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
 {
 	unsigned crtc = entry->src_id - 1;
 	uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
-	unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+	unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+								    crtc);
 
 	switch (entry->src_data[0]) {
 	case 0: /* vblank */
@@ -3725,7 +3675,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
 static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
 	.bandwidth_update = &dce_v11_0_bandwidth_update,
 	.vblank_get_counter = &dce_v11_0_vblank_get_counter,
-	.vblank_wait = &dce_v11_0_vblank_wait,
 	.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
 	.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
 	.hpd_sense = &dce_v11_0_hpd_sense,

+ 20 - 69
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c

@@ -142,64 +142,6 @@ static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
 }
 
-static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
-	if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & CRTC_STATUS__CRTC_V_BLANK_MASK)
-		return true;
-	else
-		return false;
-}
-
-static bool dce_v6_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
-	u32 pos1, pos2;
-
-	pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-	pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
-	if (pos1 != pos2)
-		return true;
-	else
-		return false;
-}
-
-/**
- * dce_v6_0_wait_for_vblank - vblank wait asic callback.
- *
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v6_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
-	unsigned i = 100;
-
-	if (crtc >= adev->mode_info.num_crtc)
-		return;
-
-	if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
-		return;
-
-	/* depending on when we hit vblank, we may be close to active; if so,
-	 * wait for another frame.
-	 */
-	while (dce_v6_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v6_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-
-	while (!dce_v6_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v6_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-}
-
 static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
 {
 	if (crtc >= adev->mode_info.num_crtc)
@@ -1108,7 +1050,7 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
 	if (!adev->mode_info.mode_config_initialized)
 		return;
 
-	amdgpu_update_display_priority(adev);
+	amdgpu_display_update_priority(adev);
 
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2407,9 +2349,9 @@ static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = {
 	.cursor_set2 = dce_v6_0_crtc_cursor_set2,
 	.cursor_move = dce_v6_0_crtc_cursor_move,
 	.gamma_set = dce_v6_0_crtc_gamma_set,
-	.set_config = amdgpu_crtc_set_config,
+	.set_config = amdgpu_display_crtc_set_config,
 	.destroy = dce_v6_0_crtc_destroy,
-	.page_flip_target = amdgpu_crtc_page_flip_target,
+	.page_flip_target = amdgpu_display_crtc_page_flip_target,
 };
 
 static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2425,7 +2367,8 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
 		amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
 		/* Make sure VBLANK and PFLIP interrupts are still enabled */
-		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+		type = amdgpu_display_crtc_idx_to_irq_type(adev,
+						amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
 		drm_crtc_vblank_on(crtc);
@@ -2562,7 +2505,7 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
 		amdgpu_crtc->connector = NULL;
 		return false;
 	}
-	if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+	if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
 		return false;
 	if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
 		return false;
@@ -2693,9 +2636,9 @@ static int dce_v6_0_sw_init(void *handle)
 	adev->ddev->mode_config.max_height = 16384;
 	adev->ddev->mode_config.preferred_depth = 24;
 	adev->ddev->mode_config.prefer_shadow = 1;
-	adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+	adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
 
-	r = amdgpu_modeset_create_props(adev);
+	r = amdgpu_display_modeset_create_props(adev);
 	if (r)
 		return r;
 
@@ -2711,7 +2654,7 @@ static int dce_v6_0_sw_init(void *handle)
 
 	ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
 	if (ret)
-		amdgpu_print_display_setup(adev->ddev);
+		amdgpu_display_print_display_setup(adev->ddev);
 	else
 		return -EINVAL;
 
@@ -2787,6 +2730,11 @@ static int dce_v6_0_hw_fini(void *handle)
 
 static int dce_v6_0_suspend(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->mode_info.bl_level =
+		amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
 	return dce_v6_0_hw_fini(handle);
 }
 
@@ -2795,6 +2743,9 @@ static int dce_v6_0_resume(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+							   adev->mode_info.bl_level);
+
 	ret = dce_v6_0_hw_init(handle);
 
 	/* turn on the BL */
@@ -2966,7 +2917,8 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
 {
 	unsigned crtc = entry->src_id - 1;
 	uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
-	unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+	unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+								    crtc);
 
 	switch (entry->src_data[0]) {
 	case 0: /* vblank */
@@ -3093,7 +3045,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
 		tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
 		WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
 		schedule_work(&adev->hotplug_work);
-		DRM_INFO("IH: HPD%d\n", hpd + 1);
+		DRM_DEBUG("IH: HPD%d\n", hpd + 1);
 	}
 
 	return 0;
@@ -3407,7 +3359,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
 static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
 	.bandwidth_update = &dce_v6_0_bandwidth_update,
 	.vblank_get_counter = &dce_v6_0_vblank_get_counter,
-	.vblank_wait = &dce_v6_0_vblank_wait,
 	.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
 	.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
 	.hpd_sense = &dce_v6_0_hpd_sense,

+ 19 - 70
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c

@@ -140,66 +140,6 @@ static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
 }
 
-static bool dce_v8_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
-	if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
-			CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
-		return true;
-	else
-		return false;
-}
-
-static bool dce_v8_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
-	u32 pos1, pos2;
-
-	pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-	pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
-	if (pos1 != pos2)
-		return true;
-	else
-		return false;
-}
-
-/**
- * dce_v8_0_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v8_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
-	unsigned i = 100;
-
-	if (crtc >= adev->mode_info.num_crtc)
-		return;
-
-	if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
-		return;
-
-	/* depending on when we hit vblank, we may be close to active; if so,
-	 * wait for another frame.
-	 */
-	while (dce_v8_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v8_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-
-	while (!dce_v8_0_is_in_vblank(adev, crtc)) {
-		if (i++ == 100) {
-			i = 0;
-			if (!dce_v8_0_is_counter_moving(adev, crtc))
-				break;
-		}
-	}
-}
-
 static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
 {
 	if (crtc >= adev->mode_info.num_crtc)
@@ -1144,7 +1084,7 @@ static void dce_v8_0_bandwidth_update(struct amdgpu_device *adev)
 	u32 num_heads = 0, lb_size;
 	int i;
 
-	amdgpu_update_display_priority(adev);
+	amdgpu_display_update_priority(adev);
 
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2421,9 +2361,9 @@ static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = {
 	.cursor_set2 = dce_v8_0_crtc_cursor_set2,
 	.cursor_move = dce_v8_0_crtc_cursor_move,
 	.gamma_set = dce_v8_0_crtc_gamma_set,
-	.set_config = amdgpu_crtc_set_config,
+	.set_config = amdgpu_display_crtc_set_config,
 	.destroy = dce_v8_0_crtc_destroy,
-	.page_flip_target = amdgpu_crtc_page_flip_target,
+	.page_flip_target = amdgpu_display_crtc_page_flip_target,
 };
 
 static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2441,7 +2381,8 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
 		dce_v8_0_vga_enable(crtc, false);
 		/* Make sure VBLANK and PFLIP interrupts are still enabled */
-		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+		type = amdgpu_display_crtc_idx_to_irq_type(adev,
+						amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
 		drm_crtc_vblank_on(crtc);
@@ -2587,7 +2528,7 @@ static bool dce_v8_0_crtc_mode_fixup(struct drm_crtc *crtc,
 		amdgpu_crtc->connector = NULL;
 		return false;
 	}
-	if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+	if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
 		return false;
 	if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
 		return false;
@@ -2724,9 +2665,9 @@ static int dce_v8_0_sw_init(void *handle)
 	adev->ddev->mode_config.preferred_depth = 24;
 	adev->ddev->mode_config.prefer_shadow = 1;
 
-	adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+	adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
 
-	r = amdgpu_modeset_create_props(adev);
+	r = amdgpu_display_modeset_create_props(adev);
 	if (r)
 		return r;
 
@@ -2741,7 +2682,7 @@ static int dce_v8_0_sw_init(void *handle)
 	}
 
 	if (amdgpu_atombios_get_connector_info_from_object_table(adev))
-		amdgpu_print_display_setup(adev->ddev);
+		amdgpu_display_print_display_setup(adev->ddev);
 	else
 		return -EINVAL;
 
@@ -2819,6 +2760,11 @@ static int dce_v8_0_hw_fini(void *handle)
 
 static int dce_v8_0_suspend(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->mode_info.bl_level =
+		amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
 	return dce_v8_0_hw_fini(handle);
 }
 
@@ -2827,6 +2773,9 @@ static int dce_v8_0_resume(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+							   adev->mode_info.bl_level);
+
 	ret = dce_v8_0_hw_init(handle);
 
 	/* turn on the BL */
@@ -3063,7 +3012,8 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
 {
 	unsigned crtc = entry->src_id - 1;
 	uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
-	unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+	unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+								    crtc);
 
 	switch (entry->src_data[0]) {
 	case 0: /* vblank */
@@ -3491,7 +3441,6 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev,
 static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
 	.bandwidth_update = &dce_v8_0_bandwidth_update,
 	.vblank_get_counter = &dce_v8_0_vblank_get_counter,
-	.vblank_wait = &dce_v8_0_vblank_wait,
 	.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
 	.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
 	.hpd_sense = &dce_v8_0_hpd_sense,

+ 6 - 19
drivers/gpu/drm/amd/amdgpu/dce_virtual.c

@@ -48,19 +48,6 @@ static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *ad
 							int crtc,
 							enum amdgpu_interrupt_state state);
 
-/**
- * dce_virtual_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_virtual_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
-	return;
-}
-
 static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc)
 {
 	return 0;
@@ -130,9 +117,9 @@ static const struct drm_crtc_funcs dce_virtual_crtc_funcs = {
 	.cursor_set2 = NULL,
 	.cursor_move = NULL,
 	.gamma_set = dce_virtual_crtc_gamma_set,
-	.set_config = amdgpu_crtc_set_config,
+	.set_config = amdgpu_display_crtc_set_config,
 	.destroy = dce_virtual_crtc_destroy,
-	.page_flip_target = amdgpu_crtc_page_flip_target,
+	.page_flip_target = amdgpu_display_crtc_page_flip_target,
 };
 
 static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -149,7 +136,8 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
 	case DRM_MODE_DPMS_ON:
 		amdgpu_crtc->enabled = true;
 		/* Make sure VBLANK interrupts are still enabled */
-		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+		type = amdgpu_display_crtc_idx_to_irq_type(adev,
+						amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		drm_crtc_vblank_on(crtc);
 		break;
@@ -406,9 +394,9 @@ static int dce_virtual_sw_init(void *handle)
 	adev->ddev->mode_config.preferred_depth = 24;
 	adev->ddev->mode_config.prefer_shadow = 1;
 
-	adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+	adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
 
 
-	r = amdgpu_modeset_create_props(adev);
+	r = amdgpu_display_modeset_create_props(adev);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
@@ -653,7 +641,6 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
 static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
 	.bandwidth_update = &dce_virtual_bandwidth_update,
 	.vblank_get_counter = &dce_virtual_vblank_get_counter,
-	.vblank_wait = &dce_virtual_vblank_wait,
 	.backlight_set_level = NULL,
 	.backlight_get_level = NULL,
 	.hpd_sense = &dce_virtual_hpd_sense,

+ 11 - 4
drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h → drivers/gpu/drm/amd/amdgpu/emu_soc.c

@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2018 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,7 +20,14 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+
+int emu_soc_asic_init(struct amdgpu_device *adev)
+{
+	return 0;
+}
 
-bool acpi_atcs_functions_supported(void *device, uint32_t index);
-int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise);
-bool acpi_atcs_notify_pcie_device_ready(void *device);

+ 20 - 58
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

@@ -38,6 +38,7 @@
 #include "dce/dce_6_0_sh_mask.h"
 #include "dce/dce_6_0_sh_mask.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_enum.h"
 #include "si_enums.h"
 #include "si_enums.h"
+#include "si.h"
 
 
 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1808,17 +1809,6 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 	return r;
 }
 }
 
 
-static void gfx_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
-	/* flush hdp cache */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
-				 WRITE_DATA_DST_SEL(0)));
-	amdgpu_ring_write(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 0x1);
-}
-
 static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
 static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
 {
 {
 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
@@ -1826,24 +1816,6 @@ static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
 		EVENT_INDEX(0));
 		EVENT_INDEX(0));
 }
 }
 
 
-/**
- * gfx_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
- *
- * @adev: amdgpu_device pointer
- * @ridx: amdgpu ring index
- *
- * Emits an hdp invalidate on the cp.
- */
-static void gfx_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
-				 WRITE_DATA_DST_SEL(0)));
-	amdgpu_ring_write(ring, mmHDP_DEBUG0);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 0x1);
-}
-
 static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 				     u64 seq, unsigned flags)
 				     u64 seq, unsigned flags)
 {
 {
@@ -2358,25 +2330,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 {
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 
 
-	/* write new base address */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
-				 WRITE_DATA_DST_SEL(0)));
-	if (vmid < 8) {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid ));
-	} else {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8)));
-	}
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, pd_addr >> 12);
-
-	/* bits 0-15 are the VM contexts0-15 */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
-				 WRITE_DATA_DST_SEL(0)));
-	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 
 	/* wait for the invalidate to complete */
 	/* wait for the invalidate to complete */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -2401,6 +2355,18 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	}
 	}
 }
 }
 
 
+static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+				    uint32_t reg, uint32_t val)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+				 WRITE_DATA_DST_SEL(0)));
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
 
 
 static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
 {
 {
@@ -3511,23 +3477,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
 	.get_wptr = gfx_v6_0_ring_get_wptr,
 	.get_wptr = gfx_v6_0_ring_get_wptr,
 	.set_wptr = gfx_v6_0_ring_set_wptr_gfx,
 	.set_wptr = gfx_v6_0_ring_set_wptr_gfx,
 	.emit_frame_size =
 	.emit_frame_size =
-		5 + /* gfx_v6_0_ring_emit_hdp_flush */
-		5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
+		5 + 5 + /* hdp flush / invalidate */
 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
 		7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
-		17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
+		SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
 		3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
 		3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_fence = gfx_v6_0_ring_emit_fence,
 	.emit_fence = gfx_v6_0_ring_emit_fence,
 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
-	.emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v6_0_ring_test_ring,
 	.test_ring = gfx_v6_0_ring_test_ring,
 	.test_ib = gfx_v6_0_ring_test_ib,
 	.test_ib = gfx_v6_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
 	.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
+	.emit_wreg = gfx_v6_0_ring_emit_wreg,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
@@ -3538,21 +3502,19 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
 	.get_wptr = gfx_v6_0_ring_get_wptr,
 	.get_wptr = gfx_v6_0_ring_get_wptr,
 	.set_wptr = gfx_v6_0_ring_set_wptr_compute,
 	.set_wptr = gfx_v6_0_ring_set_wptr_compute,
 	.emit_frame_size =
 	.emit_frame_size =
-		5 + /* gfx_v6_0_ring_emit_hdp_flush */
-		5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
+		5 + 5 + /* hdp flush / invalidate */
 		7 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		7 + /* gfx_v6_0_ring_emit_pipeline_sync */
-		17 + /* gfx_v6_0_ring_emit_vm_flush */
+		SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
 		14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
 		14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_fence = gfx_v6_0_ring_emit_fence,
 	.emit_fence = gfx_v6_0_ring_emit_fence,
 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
-	.emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v6_0_ring_test_ring,
 	.test_ring = gfx_v6_0_ring_test_ring,
 	.test_ib = gfx_v6_0_ring_test_ib,
 	.test_ib = gfx_v6_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.emit_wreg = gfx_v6_0_ring_emit_wreg,
 };
 };
 
 
 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
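
Note on the gfx_v6_0 frame-size accounting above: the open-coded VM flush (17 dwords) is replaced by a budget expressed in terms of the new generic flush path, SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7, where each register write emitted through .emit_wreg is a 5-dword WRITE_DATA packet and the trailing WAIT_REG_MEM costs 7 dwords (the GFX ring keeps its extra "+ 6" as before). A minimal sketch of that arithmetic follows; the value of SI_FLUSH_GPU_TLB_NUM_WREG is assumed to be 2 (page-table base write plus invalidate request) and is not shown in this hunk.

	/* Back-of-the-envelope check of the new emit_frame_size budget.
	 * SI_FLUSH_GPU_TLB_NUM_WREG == 2 is an assumption, not taken from
	 * this patch; adjust if si.h defines it differently.
	 */
	#include <stdio.h>

	#define SI_FLUSH_GPU_TLB_NUM_WREG 2	/* assumed value */

	int main(void)
	{
		int wreg_dw = 5;	/* WRITE_DATA: header + 4 payload dwords */
		int wait_dw = 7;	/* WAIT_REG_MEM: header + 6 payload dwords */

		int old_vm_flush = 17;	/* open-coded register writes */
		int new_vm_flush = SI_FLUSH_GPU_TLB_NUM_WREG * wreg_dw + wait_dw;

		printf("old vm_flush budget: %d dw\n", old_vm_flush);
		printf("new vm_flush budget: %d dw\n", new_vm_flush);
		return 0;
	}

Under that assumption the new budget still works out to 17 dwords, matching the literal it replaces.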

+ 23 - 75
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

@@ -1946,7 +1946,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 		if (i == 0)
 		if (i == 0)
 			sh_mem_base = 0;
 			sh_mem_base = 0;
 		else
 		else
-			sh_mem_base = adev->mc.shared_aperture_start >> 48;
+			sh_mem_base = adev->gmc.shared_aperture_start >> 48;
 		cik_srbm_select(adev, 0, 0, 0, i);
 		cik_srbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
 		/* CP and shaders */
 		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
 		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
@@ -2147,26 +2147,6 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
 		EVENT_INDEX(0));
 		EVENT_INDEX(0));
 }
 }
 
 
-
-/**
- * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
- *
- * @adev: amdgpu_device pointer
- * @ridx: amdgpu ring index
- *
- * Emits an hdp invalidate on the cp.
- */
-static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(0) |
-				 WR_CONFIRM));
-	amdgpu_ring_write(ring, mmHDP_DEBUG0);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1);
-}
-
 /**
 /**
  * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
  * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
  *
  *
@@ -3243,26 +3223,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 {
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
-				 WRITE_DATA_DST_SEL(0)));
-	if (vmid < 8) {
-		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
-	} else {
-		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
-	}
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, pd_addr >> 12);
-
-	/* bits 0-15 are the VM contexts0-15 */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(0)));
-	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 
 	/* wait for the invalidate to complete */
 	/* wait for the invalidate to complete */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -3289,6 +3250,19 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	}
 	}
 }
 }
 
 
+static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+				    uint32_t reg, uint32_t val)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+				 WRITE_DATA_DST_SEL(0)));
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
+
 /*
 /*
  * RLC
  * RLC
  * The RLC is a multi-purpose microengine that handles a
  * The RLC is a multi-purpose microengine that handles a
@@ -4384,34 +4358,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	case CHIP_KAVERI:
 	case CHIP_KAVERI:
 		adev->gfx.config.max_shader_engines = 1;
 		adev->gfx.config.max_shader_engines = 1;
 		adev->gfx.config.max_tile_pipes = 4;
 		adev->gfx.config.max_tile_pipes = 4;
-		if ((adev->pdev->device == 0x1304) ||
-		    (adev->pdev->device == 0x1305) ||
-		    (adev->pdev->device == 0x130C) ||
-		    (adev->pdev->device == 0x130F) ||
-		    (adev->pdev->device == 0x1310) ||
-		    (adev->pdev->device == 0x1311) ||
-		    (adev->pdev->device == 0x131C)) {
-			adev->gfx.config.max_cu_per_sh = 8;
-			adev->gfx.config.max_backends_per_se = 2;
-		} else if ((adev->pdev->device == 0x1309) ||
-			   (adev->pdev->device == 0x130A) ||
-			   (adev->pdev->device == 0x130D) ||
-			   (adev->pdev->device == 0x1313) ||
-			   (adev->pdev->device == 0x131D)) {
-			adev->gfx.config.max_cu_per_sh = 6;
-			adev->gfx.config.max_backends_per_se = 2;
-		} else if ((adev->pdev->device == 0x1306) ||
-			   (adev->pdev->device == 0x1307) ||
-			   (adev->pdev->device == 0x130B) ||
-			   (adev->pdev->device == 0x130E) ||
-			   (adev->pdev->device == 0x1315) ||
-			   (adev->pdev->device == 0x131B)) {
-			adev->gfx.config.max_cu_per_sh = 4;
-			adev->gfx.config.max_backends_per_se = 1;
-		} else {
-			adev->gfx.config.max_cu_per_sh = 3;
-			adev->gfx.config.max_backends_per_se = 1;
-		}
+		adev->gfx.config.max_cu_per_sh = 8;
+		adev->gfx.config.max_backends_per_se = 2;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_texture_channel_caches = 4;
 		adev->gfx.config.max_texture_channel_caches = 4;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gprs = 256;
@@ -5115,10 +5063,10 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.emit_frame_size =
 	.emit_frame_size =
 		20 + /* gfx_v7_0_ring_emit_gds_switch */
 		20 + /* gfx_v7_0_ring_emit_gds_switch */
 		7 + /* gfx_v7_0_ring_emit_hdp_flush */
 		7 + /* gfx_v7_0_ring_emit_hdp_flush */
-		5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+		5 + /* hdp invalidate */
 		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
 		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
 		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
 		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
-		17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
+		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
 		3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
 		3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
 	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
 	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
 	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
@@ -5127,12 +5075,12 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
 	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
+	.emit_wreg = gfx_v7_0_ring_emit_wreg,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5146,9 +5094,9 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 	.emit_frame_size =
 	.emit_frame_size =
 		20 + /* gfx_v7_0_ring_emit_gds_switch */
 		20 + /* gfx_v7_0_ring_emit_gds_switch */
 		7 + /* gfx_v7_0_ring_emit_hdp_flush */
 		7 + /* gfx_v7_0_ring_emit_hdp_flush */
-		5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+		5 + /* hdp invalidate */
 		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
 		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
-		17 + /* gfx_v7_0_ring_emit_vm_flush */
+		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
 		7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v7_0_ring_emit_ib_compute */
 	.emit_ib_size =	4, /* gfx_v7_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
@@ -5157,11 +5105,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_wreg = gfx_v7_0_ring_emit_wreg,
 };
 };
 
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
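
The gfx_v7_0 changes follow the same pattern: the per-ring .emit_wreg callback lets generic code such as amdgpu_gmc_emit_flush_gpu_tlb() issue register writes without knowing the PM4 packet format of the ring it runs on. A rough user-space mock of that indirection is sketched below; the struct names and register offsets are made up for illustration and are not the driver's.

	#include <stdint.h>
	#include <stdio.h>

	struct mock_ring;

	struct mock_ring_funcs {
		void (*emit_wreg)(struct mock_ring *ring, uint32_t reg, uint32_t val);
	};

	struct mock_ring {
		const char *name;
		const struct mock_ring_funcs *funcs;
	};

	static void gfx_emit_wreg(struct mock_ring *ring, uint32_t reg, uint32_t val)
	{
		/* a real ring would emit a 5-dword WRITE_DATA packet here */
		printf("%s: WRITE_DATA reg=0x%04x val=0x%08x\n",
		       ring->name, (unsigned)reg, (unsigned)val);
	}

	/* generic code only sees the callback, not the packet encoding */
	static void flush_gpu_tlb(struct mock_ring *ring, unsigned int vmid, uint64_t pd_addr)
	{
		ring->funcs->emit_wreg(ring, 0x050c + vmid, (uint32_t)(pd_addr >> 12));	/* fake reg */
		ring->funcs->emit_wreg(ring, 0x051e, 1u << vmid);			/* fake reg */
	}

	int main(void)
	{
		static const struct mock_ring_funcs funcs = { .emit_wreg = gfx_emit_wreg };
		struct mock_ring ring = { .name = "gfx", .funcs = &funcs };

		flush_gpu_tlb(&ring, 3, 0x123456000ULL);
		return 0;
	}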

+ 31 - 55
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -3796,7 +3796,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 			WREG32(mmSH_MEM_CONFIG, tmp);
 			WREG32(mmSH_MEM_CONFIG, tmp);
-			tmp = adev->mc.shared_aperture_start >> 48;
+			tmp = adev->gmc.shared_aperture_start >> 48;
 			WREG32(mmSH_MEM_BASES, tmp);
 			WREG32(mmSH_MEM_BASES, tmp);
 		}
 		}
 
 
@@ -4847,6 +4847,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 		/* reset MQD to a clean status */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
+		/* reset ring buffer */
+		ring->wptr = 0;
+		amdgpu_ring_clear_ring(ring);
 	} else {
 	} else {
 		amdgpu_ring_clear_ring(ring);
 		amdgpu_ring_clear_ring(ring);
 	}
 	}
@@ -4921,13 +4924,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 	/* Test KCQs */
 	/* Test KCQs */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 		ring = &adev->gfx.compute_ring[i];
-		if (adev->in_gpu_reset) {
-			/* move reset ring buffer to here to workaround
-			 * compute ring test failed
-			 */
-			ring->wptr = 0;
-			amdgpu_ring_clear_ring(ring);
-		}
 		ring->ready = true;
 		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 		if (r)
@@ -6230,19 +6226,6 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
 		EVENT_INDEX(0));
 		EVENT_INDEX(0));
 }
 }
 
 
-
-static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(0) |
-				 WR_CONFIRM));
-	amdgpu_ring_write(ring, mmHDP_DEBUG0);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1);
-
-}
-
 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      struct amdgpu_ib *ib,
 				      struct amdgpu_ib *ib,
 				      unsigned vmid, bool ctx_switch)
 				      unsigned vmid, bool ctx_switch)
@@ -6332,28 +6315,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 {
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
-				 WRITE_DATA_DST_SEL(0)) |
-				 WR_CONFIRM);
-	if (vmid < 8) {
-		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
-	} else {
-		amdgpu_ring_write(ring,
-				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
-	}
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, pd_addr >> 12);
-
-	/* bits 0-15 are the VM contexts0-15 */
-	/* invalidate the cache */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(0)));
-	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 
 	/* wait for the invalidate to complete */
 	/* wait for the invalidate to complete */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -6617,8 +6579,22 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 				  uint32_t val)
 				  uint32_t val)
 {
 {
+	uint32_t cmd;
+
+	switch (ring->funcs->type) {
+	case AMDGPU_RING_TYPE_GFX:
+		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+		break;
+	case AMDGPU_RING_TYPE_KIQ:
+		cmd = 1 << 16; /* no inc addr */
+		break;
+	default:
+		cmd = WR_CONFIRM;
+		break;
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
+	amdgpu_ring_write(ring, cmd);
 	amdgpu_ring_write(ring, reg);
 	amdgpu_ring_write(ring, reg);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, val);
 	amdgpu_ring_write(ring, val);
@@ -6871,7 +6847,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
 	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
 		5 +  /* COND_EXEC */
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
 		7 +  /* PIPELINE_SYNC */
-		19 + /* VM_FLUSH */
+		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
 		20 + /* GDS switch */
 		4 + /* double SWITCH_BUFFER,
 		4 + /* double SWITCH_BUFFER,
@@ -6893,7 +6869,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -6902,6 +6877,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
+	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -6915,9 +6891,9 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.emit_frame_size =
 	.emit_frame_size =
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
-		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+		5 + /* hdp_invalidate */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
-		17 + /* gfx_v8_0_ring_emit_vm_flush */
+		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
@@ -6926,12 +6902,12 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.set_priority = gfx_v8_0_ring_set_priority_compute,
 	.set_priority = gfx_v8_0_ring_set_priority_compute,
+	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
@@ -6945,7 +6921,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 	.emit_frame_size =
 	.emit_frame_size =
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
-		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+		5 + /* hdp_invalidate */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		17 + /* gfx_v8_0_ring_emit_vm_flush */
 		17 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
@@ -7151,12 +7127,12 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 	} ce_payload = {};
 	} ce_payload = {};
 
 
 	if (ring->adev->virt.chained_ib_support) {
 	if (ring->adev->virt.chained_ib_support) {
-		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
-						  offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
+		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
+			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
 	} else {
 	} else {
-		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
-						  offsetof(struct vi_gfx_meta_data, ce_payload);
+		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
+			offsetof(struct vi_gfx_meta_data, ce_payload);
 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
 	}
 	}
 
 
@@ -7179,7 +7155,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 		struct vi_de_ib_state_chained_ib chained;
 		struct vi_de_ib_state_chained_ib chained;
 	} de_payload = {};
 	} de_payload = {};
 
 
-	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+	csa_addr = amdgpu_csa_vaddr(ring->adev);
 	gds_addr = csa_addr + 4096;
 	gds_addr = csa_addr + 4096;
 	if (ring->adev->virt.chained_ib_support) {
 	if (ring->adev->virt.chained_ib_support) {
 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
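
gfx_v8_0_ring_emit_wreg now picks the WRITE_DATA control word per ring type: GFX rings use ENGINE_SEL(1) with WR_CONFIRM, the KIQ keeps the bare 1 << 16 (no address increment), and other rings use WR_CONFIRM alone. The snippet below only prints the three control words; the ENGINE_SEL and WR_CONFIRM bit positions are assumed from the usual PM4 layout and are not taken from this patch.

	#include <stdint.h>
	#include <stdio.h>

	#define WRITE_DATA_ENGINE_SEL(x)	((uint32_t)(x) << 30)	/* assumed layout */
	#define WR_CONFIRM			(1u << 20)		/* assumed layout */

	int main(void)
	{
		uint32_t gfx  = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;	/* PFP, wait for confirm */
		uint32_t kiq  = 1u << 16;				/* no address increment */
		uint32_t comp = WR_CONFIRM;				/* ME, wait for confirm */

		printf("gfx:     0x%08x\n", (unsigned)gfx);
		printf("kiq:     0x%08x\n", (unsigned)kiq);
		printf("compute: 0x%08x\n", (unsigned)comp);
		return 0;
	}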

+ 210 - 99
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

@@ -271,58 +271,65 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 
 
 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 {
-        struct amdgpu_device *adev = ring->adev;
-        struct amdgpu_ib ib;
-        struct dma_fence *f = NULL;
-        uint32_t scratch;
-        uint32_t tmp = 0;
-        long r;
-
-        r = amdgpu_gfx_scratch_get(adev, &scratch);
-        if (r) {
-                DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
-                return r;
-        }
-        WREG32(scratch, 0xCAFEDEAD);
-        memset(&ib, 0, sizeof(ib));
-        r = amdgpu_ib_get(adev, NULL, 256, &ib);
-        if (r) {
-                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
-                goto err1;
-        }
-        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
-        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
-        ib.ptr[2] = 0xDEADBEEF;
-        ib.length_dw = 3;
-
-        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
-        if (r)
-                goto err2;
-
-        r = dma_fence_wait_timeout(f, false, timeout);
-        if (r == 0) {
-                DRM_ERROR("amdgpu: IB test timed out.\n");
-                r = -ETIMEDOUT;
-                goto err2;
-        } else if (r < 0) {
-                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
-                goto err2;
-        }
-        tmp = RREG32(scratch);
-        if (tmp == 0xDEADBEEF) {
-                DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
-                r = 0;
-        } else {
-                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
-                          scratch, tmp);
-                r = -EINVAL;
-        }
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ib ib;
+	struct dma_fence *f = NULL;
+
+	unsigned index;
+	uint64_t gpu_addr;
+	uint32_t tmp;
+	long r;
+
+	r = amdgpu_device_wb_get(adev, &index);
+	if (r) {
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+		return r;
+	}
+
+	gpu_addr = adev->wb.gpu_addr + (index * 4);
+	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+	memset(&ib, 0, sizeof(ib));
+	r = amdgpu_ib_get(adev, NULL, 16, &ib);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+		goto err1;
+	}
+	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ib.ptr[2] = lower_32_bits(gpu_addr);
+	ib.ptr[3] = upper_32_bits(gpu_addr);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = 5;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r)
+		goto err2;
+
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+			DRM_ERROR("amdgpu: IB test timed out.\n");
+			r = -ETIMEDOUT;
+			goto err2;
+	} else if (r < 0) {
+			DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+			goto err2;
+	}
+
+	tmp = adev->wb.wb[index];
+	if (tmp == 0xDEADBEEF) {
+			DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+			r = 0;
+	} else {
+			DRM_ERROR("ib test on ring %d failed\n", ring->idx);
+			r = -EINVAL;
+	}
+
 err2:
 err2:
-        amdgpu_ib_free(adev, &ib, NULL);
-        dma_fence_put(f);
+	amdgpu_ib_free(adev, &ib, NULL);
+	dma_fence_put(f);
 err1:
 err1:
-        amdgpu_gfx_scratch_free(adev, scratch);
-        return r;
+	amdgpu_device_wb_free(adev, index);
+	return r;
 }
 }
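
The rewritten gfx_v9_0_ring_test_ib above stops using a GFX scratch register: the IB writes 0xDEADBEEF through a WRITE_DATA packet (DST_SEL(5), i.e. memory) into a write-back slot that the CPU pre-seeds with 0xCAFEDEAD, and the test passes once the new value is visible after the fence signals. A toy model of that handshake, using no amdgpu types, might look like this:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t wb_slot;		/* stands in for adev->wb.wb[index] */

	static void gpu_executes_ib(uint32_t *dst)	/* models the 5-dword WRITE_DATA IB */
	{
		*dst = 0xDEADBEEF;
	}

	int main(void)
	{
		wb_slot = 0xCAFEDEAD;		/* seeded before the IB is scheduled */
		gpu_executes_ib(&wb_slot);	/* normally: schedule IB, wait on the fence */

		if (wb_slot == 0xDEADBEEF)
			printf("ib test passed\n");
		else
			printf("ib test failed (0x%08x)\n", (unsigned)wb_slot);
		return 0;
	}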
 
 
 
 
@@ -1254,23 +1261,23 @@ static int gfx_v9_0_sw_init(void *handle)
 	adev->gfx.mec.num_queue_per_pipe = 8;
 	adev->gfx.mec.num_queue_per_pipe = 8;
 
 
 	/* KIQ event */
 	/* KIQ event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
 	/* EOP Event */
 	/* EOP Event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
 	/* Privileged reg */
 	/* Privileged reg */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184,
 			      &adev->gfx.priv_reg_irq);
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
 	/* Privileged inst */
 	/* Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185,
 			      &adev->gfx.priv_inst_irq);
 			      &adev->gfx.priv_inst_irq);
 	if (r)
 	if (r)
 		return r;
 		return r;
@@ -1539,7 +1546,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
 			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-			tmp = adev->mc.shared_aperture_start >> 48;
+			tmp = adev->gmc.shared_aperture_start >> 48;
 			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
 			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
 		}
 		}
 	}
 	}
@@ -2954,7 +2961,13 @@ static int gfx_v9_0_hw_fini(void *handle)
 		gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
 		gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
 
 
 	if (amdgpu_sriov_vf(adev)) {
 	if (amdgpu_sriov_vf(adev)) {
-		pr_debug("For SRIOV client, shouldn't do anything.\n");
+		gfx_v9_0_cp_gfx_enable(adev, false);
+		/* must disable polling for SRIOV when hw finished, otherwise
+		 * CPC engine may still keep fetching WB address which is already
+		 * invalid after sw finished and trigger DMAR reading error in
+		 * hypervisor side.
+		 */
+		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 		return 0;
 		return 0;
 	}
 	}
 	gfx_v9_0_cp_enable(adev, false);
 	gfx_v9_0_cp_enable(adev, false);
@@ -3585,14 +3598,6 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 			      ref_and_mask, ref_and_mask, 0x20);
 			      ref_and_mask, ref_and_mask, 0x20);
 }
 }
 
 
-static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	gfx_v9_0_write_data_to_reg(ring, 0, true,
-				   SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
-}
-
 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                       struct amdgpu_ib *ib,
                                       struct amdgpu_ib *ib,
                                       unsigned vmid, bool ctx_switch)
                                       unsigned vmid, bool ctx_switch)
@@ -3686,32 +3691,10 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vmid, uint64_t pd_addr)
 					unsigned vmid, uint64_t pd_addr)
 {
 {
-	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
-	uint64_t flags = AMDGPU_PTE_VALID;
-	unsigned eng = ring->vm_inv_eng;
-
-	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
-	pd_addr |= flags;
-
-	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-				   hub->ctx0_ptb_addr_lo32 + (2 * vmid),
-				   lower_32_bits(pd_addr));
-
-	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-				   hub->ctx0_ptb_addr_hi32 + (2 * vmid),
-				   upper_32_bits(pd_addr));
-
-	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-				   hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
-			      eng, 0, 1 << vmid, 1 << vmid, 0x20);
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 
 	/* compute doesn't have PFP */
 	/* compute doesn't have PFP */
-	if (usepfp) {
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
 		amdgpu_ring_write(ring, 0x0);
@@ -3735,6 +3718,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
 	return wptr;
 	return wptr;
 }
 }
 
 
+static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
+					   bool acquire)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int pipe_num, tmp, reg;
+	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
+
+	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
+
+	/* first me only has 2 entries, GFX and HP3D */
+	if (ring->me > 0)
+		pipe_num -= 2;
+
+	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
+	tmp = RREG32(reg);
+	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
+	WREG32(reg, tmp);
+}
+
+static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
+					    struct amdgpu_ring *ring,
+					    bool acquire)
+{
+	int i, pipe;
+	bool reserve;
+	struct amdgpu_ring *iring;
+
+	mutex_lock(&adev->gfx.pipe_reserve_mutex);
+	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+	if (acquire)
+		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+	else
+		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+
+	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
+		/* Clear all reservations - everyone reacquires all resources */
+		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
+			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
+						       true);
+
+		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
+						       true);
+	} else {
+		/* Lower all pipes without a current reservation */
+		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+			iring = &adev->gfx.gfx_ring[i];
+			pipe = amdgpu_gfx_queue_to_bit(adev,
+						       iring->me,
+						       iring->pipe,
+						       0);
+			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
+		}
+
+		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+			iring = &adev->gfx.compute_ring[i];
+			pipe = amdgpu_gfx_queue_to_bit(adev,
+						       iring->me,
+						       iring->pipe,
+						       0);
+			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
+		}
+	}
+
+	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
+}
+
+static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
+				      struct amdgpu_ring *ring,
+				      bool acquire)
+{
+	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
+	uint32_t queue_priority = acquire ? 0xf : 0x0;
+
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+	WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+
+	soc15_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+					       enum drm_sched_priority priority)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
+
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return;
+
+	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
+	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
+}
+
 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 {
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_device *adev = ring->adev;
@@ -3788,7 +3870,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 	int cnt;
 	int cnt;
 
 
 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+	csa_addr = amdgpu_csa_vaddr(ring->adev);
 
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -3806,7 +3888,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	uint64_t csa_addr, gds_addr;
 	uint64_t csa_addr, gds_addr;
 	int cnt;
 	int cnt;
 
 
-	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+	csa_addr = amdgpu_csa_vaddr(ring->adev);
 	gds_addr = csa_addr + 4096;
 	gds_addr = csa_addr + 4096;
 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -3904,15 +3986,34 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 }
 }
 
 
 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
-				  uint32_t val)
+				    uint32_t val)
 {
 {
+	uint32_t cmd = 0;
+
+	switch (ring->funcs->type) {
+	case AMDGPU_RING_TYPE_GFX:
+		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+		break;
+	case AMDGPU_RING_TYPE_KIQ:
+		cmd = (1 << 16); /* no inc addr */
+		break;
+	default:
+		cmd = WR_CONFIRM;
+		break;
+	}
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
+	amdgpu_ring_write(ring, cmd);
 	amdgpu_ring_write(ring, reg);
 	amdgpu_ring_write(ring, reg);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, val);
 	amdgpu_ring_write(ring, val);
 }
 }
 
 
+static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					uint32_t val, uint32_t mask)
+{
+	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 						 enum amdgpu_interrupt_state state)
 {
 {
@@ -4199,7 +4300,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
 		7 +  /* PIPELINE_SYNC */
-		24 + /* VM_FLUSH */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
 		20 + /* GDS switch */
 		4 + /* double SWITCH_BUFFER,
 		4 + /* double SWITCH_BUFFER,
@@ -4221,7 +4324,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.test_ib = gfx_v9_0_ring_test_ib,
 	.test_ib = gfx_v9_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -4231,6 +4333,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
+	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4245,9 +4349,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.emit_frame_size =
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
+		5 + /* hdp invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		24 + /* gfx_v9_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -4256,11 +4362,13 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.test_ib = gfx_v9_0_ring_test_ib,
 	.test_ib = gfx_v9_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.set_priority = gfx_v9_0_ring_set_priority_compute,
+	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 };
 };
 
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4275,9 +4383,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.emit_frame_size =
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
+		5 + /* hdp invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		24 + /* gfx_v9_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -4288,6 +4398,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 };
 };
 
 
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
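
The compute-ring priority support added earlier in this file raises the HQD pipe/queue priority (0x2/0xf) for a ring that acquires DRM_SCHED_PRIORITY_HIGH_HW and throttles the other pipes through SPI_WCL_PIPE_PERCENT_*, restoring full speed once the last reservation in pipe_reserve_bitmap is dropped. A compact model of that bookkeeping, using a plain array instead of the driver's bitmap and mutex, could be:

	#include <stdbool.h>
	#include <stdio.h>

	#define NUM_PIPES 8

	static bool reserved[NUM_PIPES];

	static void update_pipe_limits(void)
	{
		bool any = false;
		for (int i = 0; i < NUM_PIPES; i++)
			any |= reserved[i];

		for (int i = 0; i < NUM_PIPES; i++) {
			/* no reservations at all, or a reserved pipe: full speed */
			bool full_speed = !any || reserved[i];
			printf("pipe %d -> %s\n", i, full_speed ? "full" : "throttled");
		}
	}

	static void set_priority(int pipe, bool acquire)
	{
		reserved[pipe] = acquire;	/* set_bit()/clear_bit() in the driver */
		update_pipe_limits();
	}

	int main(void)
	{
		set_priority(2, true);		/* high-priority ring on pipe 2 */
		set_priority(2, false);		/* released: everything back to full speed */
		return 0;
	}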

+ 14 - 14
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c

@@ -40,7 +40,7 @@ static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
 	uint64_t value;
 	uint64_t value;
 
 
 	BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
 	BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
-	value = adev->gart.table_addr - adev->mc.vram_start
+	value = adev->gart.table_addr - adev->gmc.vram_start
 		+ adev->vm_manager.vram_base_offset;
 		+ adev->vm_manager.vram_base_offset;
 	value &= 0x0000FFFFFFFFF000ULL;
 	value &= 0x0000FFFFFFFFF000ULL;
 	value |= 0x1; /*valid bit*/
 	value |= 0x1; /*valid bit*/
@@ -57,14 +57,14 @@ static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
 	gfxhub_v1_0_init_gart_pt_regs(adev);
 	gfxhub_v1_0_init_gart_pt_regs(adev);
 
 
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
-		     (u32)(adev->mc.gart_start >> 12));
+		     (u32)(adev->gmc.gart_start >> 12));
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
-		     (u32)(adev->mc.gart_start >> 44));
+		     (u32)(adev->gmc.gart_start >> 44));
 
 
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
-		     (u32)(adev->mc.gart_end >> 12));
+		     (u32)(adev->gmc.gart_end >> 12));
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
-		     (u32)(adev->mc.gart_end >> 44));
+		     (u32)(adev->gmc.gart_end >> 44));
 }
 }
 
 
 static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
@@ -78,12 +78,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 
 
 	/* Program the system aperture low logical page number. */
 	/* Program the system aperture low logical page number. */
 	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-		     adev->mc.vram_start >> 18);
+		     adev->gmc.vram_start >> 18);
 	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-		     adev->mc.vram_end >> 18);
+		     adev->gmc.vram_end >> 18);
 
 
 	/* Set default page address. */
 	/* Set default page address. */
-	value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
+	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
 		+ adev->vm_manager.vram_base_offset;
 		+ adev->vm_manager.vram_base_offset;
 	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 		     (u32)(value >> 12));
 		     (u32)(value >> 12));
@@ -92,9 +92,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 
 
 	/* Program "protection fault". */
 	/* Program "protection fault". */
 	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
 	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
-		     (u32)(adev->dummy_page.addr >> 12));
+		     (u32)(adev->dummy_page_addr >> 12));
 	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
 	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
-		     (u32)((u64)adev->dummy_page.addr >> 44));
+		     (u32)((u64)adev->dummy_page_addr >> 44));
 
 
 	WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
 	WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
 		       ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
 		       ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
@@ -143,7 +143,7 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
 
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
 	tmp = mmVM_L2_CNTL3_DEFAULT;
-	if (adev->mc.translate_further) {
+	if (adev->gmc.translate_further) {
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
 				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
 				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
@@ -195,7 +195,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
 
 
 	num_level = adev->vm_manager.num_level;
 	num_level = adev->vm_manager.num_level;
 	block_size = adev->vm_manager.block_size;
 	block_size = adev->vm_manager.block_size;
-	if (adev->mc.translate_further)
+	if (adev->gmc.translate_further)
 		num_level -= 1;
 		num_level -= 1;
 	else
 	else
 		block_size -= 9;
 		block_size -= 9;
@@ -257,9 +257,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 		 * SRIOV driver need to program them
 		 * SRIOV driver need to program them
 		 */
 		 */
 		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
 		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
-			     adev->mc.vram_start >> 24);
+			     adev->gmc.vram_start >> 24);
 		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
 		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
-			     adev->mc.vram_end >> 24);
+			     adev->gmc.vram_end >> 24);
 	}
 	}
 
 
 	/* GART Enable. */
 	/* GART Enable. */
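
gfxhub_v1_0 programs the GART aperture by splitting each 48-bit, page-aligned address across a LO32/HI32 register pair: start and end are shifted right by 12 to get the 4 KiB page number for the low register and by 44 for the high one. A small illustration with a made-up aperture base:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t gart_start = 0x0000008000000000ULL;	/* hypothetical aperture base */

		uint32_t lo = (uint32_t)(gart_start >> 12);	/* ..._START_ADDR_LO32 */
		uint32_t hi = (uint32_t)(gart_start >> 44);	/* ..._START_ADDR_HI32 */

		printf("LO32 = 0x%08x, HI32 = 0x%08x\n", (unsigned)lo, (unsigned)hi);
		return 0;
	}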

+ 76 - 61
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c

@@ -37,7 +37,7 @@
 #include "dce/dce_6_0_sh_mask.h"
 #include "dce/dce_6_0_sh_mask.h"
 #include "si_enums.h"
 #include "si_enums.h"
 
 
-static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev);
+static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
 static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
 static int gmc_v6_0_wait_for_idle(void *handle);
 static int gmc_v6_0_wait_for_idle(void *handle);
 
 
@@ -137,19 +137,19 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
 	else
 	else
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
-	err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
+	err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 
 
-	err = amdgpu_ucode_validate(adev->mc.fw);
+	err = amdgpu_ucode_validate(adev->gmc.fw);
 
 
 out:
 out:
 	if (err) {
 	if (err) {
 		dev_err(adev->dev,
 		dev_err(adev->dev,
 		       "si_mc: Failed to load firmware \"%s\"\n",
 		       "si_mc: Failed to load firmware \"%s\"\n",
 		       fw_name);
 		       fw_name);
-		release_firmware(adev->mc.fw);
-		adev->mc.fw = NULL;
+		release_firmware(adev->gmc.fw);
+		adev->gmc.fw = NULL;
 	}
 	}
 	return err;
 	return err;
 }
 }
@@ -162,20 +162,20 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
 	int i, regs_size, ucode_size;
 	const struct mc_firmware_header_v1_0 *hdr;
 
-	if (!adev->mc.fw)
+	if (!adev->gmc.fw)
 		return -EINVAL;
 
-	hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+	hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
 
 	amdgpu_ucode_print_mc_hdr(&hdr->header);
 
-	adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
 	regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
 	new_io_mc_regs = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
 	ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 	new_fw_data = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
 	running = RREG32(mmMC_SEQ_SUP_CNTL) & MC_SEQ_SUP_CNTL__RUN_MASK;
 
@@ -218,12 +218,12 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
 }
 
 static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
-				       struct amdgpu_mc *mc)
+				       struct amdgpu_gmc *mc)
 {
 	u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
 	base <<= 24;
 
-	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_vram_location(adev, &adev->gmc, base);
 	amdgpu_device_gart_location(adev, mc);
 }
 
@@ -260,9 +260,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 	}
 	/* Update configuration */
 	WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-	       adev->mc.vram_start >> 12);
+	       adev->gmc.vram_start >> 12);
 	WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-	       adev->mc.vram_end >> 12);
+	       adev->gmc.vram_end >> 12);
 	WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
 	       adev->vram_scratch.gpu_addr >> 12);
 	WREG32(mmMC_VM_AGP_BASE, 0);
@@ -320,56 +320,69 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 		numchan = 16;
 		break;
 	}
-	adev->mc.vram_width = numchan * chansize;
+	adev->gmc.vram_width = numchan * chansize;
 	/* size in MB on si */
-	adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
-	adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
 
 	if (!(adev->flags & AMD_IS_APU)) {
 		r = amdgpu_device_resize_fb_bar(adev);
 		if (r)
 			return r;
 	}
-	adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
-	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
-	adev->mc.visible_vram_size = adev->mc.aper_size;
+	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+	adev->gmc.visible_vram_size = adev->gmc.aper_size;
 
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
 		switch (adev->asic_type) {
 		case CHIP_HAINAN:    /* no MM engines */
 		default:
-			adev->mc.gart_size = 256ULL << 20;
+			adev->gmc.gart_size = 256ULL << 20;
 			break;
 		case CHIP_VERDE:    /* UVD, VCE do not support GPUVM */
 		case CHIP_TAHITI:   /* UVD, VCE do not support GPUVM */
 		case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
 		case CHIP_OLAND:    /* UVD, VCE do not support GPUVM */
-			adev->mc.gart_size = 1024ULL << 20;
+			adev->gmc.gart_size = 1024ULL << 20;
 			break;
 		}
 	} else {
-		adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
 	}
 
-	gmc_v6_0_vram_gtt_location(adev, &adev->mc);
+	gmc_v6_0_vram_gtt_location(adev, &adev->gmc);
 
 	return 0;
 }
 
-static void gmc_v6_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
-					uint32_t vmid)
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
 {
-	WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
 
-static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev,
-				     void *cpu_pt_addr,
-				     uint32_t gpu_page_idx,
-				     uint64_t addr,
-				     uint64_t flags)
+static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+					    unsigned vmid, uint64_t pd_addr)
+{
+	uint32_t reg;
+
+	/* write new base address */
+	if (vmid < 8)
+		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+	else
+		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8);
+	amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+	/* bits 0-15 are the VM contexts0-15 */
+	amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+	return pd_addr;
+}
+
+static int gmc_v6_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+				uint32_t gpu_page_idx, uint64_t addr,
+				uint64_t flags)
 {
 	void __iomem *ptr = (void *)cpu_pt_addr;
 	uint64_t value;
@@ -433,9 +446,9 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
 {
 	u32 tmp;
 
-	if (enable && !adev->mc.prt_warning) {
+	if (enable && !adev->gmc.prt_warning) {
 		dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
-		adev->mc.prt_warning = true;
+		adev->gmc.prt_warning = true;
 	}
 
 	tmp = RREG32(mmVM_PRT_CNTL);
@@ -455,7 +468,8 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
 
 	if (enable) {
 		uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
-		uint32_t high = adev->vm_manager.max_pfn;
+		uint32_t high = adev->vm_manager.max_pfn -
+			(AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
 
 		WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
 		WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -515,11 +529,11 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 	       (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
 	       (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
 	/* setup context0 */
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
 	WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
 	WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
-			(u32)(adev->dummy_page.addr >> 12));
+			(u32)(adev->dummy_page_addr >> 12));
 	WREG32(mmVM_CONTEXT0_CNTL2, 0);
 	WREG32(mmVM_CONTEXT0_CNTL,
 	       VM_CONTEXT0_CNTL__ENABLE_CONTEXT_MASK |
@@ -549,7 +563,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 
 	/* enable context1-15 */
 	WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
-	       (u32)(adev->dummy_page.addr >> 12));
+	       (u32)(adev->dummy_page_addr >> 12));
 	WREG32(mmVM_CONTEXT1_CNTL2, 4);
 	WREG32(mmVM_CONTEXT1_CNTL,
 	       VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
@@ -561,9 +575,9 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 	else
 		gmc_v6_0_set_fault_enable_default(adev, true);
 
-	gmc_v6_0_gart_flush_gpu_tlb(adev, 0);
+	gmc_v6_0_flush_gpu_tlb(adev, 0);
 	dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
-		 (unsigned)(adev->mc.gart_size >> 20),
+		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)adev->gart.table_addr);
 	adev->gart.ready = true;
 	return 0;
@@ -795,7 +809,7 @@ static int gmc_v6_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	gmc_v6_0_set_gart_funcs(adev);
+	gmc_v6_0_set_gmc_funcs(adev);
 	gmc_v6_0_set_irq_funcs(adev);
 
 	return 0;
@@ -806,7 +820,7 @@ static int gmc_v6_0_late_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
-		return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
@@ -818,26 +832,26 @@ static int gmc_v6_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 	} else {
 		u32 tmp = RREG32(mmMC_SEQ_MISC0);
 		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp);
+		adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp);
 	}
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
 	if (r)
 		return r;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
 	if (r)
 		return r;
 
 	amdgpu_vm_adjust_size(adev, 64, 9, 1, 40);
 
-	adev->mc.mc_mask = 0xffffffffffULL;
+	adev->gmc.mc_mask = 0xffffffffffULL;
 
-	adev->mc.stolen_size = 256 * 1024;
+	adev->gmc.stolen_size = 256 * 1024;
 
 	adev->need_dma32 = false;
 	dma_bits = adev->need_dma32 ? 32 : 40;
@@ -902,8 +916,8 @@ static int gmc_v6_0_sw_fini(void *handle)
 	amdgpu_vm_manager_fini(adev);
 	gmc_v6_0_gart_fini(adev);
 	amdgpu_bo_fini(adev);
-	release_firmware(adev->mc.fw);
-	adev->mc.fw = NULL;
+	release_firmware(adev->gmc.fw);
+	adev->gmc.fw = NULL;
 
 	return 0;
 }
@@ -934,7 +948,7 @@ static int gmc_v6_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 	gmc_v6_0_gart_disable(adev);
 
 	return 0;
@@ -1129,9 +1143,10 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
 	.set_powergating_state = gmc_v6_0_set_powergating_state,
 };
 
-static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
-	.flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
-	.set_pte_pde = gmc_v6_0_gart_set_pte_pde,
+static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
+	.flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb,
+	.emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
+	.set_pte_pde = gmc_v6_0_set_pte_pde,
 	.set_prt = gmc_v6_0_set_prt,
 	.get_vm_pde = gmc_v6_0_get_vm_pde,
 	.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
@@ -1142,16 +1157,16 @@ static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
 	.process = gmc_v6_0_process_interrupt,
 };
 
-static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev)
 {
-	if (adev->gart.gart_funcs == NULL)
-		adev->gart.gart_funcs = &gmc_v6_0_gart_funcs;
+	if (adev->gmc.gmc_funcs == NULL)
+		adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs;
 }
 
 static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->mc.vm_fault.num_types = 1;
-	adev->mc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
+	adev->gmc.vm_fault.num_types = 1;
+	adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
 }
 
 const struct amdgpu_ip_block_version gmc_v6_0_ip_block =

+ 104 - 83
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

@@ -43,7 +43,7 @@
 
 #include "amdgpu_atombios.h"
 
-static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
+static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
 static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static int gmc_v7_0_wait_for_idle(void *handle);
 
@@ -152,16 +152,16 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
 	else
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
 
-	err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
+	err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
 	if (err)
 		goto out;
-	err = amdgpu_ucode_validate(adev->mc.fw);
+	err = amdgpu_ucode_validate(adev->gmc.fw);
 
 out:
 	if (err) {
 		pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
-		release_firmware(adev->mc.fw);
-		adev->mc.fw = NULL;
+		release_firmware(adev->gmc.fw);
+		adev->gmc.fw = NULL;
 	}
 	return err;
 }
@@ -182,19 +182,19 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
 	u32 running;
 	int i, ucode_size, regs_size;
 
-	if (!adev->mc.fw)
+	if (!adev->gmc.fw)
 		return -EINVAL;
 
-	hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+	hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
 	amdgpu_ucode_print_mc_hdr(&hdr->header);
 
-	adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
 	regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
 	io_mc_regs = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
 	ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 	fw_data = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
 	running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
 
@@ -236,12 +236,12 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
 }
 
 static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
-				       struct amdgpu_mc *mc)
+				       struct amdgpu_gmc *mc)
 {
 	u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
 	base <<= 24;
 
-	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_vram_location(adev, &adev->gmc, base);
 	amdgpu_device_gart_location(adev, mc);
 }
 
@@ -284,9 +284,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 	}
 	/* Update configuration */
 	WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-	       adev->mc.vram_start >> 12);
+	       adev->gmc.vram_start >> 12);
 	WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-	       adev->mc.vram_end >> 12);
+	       adev->gmc.vram_end >> 12);
 	WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
 	       adev->vram_scratch.gpu_addr >> 12);
 	WREG32(mmMC_VM_AGP_BASE, 0);
@@ -319,8 +319,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 {
 	int r;
 
-	adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev);
-	if (!adev->mc.vram_width) {
+	adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
+	if (!adev->gmc.vram_width) {
 		u32 tmp;
 		int chansize, numchan;
 
@@ -362,38 +362,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 			numchan = 16;
 			break;
 		}
-		adev->mc.vram_width = numchan * chansize;
+		adev->gmc.vram_width = numchan * chansize;
 	}
 	/* size in MB on si */
-	adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
-	adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
 
 	if (!(adev->flags & AMD_IS_APU)) {
 		r = amdgpu_device_resize_fb_bar(adev);
 		if (r)
 			return r;
 	}
-	adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
-	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
 #ifdef CONFIG_X86_64
 	if (adev->flags & AMD_IS_APU) {
-		adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
-		adev->mc.aper_size = adev->mc.real_vram_size;
+		adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+		adev->gmc.aper_size = adev->gmc.real_vram_size;
 	}
 #endif
 
 	/* In case the PCI BAR is larger than the actual amount of vram */
-	adev->mc.visible_vram_size = adev->mc.aper_size;
-	if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
-		adev->mc.visible_vram_size = adev->mc.real_vram_size;
+	adev->gmc.visible_vram_size = adev->gmc.aper_size;
+	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
 
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
 		switch (adev->asic_type) {
 		case CHIP_TOPAZ:     /* no MM engines */
 		default:
-			adev->mc.gart_size = 256ULL << 20;
+			adev->gmc.gart_size = 256ULL << 20;
 			break;
#ifdef CONFIG_DRM_AMDGPU_CIK
 		case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
@@ -401,15 +401,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 		case CHIP_KAVERI:  /* UVD, VCE do not support GPUVM */
 		case CHIP_KABINI:  /* UVD, VCE do not support GPUVM */
 		case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
-			adev->mc.gart_size = 1024ULL << 20;
+			adev->gmc.gart_size = 1024ULL << 20;
 			break;
#endif
 		}
 	} else {
-		adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
 	}
 
-	gmc_v7_0_vram_gtt_location(adev, &adev->mc);
+	gmc_v7_0_vram_gtt_location(adev, &adev->gmc);
 
 	return 0;
 }
@@ -422,25 +422,44 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
  */
 
 /**
- * gmc_v7_0_gart_flush_gpu_tlb - gart tlb flush callback
+ * gmc_v7_0_flush_gpu_tlb - gart tlb flush callback
  *
  * @adev: amdgpu_device pointer
  * @vmid: vm instance to flush
  *
 * Flush the TLB for the requested page table (CIK).
  */
-static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
-					uint32_t vmid)
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
 {
-	/* flush hdp cache */
-	WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
 
+static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+					    unsigned vmid, uint64_t pd_addr)
+{
+	uint32_t reg;
+
+	if (vmid < 8)
+		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+	else
+		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
+	amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+	/* bits 0-15 are the VM contexts0-15 */
+	amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+	return pd_addr;
+}
+
+static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+					unsigned pasid)
+{
+	amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
+}
+
 /**
- * gmc_v7_0_gart_set_pte_pde - update the page tables using MMIO
+ * gmc_v7_0_set_pte_pde - update the page tables using MMIO
  *
  * @adev: amdgpu_device pointer
  * @cpu_pt_addr: cpu address of the page table
@@ -450,11 +469,9 @@ static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
  *
  * Update the page tables using the CPU.
  */
-static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev,
-				     void *cpu_pt_addr,
-				     uint32_t gpu_page_idx,
-				     uint64_t addr,
-				     uint64_t flags)
+static int gmc_v7_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+				 uint32_t gpu_page_idx, uint64_t addr,
+				 uint64_t flags)
 {
 	void __iomem *ptr = (void *)cpu_pt_addr;
 	uint64_t value;
@@ -524,9 +541,9 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
 {
 	uint32_t tmp;
 
-	if (enable && !adev->mc.prt_warning) {
+	if (enable && !adev->gmc.prt_warning) {
 		dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
-		adev->mc.prt_warning = true;
+		adev->gmc.prt_warning = true;
 	}
 
 	tmp = RREG32(mmVM_PRT_CNTL);
@@ -548,7 +565,8 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
 
 	if (enable) {
 		uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
-		uint32_t high = adev->vm_manager.max_pfn;
+		uint32_t high = adev->vm_manager.max_pfn -
+			(AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
 
 		WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
 		WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -622,11 +640,11 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
 	WREG32(mmVM_L2_CNTL3, tmp);
 	/* setup context0 */
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
 	WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
 	WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
-			(u32)(adev->dummy_page.addr >> 12));
+			(u32)(adev->dummy_page_addr >> 12));
 	WREG32(mmVM_CONTEXT0_CNTL2, 0);
 	tmp = RREG32(mmVM_CONTEXT0_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
@@ -656,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
 
 	/* enable context1-15 */
 	WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
-	       (u32)(adev->dummy_page.addr >> 12));
+	       (u32)(adev->dummy_page_addr >> 12));
 	WREG32(mmVM_CONTEXT1_CNTL2, 4);
 	tmp = RREG32(mmVM_CONTEXT1_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
@@ -675,9 +693,9 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
 		WREG32(mmCHUB_CONTROL, tmp);
 	}
 
-	gmc_v7_0_gart_flush_gpu_tlb(adev, 0);
+	gmc_v7_0_flush_gpu_tlb(adev, 0);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-		 (unsigned)(adev->mc.gart_size >> 20),
+		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)adev->gart.table_addr);
 	adev->gart.ready = true;
 	return 0;
@@ -750,21 +768,21 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
  *
  * Print human readable fault information (CIK).
  */
-static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev,
-				     u32 status, u32 addr, u32 mc_client)
+static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
+				     u32 addr, u32 mc_client, unsigned pasid)
 {
-	u32 mc_id;
 	u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
 	u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 					PROTECTIONS);
 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
+	u32 mc_id;
 
 	mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			      MEMORY_CLIENT_ID);
 
-	dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
-	       protections, vmid, addr,
+	dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+	       protections, vmid, pasid, addr,
 	       REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			     MEMORY_CLIENT_RW) ?
 	       "write" : "read", block, mc_client, mc_id);
@@ -922,16 +940,16 @@ static int gmc_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	gmc_v7_0_set_gart_funcs(adev);
+	gmc_v7_0_set_gmc_funcs(adev);
 	gmc_v7_0_set_irq_funcs(adev);
 
-	adev->mc.shared_aperture_start = 0x2000000000000000ULL;
-	adev->mc.shared_aperture_end =
-		adev->mc.shared_aperture_start + (4ULL << 30) - 1;
-	adev->mc.private_aperture_start =
-		adev->mc.shared_aperture_end + 1;
-	adev->mc.private_aperture_end =
-		adev->mc.private_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+	adev->gmc.shared_aperture_end =
+		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.private_aperture_start =
+		adev->gmc.shared_aperture_end + 1;
+	adev->gmc.private_aperture_end =
+		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
 	return 0;
 }
@@ -941,7 +959,7 @@ static int gmc_v7_0_late_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
-		return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
@@ -953,18 +971,18 @@ static int gmc_v7_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 	} else {
 		u32 tmp = RREG32(mmMC_SEQ_MISC0);
 		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
+		adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
 	}
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
 	if (r)
 		return r;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
 	if (r)
 		return r;
 
@@ -978,9 +996,9 @@ static int gmc_v7_0_sw_init(void *handle)
 	 * This is the max address of the GPU's
 	 * internal address space.
 	 */
-	adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
+	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
-	adev->mc.stolen_size = 256 * 1024;
+	adev->gmc.stolen_size = 256 * 1024;
 
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
@@ -1051,8 +1069,8 @@ static int gmc_v7_0_sw_fini(void *handle)
 	amdgpu_vm_manager_fini(adev);
 	gmc_v7_0_gart_fini(adev);
 	amdgpu_bo_fini(adev);
-	release_firmware(adev->mc.fw);
-	adev->mc.fw = NULL;
+	release_firmware(adev->gmc.fw);
+	adev->gmc.fw = NULL;
 
 	return 0;
 }
@@ -1085,7 +1103,7 @@ static int gmc_v7_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 	gmc_v7_0_gart_disable(adev);
 
 	return 0;
@@ -1259,7 +1277,8 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
 			addr);
 		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
 			status);
-		gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client);
+		gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client,
+					 entry->pasid);
 	}
 
 	return 0;
@@ -1308,9 +1327,11 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
 	.set_powergating_state = gmc_v7_0_set_powergating_state,
 };
 
-static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
-	.flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
-	.set_pte_pde = gmc_v7_0_gart_set_pte_pde,
+static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
+	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
+	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
+	.set_pte_pde = gmc_v7_0_set_pte_pde,
 	.set_prt = gmc_v7_0_set_prt,
 	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
 	.get_vm_pde = gmc_v7_0_get_vm_pde
@@ -1321,16 +1342,16 @@ static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
 	.process = gmc_v7_0_process_interrupt,
 };
 
-static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev)
 {
-	if (adev->gart.gart_funcs == NULL)
-		adev->gart.gart_funcs = &gmc_v7_0_gart_funcs;
+	if (adev->gmc.gmc_funcs == NULL)
+		adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs;
 }
 
 static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->mc.vm_fault.num_types = 1;
-	adev->mc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
+	adev->gmc.vm_fault.num_types = 1;
+	adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
 }
 
 const struct amdgpu_ip_block_version gmc_v7_0_ip_block =

+ 118 - 97
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

@@ -45,7 +45,7 @@
 #include "amdgpu_atombios.h"
 
 
-static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
+static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 static int gmc_v8_0_wait_for_idle(void *handle);
 
@@ -236,16 +236,16 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
 	}
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
-	err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
+	err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
 	if (err)
 		goto out;
-	err = amdgpu_ucode_validate(adev->mc.fw);
+	err = amdgpu_ucode_validate(adev->gmc.fw);
 
 out:
 	if (err) {
 		pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
-		release_firmware(adev->mc.fw);
-		adev->mc.fw = NULL;
+		release_firmware(adev->gmc.fw);
+		adev->gmc.fw = NULL;
 	}
 	return err;
 }
@@ -274,19 +274,19 @@ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
 	if (amdgpu_sriov_bios(adev))
 		return 0;
 
-	if (!adev->mc.fw)
+	if (!adev->gmc.fw)
 		return -EINVAL;
 
-	hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+	hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
 	amdgpu_ucode_print_mc_hdr(&hdr->header);
 
-	adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
 	regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
 	io_mc_regs = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
 	ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 	fw_data = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
 	running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
 
@@ -350,19 +350,19 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
 	if (vbios_version == 0)
 		return 0;
 
-	if (!adev->mc.fw)
+	if (!adev->gmc.fw)
 		return -EINVAL;
 
-	hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+	hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
 	amdgpu_ucode_print_mc_hdr(&hdr->header);
 
-	adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
 	regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
 	io_mc_regs = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
 	ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 	fw_data = (const __le32 *)
-		(adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		(adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
 	data = RREG32(mmMC_SEQ_MISC0);
 	data &= ~(0x40);
@@ -398,7 +398,7 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
 }
 
 static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
-				       struct amdgpu_mc *mc)
+				       struct amdgpu_gmc *mc)
 {
 	u64 base = 0;
 
@@ -406,7 +406,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
 		base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
 	base <<= 24;
 
-	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_vram_location(adev, &adev->gmc, base);
 	amdgpu_device_gart_location(adev, mc);
 }
 
@@ -449,18 +449,18 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 	}
 	/* Update configuration */
 	WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-	       adev->mc.vram_start >> 12);
+	       adev->gmc.vram_start >> 12);
 	WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-	       adev->mc.vram_end >> 12);
+	       adev->gmc.vram_end >> 12);
 	WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
 	       adev->vram_scratch.gpu_addr >> 12);
 
 	if (amdgpu_sriov_vf(adev)) {
-		tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16;
-		tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF);
+		tmp = ((adev->gmc.vram_end >> 24) & 0xFFFF) << 16;
+		tmp |= ((adev->gmc.vram_start >> 24) & 0xFFFF);
 		WREG32(mmMC_VM_FB_LOCATION, tmp);
 		/* XXX double check these! */
-		WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
+		WREG32(mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
 		WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
 		WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
 	}
@@ -495,8 +495,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 {
 	int r;
 
-	adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev);
-	if (!adev->mc.vram_width) {
+	adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
+	if (!adev->gmc.vram_width) {
 		u32 tmp;
 		int chansize, numchan;
 
@@ -538,31 +538,31 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 			numchan = 16;
 			break;
 		}
-		adev->mc.vram_width = numchan * chansize;
+		adev->gmc.vram_width = numchan * chansize;
 	}
 	/* size in MB on si */
-	adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
-	adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
 
 	if (!(adev->flags & AMD_IS_APU)) {
 		r = amdgpu_device_resize_fb_bar(adev);
 		if (r)
 			return r;
 	}
-	adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
-	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
 #ifdef CONFIG_X86_64
 	if (adev->flags & AMD_IS_APU) {
-		adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
-		adev->mc.aper_size = adev->mc.real_vram_size;
+		adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+		adev->gmc.aper_size = adev->gmc.real_vram_size;
 	}
 #endif
 
 	/* In case the PCI BAR is larger than the actual amount of vram */
-	adev->mc.visible_vram_size = adev->mc.aper_size;
-	if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
-		adev->mc.visible_vram_size = adev->mc.real_vram_size;
+	adev->gmc.visible_vram_size = adev->gmc.aper_size;
+	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
 
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
@@ -571,20 +571,20 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 		case CHIP_POLARIS10: /* all engines support GPUVM */
 		case CHIP_POLARIS12: /* all engines support GPUVM */
 		default:
-			adev->mc.gart_size = 256ULL << 20;
+			adev->gmc.gart_size = 256ULL << 20;
 			break;
 		case CHIP_TONGA:   /* UVD, VCE do not support GPUVM */
 		case CHIP_FIJI:    /* UVD, VCE do not support GPUVM */
 		case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
 		case CHIP_STONEY:  /* UVD does not support GPUVM, DCE SG support */
-			adev->mc.gart_size = 1024ULL << 20;
+			adev->gmc.gart_size = 1024ULL << 20;
 			break;
 		}
 	} else {
-		adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
 	}
 
-	gmc_v8_0_vram_gtt_location(adev, &adev->mc);
+	gmc_v8_0_vram_gtt_location(adev, &adev->gmc);
 
 	return 0;
 }
@@ -597,25 +597,45 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
  */
 
 /**
- * gmc_v8_0_gart_flush_gpu_tlb - gart tlb flush callback
+ * gmc_v8_0_flush_gpu_tlb - gart tlb flush callback
  *
  * @adev: amdgpu_device pointer
  * @vmid: vm instance to flush
  *
 * Flush the TLB for the requested page table (CIK).
  */
-static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
 					uint32_t vmid)
 {
-	/* flush hdp cache */
-	WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
 
+static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+					    unsigned vmid, uint64_t pd_addr)
+{
+	uint32_t reg;
+
+	if (vmid < 8)
+		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+	else
+		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
+	amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+	/* bits 0-15 are the VM contexts0-15 */
+	amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+	return pd_addr;
+}
+
+static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+					unsigned pasid)
+{
+	amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
+}
+
 /**
- * gmc_v8_0_gart_set_pte_pde - update the page tables using MMIO
+ * gmc_v8_0_set_pte_pde - update the page tables using MMIO
  *
  * @adev: amdgpu_device pointer
  * @cpu_pt_addr: cpu address of the page table
@@ -625,11 +645,9 @@ static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
  *
  * Update the page tables using the CPU.
  */
-static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev,
-				     void *cpu_pt_addr,
-				     uint32_t gpu_page_idx,
-				     uint64_t addr,
-				     uint64_t flags)
+static int gmc_v8_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+				uint32_t gpu_page_idx, uint64_t addr,
+				uint64_t flags)
 {
 	void __iomem *ptr = (void *)cpu_pt_addr;
 	uint64_t value;
@@ -723,9 +741,9 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
 {
 	u32 tmp;
 
-	if (enable && !adev->mc.prt_warning) {
+	if (enable && !adev->gmc.prt_warning) {
 		dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
-		adev->mc.prt_warning = true;
+		adev->gmc.prt_warning = true;
 	}
 
 	tmp = RREG32(mmVM_PRT_CNTL);
@@ -747,7 +765,8 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
 
 	if (enable) {
 		uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
-		uint32_t high = adev->vm_manager.max_pfn;
+		uint32_t high = adev->vm_manager.max_pfn -
+			(AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
 
 		WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
 		WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -837,11 +856,11 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0);
 	WREG32(mmVM_L2_CNTL4, tmp);
 	/* setup context0 */
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
 	WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
 	WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
-			(u32)(adev->dummy_page.addr >> 12));
+			(u32)(adev->dummy_page_addr >> 12));
 	WREG32(mmVM_CONTEXT0_CNTL2, 0);
 	tmp = RREG32(mmVM_CONTEXT0_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
@@ -871,7 +890,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
 
 	/* enable context1-15 */
 	WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
-	       (u32)(adev->dummy_page.addr >> 12));
+	       (u32)(adev->dummy_page_addr >> 12));
 	WREG32(mmVM_CONTEXT1_CNTL2, 4);
 	tmp = RREG32(mmVM_CONTEXT1_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
@@ -891,9 +910,9 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
 	else
 		gmc_v8_0_set_fault_enable_default(adev, true);
 
-	gmc_v8_0_gart_flush_gpu_tlb(adev, 0);
+	gmc_v8_0_flush_gpu_tlb(adev, 0);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-		 (unsigned)(adev->mc.gart_size >> 20),
+		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)adev->gart.table_addr);
 	adev->gart.ready = true;
 	return 0;
@@ -966,21 +985,21 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
  *
  * Print human readable fault information (CIK).
  */
-static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev,
-				     u32 status, u32 addr, u32 mc_client)
+static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
+				     u32 addr, u32 mc_client, unsigned pasid)
 {
-	u32 mc_id;
 	u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
 	u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 					PROTECTIONS);
 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
+	u32 mc_id;
 
 	mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			      MEMORY_CLIENT_ID);
 
-	dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
-	       protections, vmid, addr,
+	dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+	       protections, vmid, pasid, addr,
 	       REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			     MEMORY_CLIENT_RW) ?
 	       "write" : "read", block, mc_client, mc_id);
@@ -1012,16 +1031,16 @@ static int gmc_v8_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	gmc_v8_0_set_gart_funcs(adev);
+	gmc_v8_0_set_gmc_funcs(adev);
 	gmc_v8_0_set_irq_funcs(adev);
 
-	adev->mc.shared_aperture_start = 0x2000000000000000ULL;
-	adev->mc.shared_aperture_end =
-		adev->mc.shared_aperture_start + (4ULL << 30) - 1;
-	adev->mc.private_aperture_start =
-		adev->mc.shared_aperture_end + 1;
-	adev->mc.private_aperture_end =
-		adev->mc.private_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+	adev->gmc.shared_aperture_end =
+		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.private_aperture_start =
+		adev->gmc.shared_aperture_end + 1;
+	adev->gmc.private_aperture_end =
+		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
 	return 0;
 }
@@ -1031,7 +1050,7 @@ static int gmc_v8_0_late_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
-		return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
@@ -1045,7 +1064,7 @@ static int gmc_v8_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 	} else {
 		u32 tmp;
 
@@ -1054,14 +1073,14 @@ static int gmc_v8_0_sw_init(void *handle)
 		else
 			tmp = RREG32(mmMC_SEQ_MISC0);
 		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
+		adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp);
 	}
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
 	if (r)
 		return r;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
 	if (r)
 		return r;
 
@@ -1075,9 +1094,9 @@ static int gmc_v8_0_sw_init(void *handle)
 	 * This is the max address of the GPU's
 	 * This is the max address of the GPU's
 	 * internal address space.
 	 * internal address space.
 	 */
 	 */
-	adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
+	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
 
-	adev->mc.stolen_size = 256 * 1024;
+	adev->gmc.stolen_size = 256 * 1024;
 
 
 	/* set DMA mask + need_dma32 flags.
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * PCIE - can handle 40-bits.
@@ -1086,7 +1105,6 @@ static int gmc_v8_0_sw_init(void *handle)
 	 */
 	adev->need_dma32 = false;
 	dma_bits = adev->need_dma32 ? 32 : 40;
-	adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
 	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
 	if (r) {
 		adev->need_dma32 = true;
@@ -1149,8 +1167,8 @@ static int gmc_v8_0_sw_fini(void *handle)
 	amdgpu_vm_manager_fini(adev);
 	gmc_v8_0_gart_fini(adev);
 	amdgpu_bo_fini(adev);
-	release_firmware(adev->mc.fw);
-	adev->mc.fw = NULL;
+	release_firmware(adev->gmc.fw);
+	adev->gmc.fw = NULL;
 
 	return 0;
 }
@@ -1191,7 +1209,7 @@ static int gmc_v8_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 	gmc_v8_0_gart_disable(adev);
 
 	return 0;
@@ -1271,10 +1289,10 @@ static bool gmc_v8_0_check_soft_reset(void *handle)
 							SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
 	}
 	if (srbm_soft_reset) {
-		adev->mc.srbm_soft_reset = srbm_soft_reset;
+		adev->gmc.srbm_soft_reset = srbm_soft_reset;
 		return true;
 	} else {
-		adev->mc.srbm_soft_reset = 0;
+		adev->gmc.srbm_soft_reset = 0;
 		return false;
 	}
 }
@@ -1283,7 +1301,7 @@ static int gmc_v8_0_pre_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!adev->mc.srbm_soft_reset)
+	if (!adev->gmc.srbm_soft_reset)
 		return 0;
 
 	gmc_v8_0_mc_stop(adev);
@@ -1299,9 +1317,9 @@ static int gmc_v8_0_soft_reset(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 srbm_soft_reset;
 
-	if (!adev->mc.srbm_soft_reset)
+	if (!adev->gmc.srbm_soft_reset)
 		return 0;
-	srbm_soft_reset = adev->mc.srbm_soft_reset;
+	srbm_soft_reset = adev->gmc.srbm_soft_reset;
 
 	if (srbm_soft_reset) {
 		u32 tmp;
@@ -1329,7 +1347,7 @@ static int gmc_v8_0_post_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!adev->mc.srbm_soft_reset)
+	if (!adev->gmc.srbm_soft_reset)
 		return 0;
 
 	gmc_v8_0_mc_resume(adev);
@@ -1410,7 +1428,8 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
 			addr);
 		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
 			status);
-		gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client);
+		gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
+					 entry->pasid);
 	}
 
 	return 0;
@@ -1642,9 +1661,11 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
 	.get_clockgating_state = gmc_v8_0_get_clockgating_state,
 };
 
-static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
-	.flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
-	.set_pte_pde = gmc_v8_0_gart_set_pte_pde,
+static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
+	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
+	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
+	.set_pte_pde = gmc_v8_0_set_pte_pde,
 	.set_prt = gmc_v8_0_set_prt,
 	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
 	.get_vm_pde = gmc_v8_0_get_vm_pde
@@ -1655,16 +1676,16 @@ static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
 	.process = gmc_v8_0_process_interrupt,
 };
 
-static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev)
 {
-	if (adev->gart.gart_funcs == NULL)
-		adev->gart.gart_funcs = &gmc_v8_0_gart_funcs;
+	if (adev->gmc.gmc_funcs == NULL)
+		adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs;
 }
 
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->mc.vm_fault.num_types = 1;
-	adev->mc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
+	adev->gmc.vm_fault.num_types = 1;
+	adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
 }
 
 const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
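
The bulk of the gmc_v8_0.c churn above is mechanical: the old adev->mc state block and the adev->gart.gart_funcs table are folded into a single adev->gmc / amdgpu_gmc_funcs pair. A condensed orientation sketch of the resulting layout follows; the field names are the ones visible in the hunks above, and the real structure in the driver carries many more members.

/* Orientation sketch only -- not the full definition from the driver headers. */
struct amdgpu_gmc_sketch {
	/* formerly struct amdgpu_mc "adev->mc": VRAM/GART/aperture bookkeeping */
	u64				mc_mask;
	u64				gart_size;
	u64				stolen_size;
	u32				srbm_soft_reset;
	struct amdgpu_irq_src		vm_fault;
	/* formerly adev->gart.gart_funcs: the per-ASIC callback table */
	const struct amdgpu_gmc_funcs	*gmc_funcs;
};
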

+ 114 - 68
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

@@ -34,6 +34,7 @@
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
 #include "athub/athub_1_0_offset.h"
+#include "oss/osssys_4_0_offset.h"
 
 #include "soc15.h"
 #include "soc15_common.h"
@@ -263,10 +264,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 
 	if (printk_ratelimit()) {
 		dev_err(adev->dev,
-			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n",
+			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
 			entry->vmid_src ? "mmhub" : "gfxhub",
 			entry->src_id, entry->ring_id, entry->vmid,
-			entry->pas_id);
+			entry->pasid);
 		dev_err(adev->dev, "  at page 0x%016llx from %d\n",
 			addr, entry->client_id);
 		if (!amdgpu_sriov_vf(adev))
@@ -285,8 +286,8 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
 
 static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->mc.vm_fault.num_types = 1;
-	adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
+	adev->gmc.vm_fault.num_types = 1;
+	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
 }
 
 static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
@@ -316,24 +317,21 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
  */
 
 /**
- * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback
+ * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
  *
  * @adev: amdgpu_device pointer
  * @vmid: vm instance to flush
  *
  * Flush the TLB for the requested page table.
  */
-static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 					uint32_t vmid)
 {
 	/* Use register 17 for GART */
 	const unsigned eng = 17;
 	unsigned i, j;
 
-	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev);
-
-	spin_lock(&adev->mc.invalidate_lock);
+	spin_lock(&adev->gmc.invalidate_lock);
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &adev->vmhub[i];
@@ -366,11 +364,52 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 	}
 
-	spin_unlock(&adev->mc.invalidate_lock);
+	spin_unlock(&adev->gmc.invalidate_lock);
+}
+
+static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+					    unsigned vmid, uint64_t pd_addr)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
+	uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
+	uint64_t flags = AMDGPU_PTE_VALID;
+	unsigned eng = ring->vm_inv_eng;
+
+	amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
+
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
+			      lower_32_bits(pd_addr));
+
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
+			      upper_32_bits(pd_addr));
+
+	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
+
+	/* wait for the invalidate to complete */
+	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+				  1 << vmid, 1 << vmid);
+
+	return pd_addr;
+}
+
+static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+					unsigned pasid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t reg;
+
+	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
+		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
+	else
+		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
+
+	amdgpu_ring_emit_wreg(ring, reg, pasid);
 }
 
 /**
- * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO
+ * gmc_v9_0_set_pte_pde - update the page tables using MMIO
  *
  * @adev: amdgpu_device pointer
  * @cpu_pt_addr: cpu address of the page table
@@ -380,11 +419,9 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
  *
  * Update the page tables using the CPU.
  */
-static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev,
-					void *cpu_pt_addr,
-					uint32_t gpu_page_idx,
-					uint64_t addr,
-					uint64_t flags)
+static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+				uint32_t gpu_page_idx, uint64_t addr,
+				uint64_t flags)
 {
 	void __iomem *ptr = (void *)cpu_pt_addr;
 	uint64_t value;
@@ -475,10 +512,10 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 {
 	if (!(*flags & AMDGPU_PDE_PTE))
 		*addr = adev->vm_manager.vram_base_offset + *addr -
-			adev->mc.vram_start;
+			adev->gmc.vram_start;
 	BUG_ON(*addr & 0xFFFF00000000003FULL);
 
-	if (!adev->mc.translate_further)
+	if (!adev->gmc.translate_further)
 		return;
 
 	if (level == AMDGPU_VM_PDB1) {
@@ -494,34 +531,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 	}
 }
 
-static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
-	.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
-	.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
-	.get_invalidate_req = gmc_v9_0_get_invalidate_req,
+static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
+	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
+	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
+	.set_pte_pde = gmc_v9_0_set_pte_pde,
 	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
 	.get_vm_pde = gmc_v9_0_get_vm_pde
 };
 
-static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
 {
-	if (adev->gart.gart_funcs == NULL)
-		adev->gart.gart_funcs = &gmc_v9_0_gart_funcs;
+	if (adev->gmc.gmc_funcs == NULL)
+		adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
 }
 
 static int gmc_v9_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	gmc_v9_0_set_gart_funcs(adev);
+	gmc_v9_0_set_gmc_funcs(adev);
 	gmc_v9_0_set_irq_funcs(adev);
 
-	adev->mc.shared_aperture_start = 0x2000000000000000ULL;
-	adev->mc.shared_aperture_end =
-		adev->mc.shared_aperture_start + (4ULL << 30) - 1;
-	adev->mc.private_aperture_start =
-		adev->mc.shared_aperture_end + 1;
-	adev->mc.private_aperture_end =
-		adev->mc.private_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+	adev->gmc.shared_aperture_end =
+		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.private_aperture_start =
+		adev->gmc.shared_aperture_end + 1;
+	adev->gmc.private_aperture_end =
+		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
 	return 0;
 }
@@ -635,7 +673,7 @@ static int gmc_v9_0_late_init(void *handle)
 	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
 		BUG_ON(vm_inv_eng[i] > 16);
 
-	if (adev->asic_type == CHIP_VEGA10) {
+	if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
 		r = gmc_v9_0_ecc_available(adev);
 		if (r == 1) {
 			DRM_INFO("ECC is active.\n");
@@ -647,16 +685,16 @@ static int gmc_v9_0_late_init(void *handle)
 		}
 	}
 
-	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 }
 
 static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
-					struct amdgpu_mc *mc)
+					struct amdgpu_gmc *mc)
 {
 	u64 base = 0;
 	if (!amdgpu_sriov_vf(adev))
 		base = mmhub_v1_0_get_fb_location(adev);
-	amdgpu_device_vram_location(adev, &adev->mc, base);
+	amdgpu_device_vram_location(adev, &adev->gmc, base);
 	amdgpu_device_gart_location(adev, mc);
 	/* base offset of vram pages */
 	if (adev->flags & AMD_IS_APU)
@@ -680,10 +718,14 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	int chansize, numchan;
 	int chansize, numchan;
 	int r;
 	int r;
 
 
-	adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
-	if (!adev->mc.vram_width) {
+	if (amdgpu_emu_mode != 1)
+		adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
+	if (!adev->gmc.vram_width) {
 		/* hbm memory channel size */
 		/* hbm memory channel size */
-		chansize = 128;
+		if (adev->flags & AMD_IS_APU)
+			chansize = 64;
+		else
+			chansize = 128;
 
 
 		tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
 		tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
 		tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
 		tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
@@ -718,43 +760,49 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 			numchan = 2;
 			numchan = 2;
 			break;
 			break;
 		}
 		}
-		adev->mc.vram_width = numchan * chansize;
+		adev->gmc.vram_width = numchan * chansize;
 	}
 	}
 
 
 	/* size in MB on si */
 	/* size in MB on si */
-	adev->mc.mc_vram_size =
+	adev->gmc.mc_vram_size =
 		adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
 		adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
-	adev->mc.real_vram_size = adev->mc.mc_vram_size;
+	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
 
 
 	if (!(adev->flags & AMD_IS_APU)) {
 	if (!(adev->flags & AMD_IS_APU)) {
 		r = amdgpu_device_resize_fb_bar(adev);
 		r = amdgpu_device_resize_fb_bar(adev);
 		if (r)
 		if (r)
 			return r;
 			return r;
 	}
 	}
-	adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
-	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
 
+#ifdef CONFIG_X86_64
+	if (adev->flags & AMD_IS_APU) {
+		adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev);
+		adev->gmc.aper_size = adev->gmc.real_vram_size;
+	}
+#endif
 	/* In case the PCI BAR is larger than the actual amount of vram */
 	/* In case the PCI BAR is larger than the actual amount of vram */
-	adev->mc.visible_vram_size = adev->mc.aper_size;
-	if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
-		adev->mc.visible_vram_size = adev->mc.real_vram_size;
+	adev->gmc.visible_vram_size = adev->gmc.aper_size;
+	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
 
 
 	/* set the gart size */
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
 	if (amdgpu_gart_size == -1) {
 		switch (adev->asic_type) {
 		switch (adev->asic_type) {
 		case CHIP_VEGA10:  /* all engines support GPUVM */
 		case CHIP_VEGA10:  /* all engines support GPUVM */
 		default:
 		default:
-			adev->mc.gart_size = 256ULL << 20;
+			adev->gmc.gart_size = 512ULL << 20;
 			break;
 			break;
 		case CHIP_RAVEN:   /* DCE SG support */
 		case CHIP_RAVEN:   /* DCE SG support */
-			adev->mc.gart_size = 1024ULL << 20;
+			adev->gmc.gart_size = 1024ULL << 20;
 			break;
 			break;
 		}
 		}
 	} else {
 	} else {
-		adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
 	}
 	}
 
 
-	gmc_v9_0_vram_gtt_location(adev, &adev->mc);
+	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -786,23 +834,21 @@ static int gmc_v9_0_sw_init(void *handle)
 	gfxhub_v1_0_init(adev);
 	gfxhub_v1_0_init(adev);
 	mmhub_v1_0_init(adev);
 	mmhub_v1_0_init(adev);
 
 
-	spin_lock_init(&adev->mc.invalidate_lock);
+	spin_lock_init(&adev->gmc.invalidate_lock);
 
 
+	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
 	switch (adev->asic_type) {
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 	case CHIP_RAVEN:
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
 		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
 		} else {
 		} else {
 			/* vm_size is 128TB + 512GB for legacy 3-level page support */
 			/* vm_size is 128TB + 512GB for legacy 3-level page support */
 			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
 			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
-			adev->mc.translate_further =
+			adev->gmc.translate_further =
 				adev->vm_manager.num_level > 1;
 				adev->vm_manager.num_level > 1;
 		}
 		}
 		break;
 		break;
 	case CHIP_VEGA10:
 	case CHIP_VEGA10:
-		/* XXX Don't know how to get VRAM type yet. */
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
 		/*
 		/*
 		 * To fulfill 4-level page support,
 		 * To fulfill 4-level page support,
 		 * vm size is 256TB (48bit), maximum size of Vega10,
 		 * vm size is 256TB (48bit), maximum size of Vega10,
@@ -815,10 +861,10 @@ static int gmc_v9_0_sw_init(void *handle)
 	}
 	}
 
 
 	/* This interrupt is VMC page fault.*/
 	/* This interrupt is VMC page fault.*/
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
-				&adev->mc.vm_fault);
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0,
-				&adev->mc.vm_fault);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0,
+				&adev->gmc.vm_fault);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0,
+				&adev->gmc.vm_fault);
 
 
 	if (r)
 	if (r)
 		return r;
 		return r;
@@ -827,13 +873,13 @@ static int gmc_v9_0_sw_init(void *handle)
 	 * This is the max address of the GPU's
 	 * This is the max address of the GPU's
 	 * internal address space.
 	 * internal address space.
 	 */
 	 */
-	adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
 
 	/*
 	/*
 	 * It needs to reserve 8M stolen memory for vega10
 	 * It needs to reserve 8M stolen memory for vega10
 	 * TODO: Figure out how to avoid that...
 	 * TODO: Figure out how to avoid that...
 	 */
 	 */
-	adev->mc.stolen_size = 8 * 1024 * 1024;
+	adev->gmc.stolen_size = 8 * 1024 * 1024;
 
 
 	/* set DMA mask + need_dma32 flags.
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 44-bits.
 	 * PCIE - can handle 44-bits.
@@ -975,7 +1021,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
 	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
 
 
 	/* After HDP is initialized, flush HDP.*/
 	/* After HDP is initialized, flush HDP.*/
-	adev->nbio_funcs->hdp_flush(adev);
+	adev->nbio_funcs->hdp_flush(adev, NULL);
 
 
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		value = false;
 		value = false;
@@ -984,10 +1030,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 
 
 	gfxhub_v1_0_set_fault_enable_default(adev, value);
 	gfxhub_v1_0_set_fault_enable_default(adev, value);
 	mmhub_v1_0_set_fault_enable_default(adev, value);
 	mmhub_v1_0_set_fault_enable_default(adev, value);
-	gmc_v9_0_gart_flush_gpu_tlb(adev, 0);
+	gmc_v9_0_flush_gpu_tlb(adev, 0);
 
 
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-		 (unsigned)(adev->mc.gart_size >> 20),
+		 (unsigned)(adev->gmc.gart_size >> 20),
 		 (unsigned long long)adev->gart.table_addr);
 		 (unsigned long long)adev->gart.table_addr);
 	adev->gart.ready = true;
 	adev->gart.ready = true;
 	return 0;
 	return 0;
@@ -1038,7 +1084,7 @@ static int gmc_v9_0_hw_fini(void *handle)
 		return 0;
 		return 0;
 	}
 	}
 
 
-	amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 	gmc_v9_0_gart_disable(adev);
 	gmc_v9_0_gart_disable(adev);
 
 
 	return 0;
 	return 0;
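
Callers that used to go through adev->gart.gart_funcs now dispatch through the renamed table. A minimal, hypothetical caller (not taken from the patch, shown only to illustrate the renamed dispatch path) would look like this:

/* Hypothetical helper, only to illustrate the renamed dispatch path. */
static void example_flush_vmid(struct amdgpu_device *adev, uint32_t vmid)
{
	const struct amdgpu_gmc_funcs *funcs = adev->gmc.gmc_funcs;

	/* was: adev->gart.gart_funcs->flush_gpu_tlb(adev, vmid); */
	if (funcs && funcs->flush_gpu_tlb)
		funcs->flush_gpu_tlb(adev, vmid);
}
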

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/iceland_ih.c

@@ -111,7 +111,7 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
 	iceland_ih_disable_interrupts(adev);
 
 	/* setup interrupt control */
-	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
 	interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
 	/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
 	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -260,7 +260,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
 	entry->vmid = (dw[2] >> 8) & 0xff;
-	entry->pas_id = (dw[2] >> 16) & 0xffff;
+	entry->pasid = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
 	adev->irq.ih.rptr += 16;

+ 14 - 3
drivers/gpu/drm/amd/amdgpu/kv_dpm.c

@@ -42,6 +42,8 @@
 #define KV_MINIMUM_ENGINE_CLOCK         800
 #define KV_MINIMUM_ENGINE_CLOCK         800
 #define SMC_RAM_END                     0x40000
 #define SMC_RAM_END                     0x40000
 
 
+static const struct amd_pm_funcs kv_dpm_funcs;
+
 static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev);
 static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev);
 static int kv_enable_nb_dpm(struct amdgpu_device *adev,
 static int kv_enable_nb_dpm(struct amdgpu_device *adev,
 			    bool enable);
 			    bool enable);
@@ -2960,6 +2962,7 @@ static int kv_dpm_early_init(void *handle)
 {
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
+	adev->powerplay.pp_funcs = &kv_dpm_funcs;
 	kv_dpm_set_irq_funcs(adev);
 	kv_dpm_set_irq_funcs(adev);
 
 
 	return 0;
 	return 0;
@@ -3301,7 +3304,7 @@ static int kv_dpm_read_sensor(void *handle, int idx,
 	}
 	}
 }
 }
 
 
-const struct amd_ip_funcs kv_dpm_ip_funcs = {
+static const struct amd_ip_funcs kv_dpm_ip_funcs = {
 	.name = "kv_dpm",
 	.name = "kv_dpm",
 	.early_init = kv_dpm_early_init,
 	.early_init = kv_dpm_early_init,
 	.late_init = kv_dpm_late_init,
 	.late_init = kv_dpm_late_init,
@@ -3318,8 +3321,16 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = {
 	.set_powergating_state = kv_dpm_set_powergating_state,
 	.set_powergating_state = kv_dpm_set_powergating_state,
 };
 };
 
 
-const struct amd_pm_funcs kv_dpm_funcs = {
-	.get_temperature = &kv_dpm_get_temp,
+const struct amdgpu_ip_block_version kv_smu_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 1,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &kv_dpm_ip_funcs,
+};
+
+static const struct amd_pm_funcs kv_dpm_funcs = {
 	.pre_set_power_state = &kv_dpm_pre_set_power_state,
 	.pre_set_power_state = &kv_dpm_pre_set_power_state,
 	.set_power_state = &kv_dpm_set_power_state,
 	.set_power_state = &kv_dpm_set_power_state,
 	.post_set_power_state = &kv_dpm_post_set_power_state,
 	.post_set_power_state = &kv_dpm_post_set_power_state,
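
kv_dpm.c follows the pattern used by the other SMU/powerplay conversions in this series: the amd_pm_funcs table becomes file-local and is installed from early_init, while an amdgpu_ip_block_version is exported for the SoC code to register. A registration call would look roughly like the sketch below; the real call site lives in the CIK SoC setup code, which is not part of this diff.

/* Sketch: wiring the exported SMU IP block into the device's IP list. */
static int example_add_kv_smu(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
}
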

+ 50 - 42
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c

@@ -50,7 +50,7 @@ static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
 	uint64_t value;
 
 	BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
-	value = adev->gart.table_addr - adev->mc.vram_start +
+	value = adev->gart.table_addr - adev->gmc.vram_start +
 		adev->vm_manager.vram_base_offset;
 	value &= 0x0000FFFFFFFFF000ULL;
 	value |= 0x1; /* valid bit */
@@ -67,14 +67,14 @@ static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
 	mmhub_v1_0_init_gart_pt_regs(adev);
 	mmhub_v1_0_init_gart_pt_regs(adev);
 
 
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
-		     (u32)(adev->mc.gart_start >> 12));
+		     (u32)(adev->gmc.gart_start >> 12));
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
-		     (u32)(adev->mc.gart_start >> 44));
+		     (u32)(adev->gmc.gart_start >> 44));
 
 
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
-		     (u32)(adev->mc.gart_end >> 12));
+		     (u32)(adev->gmc.gart_end >> 12));
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
-		     (u32)(adev->mc.gart_end >> 44));
+		     (u32)(adev->gmc.gart_end >> 44));
 }
 }
 
 
 static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
@@ -89,12 +89,12 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 
 
 	/* Program the system aperture low logical page number. */
 	/* Program the system aperture low logical page number. */
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-		     adev->mc.vram_start >> 18);
+		     adev->gmc.vram_start >> 18);
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-		     adev->mc.vram_end >> 18);
+		     adev->gmc.vram_end >> 18);
 
 
 	/* Set default page address. */
 	/* Set default page address. */
-	value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
+	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
 		adev->vm_manager.vram_base_offset;
 		adev->vm_manager.vram_base_offset;
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 		     (u32)(value >> 12));
 		     (u32)(value >> 12));
@@ -103,9 +103,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 
 
 	/* Program "protection fault". */
 	/* Program "protection fault". */
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
-		     (u32)(adev->dummy_page.addr >> 12));
+		     (u32)(adev->dummy_page_addr >> 12));
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
-		     (u32)((u64)adev->dummy_page.addr >> 44));
+		     (u32)((u64)adev->dummy_page_addr >> 44));
 
 
 	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
 	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
@@ -155,7 +155,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
 
 
-	if (adev->mc.translate_further) {
+	if (adev->gmc.translate_further) {
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
 				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
 				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
@@ -207,7 +207,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
 
 
 	num_level = adev->vm_manager.num_level;
 	num_level = adev->vm_manager.num_level;
 	block_size = adev->vm_manager.block_size;
 	block_size = adev->vm_manager.block_size;
-	if (adev->mc.translate_further)
+	if (adev->gmc.translate_further)
 		num_level -= 1;
 		num_level -= 1;
 	else
 	else
 		block_size -= 9;
 		block_size -= 9;
@@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = {
 	{0x11, 0x6a684},
 	{0x11, 0x6a684},
 	{0x19, 0xea68e},
 	{0x19, 0xea68e},
 	{0x29, 0xa69e},
 	{0x29, 0xa69e},
-	{0x2b, 0x34a6c0},
-	{0x61, 0x83a707},
-	{0xe6, 0x8a7a4},
-	{0xf0, 0x1a7b8},
-	{0xf3, 0xfa7cc},
-	{0x104, 0x17a7dd},
-	{0x11d, 0xa7dc},
-	{0x11f, 0x12a7f5},
-	{0x133, 0xa808},
-	{0x135, 0x12a810},
-	{0x149, 0x7a82c}
+	{0x2b, 0x0010a6c0},
+	{0x3d, 0x83a707},
+	{0xc2, 0x8a7a4},
+	{0xcc, 0x1a7b8},
+	{0xcf, 0xfa7cc},
+	{0xe0, 0x17a7dd},
+	{0xf9, 0xa7dc},
+	{0xfb, 0x12a7f5},
+	{0x10f, 0xa808},
+	{0x111, 0x12a810},
+	{0x125, 0x7a82c}
 };
 };
 #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
 #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
 
 
-#define PCTL0_RENG_EXEC_END_PTR 0x151
+#define PCTL0_RENG_EXEC_END_PTR 0x12d
 #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE  0xa640
 #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE  0xa640
 #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
 #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
 
 
@@ -385,10 +385,9 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 	if (amdgpu_sriov_vf(adev))
 		return;
 		return;
 
 
+	/****************** pctl0 **********************/
 	pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
 	pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
 	pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
 	pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
-	pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
-	pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
 
 
 	/* Light sleep must be disabled before writing to pctl0 registers */
 	/* Light sleep must be disabled before writing to pctl0 registers */
 	pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
 	pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
 			pctl0_data[i].data);
 			pctl0_data[i].data);
         }
         }
 
 
-	/* Set the reng execute end ptr for pctl0 */
-	pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
-					PCTL0_RENG_EXECUTE,
-					RENG_EXECUTE_END_PTR,
-					PCTL0_RENG_EXEC_END_PTR);
-	WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+	/* Re-enable light sleep */
+	pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
+	WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
+
+	/****************** pctl1 **********************/
+	pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
+	pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
 
 
 	/* Light sleep must be disabled before writing to pctl1 registers */
 	/* Light sleep must be disabled before writing to pctl1 registers */
 	pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
 	pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
 			pctl1_data[i].data);
 			pctl1_data[i].data);
         }
         }
 
 
+	/* Re-enable light sleep */
+	pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
+	WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
+
+	mmhub_v1_0_power_gating_write_save_ranges(adev);
+
+	/* Set the reng execute end ptr for pctl0 */
+	pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+					PCTL0_RENG_EXECUTE,
+					RENG_EXECUTE_END_PTR,
+					PCTL0_RENG_EXEC_END_PTR);
+	WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+
 	/* Set the reng execute end ptr for pctl1 */
 	/* Set the reng execute end ptr for pctl1 */
 	pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
 	pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
 					PCTL1_RENG_EXECUTE,
 					PCTL1_RENG_EXECUTE,
 					RENG_EXECUTE_END_PTR,
 					RENG_EXECUTE_END_PTR,
 					PCTL1_RENG_EXEC_END_PTR);
 					PCTL1_RENG_EXEC_END_PTR);
 	WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
 	WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
-
-	mmhub_v1_0_power_gating_write_save_ranges(adev);
-
-	/* Re-enable light sleep */
-	pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
-	WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
-	pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
-	WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
 }
 }
 
 
 void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
 void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
@@ -466,6 +471,9 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
 						RENG_EXECUTE_ON_REG_UPDATE, 1);
 						RENG_EXECUTE_ON_REG_UPDATE, 1);
 		WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
 		WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
 
 
+		if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu)
+			amdgpu_dpm_set_mmhub_powergating_by_smu(adev);
+
 	} else {
 	} else {
 		pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
 		pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
 						PCTL0_RENG_EXECUTE,
 						PCTL0_RENG_EXECUTE,
@@ -494,9 +502,9 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
 		 * SRIOV driver need to program them
 		 * SRIOV driver need to program them
 		 */
 		 */
 		WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
 		WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
-			     adev->mc.vram_start >> 24);
+			     adev->gmc.vram_start >> 24);
 		WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
 		WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
-			     adev->mc.vram_end >> 24);
+			     adev->gmc.vram_end >> 24);
 	}
 	}
 
 
 	/* GART Enable. */
 	/* GART Enable. */

+ 105 - 95
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c

@@ -33,56 +33,34 @@
 
 
 static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
 static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
 {
 {
-	u32 reg;
-	int timeout = AI_MAILBOX_TIMEDOUT;
-	u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
-	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-					     mmBIF_BX_PF0_MAILBOX_CONTROL));
-	reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
-	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-				       mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
-
-	/*Wait for RCV_MSG_VALID to be 0*/
-	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-					     mmBIF_BX_PF0_MAILBOX_CONTROL));
-	while (reg & mask) {
-		if (timeout <= 0) {
-			pr_err("RCV_MSG_VALID is not cleared\n");
-			break;
-		}
-		mdelay(1);
-		timeout -=1;
-
-		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-						     mmBIF_BX_PF0_MAILBOX_CONTROL));
-	}
+	WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
 }
 }
 
 
 static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
 static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
 {
 {
-	u32 reg;
+	WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
 
 
-	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-					     mmBIF_BX_PF0_MAILBOX_CONTROL));
-	reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
-			    TRN_MSG_VALID, val ? 1 : 0);
-	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
-		      reg);
+/*
+ * this peek_msg could *only* be called in IRQ routine because in IRQ routine
+ * RCV_MSG_VALID field of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if not called in IRQ routine, this peek_msg is not guaranteed to return the
+ * correct value since it doesn't return the RCV_DW0 under the case that
+ * RCV_MSG_VALID is set by host.
+ */
+static enum idh_event xgpu_ai_mailbox_peek_msg(struct amdgpu_device *adev)
+{
+	return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+				mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
 }
 }
 
 
+
 static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
 				   enum idh_event event)
 {
 	u32 reg;
-	u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
-	if (event != IDH_FLR_NOTIFICATION_CMPL) {
-		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-						     mmBIF_BX_PF0_MAILBOX_CONTROL));
-		if (!(reg & mask))
-			return -ENOENT;
-	}
 
 	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 					     mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
@@ -94,54 +72,67 @@ static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
 	return 0;
 }
 
+static uint8_t xgpu_ai_peek_ack(struct amdgpu_device *adev) {
+	return RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
+}
+
 static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 {
 {
-	int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
-	u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK);
-	u32 reg;
+	int timeout  = AI_MAILBOX_POLL_ACK_TIMEDOUT;
+	u8 reg;
+
+	do {
+		reg = RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
+		if (reg & 2)
+			return 0;
 
 
-	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-					     mmBIF_BX_PF0_MAILBOX_CONTROL));
-	while (!(reg & mask)) {
-		if (timeout <= 0) {
-			pr_err("Doesn't get ack from pf.\n");
-			r = -ETIME;
-			break;
-		}
 		mdelay(5);
 		mdelay(5);
 		timeout -= 5;
 		timeout -= 5;
+	} while (timeout > 1);
 
 
-		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-						     mmBIF_BX_PF0_MAILBOX_CONTROL));
-	}
+	pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
 
 
-	return r;
+	return -ETIME;
 }
 }
 
 
 static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 {
 {
-	int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
-
-	r = xgpu_ai_mailbox_rcv_msg(adev, event);
-	while (r) {
-		if (timeout <= 0) {
-			pr_err("Doesn't get msg:%d from pf.\n", event);
-			r = -ETIME;
-			break;
-		}
-		mdelay(5);
-		timeout -= 5;
+	int r, timeout = AI_MAILBOX_POLL_MSG_TIMEDOUT;
 
 
+	do {
 		r = xgpu_ai_mailbox_rcv_msg(adev, event);
 		r = xgpu_ai_mailbox_rcv_msg(adev, event);
-	}
+		if (!r)
+			return 0;
 
 
-	return r;
+		msleep(10);
+		timeout -= 10;
+	} while (timeout > 1);
+
+	pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+
+	return -ETIME;
 }
 }
 
 
 static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
 static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
 	      enum idh_request req, u32 data1, u32 data2, u32 data3) {
 	      enum idh_request req, u32 data1, u32 data2, u32 data3) {
 	u32 reg;
 	u32 reg;
 	int r;
 	int r;
+	uint8_t trn;
+
+	/* IMPORTANT:
+	 * clear TRN_MSG_VALID to clear host's RCV_MSG_ACK
+	 * and with host's RCV_MSG_ACK cleared hw automatically clears host's RCV_MSG_ACK
+	 * which leads to VF's TRN_MSG_ACK cleared, otherwise below xgpu_ai_poll_ack()
+	 * will return immediately
+	 */
+	do {
+		xgpu_ai_mailbox_set_valid(adev, false);
+		trn = xgpu_ai_peek_ack(adev);
+		if (trn) {
+			pr_err("trn=%x ACK should not asssert! wait again !\n", trn);
+			msleep(1);
+		}
+	} while(trn);
 
 
 	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 					     mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
 					     mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
@@ -245,15 +236,36 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 {
 {
 	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
 	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
 	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
 	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
-
-	/* wait until RCV_MSG become 3 */
-	if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
-		pr_err("failed to recieve FLR_CMPL\n");
-		return;
-	}
-
-	/* Trigger recovery due to world switch failure */
-	amdgpu_device_gpu_recover(adev, NULL, false);
+	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
+	int locked;
+
+	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
+	 * otherwise the mailbox msg will be ruined/reset by
+	 * the VF FLR.
+	 *
+	 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
+	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
+	 * which means host side had finished this VF's FLR.
+	 */
+	locked = mutex_trylock(&adev->lock_reset);
+	if (locked)
+		adev->in_gpu_reset = 1;
+
+	do {
+		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+			goto flr_done;
+
+		msleep(10);
+		timeout -= 10;
+	} while (timeout > 1);
+
+flr_done:
+	if (locked)
+		mutex_unlock(&adev->lock_reset);
+
+	/* Trigger recovery for world switch failure if no TDR */
+	if (amdgpu_lockup_timeout == 0)
+		amdgpu_device_gpu_recover(adev, NULL, true);
 }
 }
 
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -274,24 +286,22 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
 				   struct amdgpu_irq_src *source,
 				   struct amdgpu_irq_src *source,
 				   struct amdgpu_iv_entry *entry)
 				   struct amdgpu_iv_entry *entry)
 {
 {
-	int r;
-
-	/* trigger gpu-reset by hypervisor only if TDR disbaled */
-	if (!amdgpu_gpu_recovery) {
-		/* see what event we get */
-		r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
-
-		/* sometimes the interrupt is delayed to inject to VM, so under such case
-		 * the IDH_FLR_NOTIFICATION is overwritten by VF FLR from GIM side, thus
-		 * above recieve message could be failed, we should schedule the flr_work
-		 * anyway
+	enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+
+	switch (event) {
+		case IDH_FLR_NOTIFICATION:
+		if (amdgpu_sriov_runtime(adev))
+			schedule_work(&adev->virt.flr_work);
+		break;
+		/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+		 * it for now since that polling thread will handle it,
+		 * other msgs like flr complete are not handled here.
 		 */
 		 */
-		if (r) {
-			DRM_ERROR("FLR_NOTIFICATION is missed\n");
-			xgpu_ai_mailbox_send_ack(adev);
-		}
-
-		schedule_work(&adev->virt.flr_work);
+		case IDH_CLR_MSG_BUF:
+		case IDH_FLR_NOTIFICATION_CMPL:
+		case IDH_READY_TO_ACCESS_GPU:
+		default:
+		break;
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -319,11 +329,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
 {
 {
 	int r;
 	int r;
 
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
 	if (r) {
 	if (r) {
 		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
 		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
 		return r;
 		return r;

Some files were not shown because too many files changed in this diff