Merge tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux

Pull drm updates from Dave Airlie:
 "Cannonlake and Vega12 support are probably the two major things. This
  pull lacks nouveau, Ben had some unforeseen leave and a few other
  blockers so we'll see how things look or maybe leave it for this merge
  window.

  core:
   - Device links to handle sound/gpu pm dependency
   - Color encoding/range properties
   - Plane clipping into plane check helper
   - Backlight helpers
   - DP TP4 + HBR3 helper support

  amdgpu:
   - Vega12 support
   - Enable DC by default on all supported GPUs
   - Powerplay restructuring and cleanup
   - DC bandwidth calc updates
   - DC backlight on pre-DCE11
   - TTM backing store dropping support
   - SR-IOV fixes
   - Adding "wattman" like functionality
   - DC crc support
   - Improved DC dual-link handling

  amdkfd:
   - GPUVM support for dGPU
   - KFD events for dGPU
   - Enable PCIe atomics for dGPUs
   - HSA process eviction support
   - Live-lock fixes for process eviction
   - VM page table allocation fix for large-bar systems

  panel:
   - Raydium RM68200
   - AUO G104SN02 V2
   - KOE TX31D200VM0BAA
   - ARM Versatile panels

  i915:
   - Cannonlake support enabled
   - AUX-F port support added
   - Icelake base enabling until internal milestone of forcewake support
   - Query uAPI interface (used for GPU topology information currently)
   - Compressed framebuffer support for sprites
   - kmem cache shrinking when GPU is idle
   - Avoid boosting GPU when waited item is being processed already
   - Avoid retraining LSPCON link unnecessarily
   - Decrease request signaling latency
   - Deprecation of I915_SET_COLORKEY_NONE
   - Kerneldoc and compiler warning cleanup for upcoming CI enforcements
   - Full range ycbcr toggling
   - HDCP support

  i915/gvt:
   - Big refactor for shadow ppgtt
   - KBL context save/restore via LRI cmd (Weinan)
   - Properly unmap dma for guest page (Changbin)

  vmwgfx:
   - Lots of various improvements

  etnaviv:
   - Use the drm gpu scheduler
   - prep work for GC7000L support

  vc4:
   - fix alpha blending
   - Expose perf counters to userspace

  pl111:
   - Bandwidth checking/limiting
   - Versatile panel support

  sun4i:
   - A83T HDMI support
   - A80 support
   - YUV plane support
   - H3/H5 HDMI support

  omapdrm:
   - HPD support for DVI connector
   - remove lots of static variables

  msm:
   - DSI updates from 10nm / SDM845
   - fix for race condition with a3xx/a4xx fence completion irq
   - some refactoring/prep work for eventual a6xx support (ie. when we
     have a userspace)
   - a5xx debugfs enhancements
   - some mdp5 fixes/cleanups to prepare for eventually merging writeback
     support (ie. when we have a userspace)

  tegra:
   - mmap() fixes for fbdev devices
   - Overlay plane for hw cursor fix
   - dma-buf cache maintenance support

  mali-dp:
   - YUV->RGB conversion support

  rockchip:
   - rk3399/chromebook fixes and improvements

  rcar-du:
   - LVDS support move to drm bridge
   - DT bindings for R8A77995
   - Driver/DT support for R8A77970

  tilcdc:
   - DRM panel support"

* tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux: (1646 commits)
  drm/i915: Fix hibernation with ACPI S0 target state
  drm/i915/execlists: Use a locked clear_bit() for synchronisation with interrupt
  drm/i915: Specify which engines to reset following semaphore/event lockups
  drm/i915/dp: Write to SET_POWER dpcd to enable MST hub.
  drm/amdkfd: Use ordered workqueue to restore processes
  drm/amdgpu: Fix acquiring VM on large-BAR systems
  drm/amd/pp: clean header file hwmgr.h
  drm/amd/pp: use mlck_table.count for array loop index limit
  drm: Fix uabi regression by allowing garbage mode->type from userspace
  drm/amdgpu: Add an ATPX quirk for hybrid laptop
  drm/amdgpu: fix spelling mistake: "asssert" -> "assert"
  drm/amd/pp: Add new asic support in pp_psm.c
  drm/amd/pp: Clean up powerplay code on Vega12
  drm/amd/pp: Add smu irq handlers for legacy asics
  drm/amd/pp: Fix set wrong temperature range on smu7
  drm/amdgpu: Don't change preferred domian when fallback GTT v5
  drm/vmwgfx: Bump version patchlevel and date
  drm/vmwgfx: use monotonic event timestamps
  drm/vmwgfx: Unpin the screen object backup buffer when not used
  drm/vmwgfx: Stricter count of legacy surface device resources
  ...
Linus Torvalds, 7 years ago (commit 320b164abb)
100 changed files with 5599 additions and 2842 deletions
  1. +58 -0  Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
  2. +9 -4  Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt
  3. +1 -0  Documentation/devicetree/bindings/display/connector/dvi-connector.txt
  4. +3 -21  Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt
  5. +20 -6  Documentation/devicetree/bindings/display/msm/dsi.txt
  6. +31 -0  Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt
  7. +12 -0  Documentation/devicetree/bindings/display/panel/auo,g104sn02.txt
  8. +5 -0  Documentation/devicetree/bindings/display/panel/display-timing.txt
  9. +25 -0  Documentation/devicetree/bindings/display/panel/koe,tx31d200vm0baa.txt
  10. +2 -0  Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.txt
  11. +25 -0  Documentation/devicetree/bindings/display/panel/raydium,rm68200.txt
  12. +4 -0  Documentation/devicetree/bindings/display/panel/simple-panel.txt
  13. +15 -20  Documentation/devicetree/bindings/display/renesas,du.txt
  14. +74 -0  Documentation/devicetree/bindings/display/rockchip/cdn-dp-rockchip.txt
  15. +1 -1  Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
  16. +95 -9  Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
  17. +1 -0  Documentation/devicetree/bindings/vendor-prefixes.txt
  18. +2 -2  Documentation/devicetree/overlay-notes.txt
  19. +21 -0  Documentation/gpu/drivers.rst
  20. +6 -2  Documentation/gpu/drm-kms.rst
  21. +1 -8  Documentation/gpu/index.rst
  22. +0 -1  Documentation/gpu/kms-properties.csv
  23. +17 -0  Documentation/gpu/todo.rst
  24. +11 -1  MAINTAINERS
  25. +1 -1  arch/x86/kernel/devicetree.c
  26. +1 -0  drivers/dma-buf/dma-fence.c
  27. +22 -9  drivers/dma-buf/reservation.c
  28. +5 -5  drivers/dma-buf/sw_sync.c
  29. +0 -1  drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
  30. +4 -4  drivers/gpu/drm/amd/amdgpu/Makefile
  31. +36 -114  drivers/gpu/drm/amd/amdgpu/amdgpu.h
  32. +119 -41  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
  33. +96 -2  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
  34. +179 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
  35. +68 -13  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
  36. +69 -14  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
  37. +1577 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
  38. +91 -4  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
  39. +1 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
  40. +1 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
  41. +4 -4  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
  42. +4 -2  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
  43. +9 -541  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
  44. +6 -4  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
  45. +11 -11  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
  46. +12 -1  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
  47. +589 -118  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
  48. +72 -55  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
  49. +4 -3  drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
  50. +20 -15  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
  51. +30 -13  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
  52. +13 -10  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
  53. +3 -1  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
  54. +27 -27  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
  55. +0 -3  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
  56. +11 -22  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
  57. +112 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
  58. +1 -1  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
  59. +42 -16  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
  60. +283 -129  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
  61. +6 -1  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
  62. +3 -42  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
  63. +3 -50  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
  64. +1 -3  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
  65. +80 -30  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
  66. +24 -54  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
  67. +65 -63  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
  68. +7 -9  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
  69. +392 -174  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
  70. +0 -290  drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
  71. +135 -29  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
  72. +15 -35  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
  73. +26 -17  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
  74. +3 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
  75. +7 -3  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
  76. +11 -51  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
  77. +54 -2  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
  78. +1 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
  79. +11 -12  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
  80. +30 -8  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
  81. +209 -94  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
  82. +8 -1  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
  83. +1 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
  84. +49 -75  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
  85. +9 -6  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
  86. +2 -0  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
  87. +18 -69  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
  88. +78 -17  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
  89. +3 -2  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
  90. +283 -116  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
  91. +12 -1  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
  92. +3 -3  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
  93. +32 -275  drivers/gpu/drm/amd/amdgpu/ci_dpm.c
  94. +0 -7  drivers/gpu/drm/amd/amdgpu/ci_dpm.h
  95. +33 -9  drivers/gpu/drm/amd/amdgpu/cik.c
  96. +2 -0  drivers/gpu/drm/amd/amdgpu/cik.h
  97. +3 -4  drivers/gpu/drm/amd/amdgpu/cik_dpm.h
  98. +2 -2  drivers/gpu/drm/amd/amdgpu/cik_ih.c
  99. +14 -27  drivers/gpu/drm/amd/amdgpu/cik_sdma.c
  100. +2 -2  drivers/gpu/drm/amd/amdgpu/cz_ih.c

+ 58 - 0
Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt

@@ -0,0 +1,58 @@
+Renesas R-Car LVDS Encoder
+==========================
+
+These DT bindings describe the LVDS encoder embedded in the Renesas R-Car
+Gen2, R-Car Gen3 and RZ/G SoCs.
+
+Required properties:
+
+- compatible : Shall contain one of
+  - "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
+  - "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
+  - "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
+  - "renesas,r8a7793-lvds" for R8A7793 (R-Car M2-N) compatible LVDS encoders
+  - "renesas,r8a7795-lvds" for R8A7795 (R-Car H3) compatible LVDS encoders
+  - "renesas,r8a7796-lvds" for R8A7796 (R-Car M3-W) compatible LVDS encoders
+  - "renesas,r8a77970-lvds" for R8A77970 (R-Car V3M) compatible LVDS encoders
+  - "renesas,r8a77995-lvds" for R8A77995 (R-Car D3) compatible LVDS encoders
+
+- reg: Base address and length for the memory-mapped registers
+- clocks: A phandle + clock-specifier pair for the functional clock
+- resets: A phandle + reset specifier for the module reset
+
+Required nodes:
+
+The LVDS encoder has two video ports. Their connections are modelled using the
+OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+- Video port 0 corresponds to the parallel RGB input
+- Video port 1 corresponds to the LVDS output
+
+Each port shall have a single endpoint.
+
+
+Example:
+
+	lvds0: lvds@feb90000 {
+		compatible = "renesas,r8a7790-lvds";
+		reg = <0 0xfeb90000 0 0x1c>;
+		clocks = <&cpg CPG_MOD 726>;
+		resets = <&cpg 726>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				lvds0_in: endpoint {
+					remote-endpoint = <&du_out_lvds0>;
+				};
+			};
+			port@1 {
+				reg = <1>;
+				lvds0_out: endpoint {
+				};
+			};
+		};
+	};

+ 9 - 4
Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt → Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt

@@ -1,11 +1,16 @@
-THS8135 Video DAC
------------------
+THS8134 and THS8135 Video DAC
+-----------------------------
 
-This is the binding for Texas Instruments THS8135 Video DAC bridge.
+This is the binding for Texas Instruments THS8134, THS8134A, THS8134B and
+THS8135 Video DAC bridges.
 
 Required properties:
 
-- compatible: Must be "ti,ths8135"
+- compatible: Must be one of
+  "ti,ths8134"
+  "ti,ths8134a," "ti,ths8134"
+  "ti,ths8134b", "ti,ths8134"
+  "ti,ths8135"
 
 Required nodes:
 

+ 1 - 0
Documentation/devicetree/bindings/display/connector/dvi-connector.txt

@@ -10,6 +10,7 @@ Optional properties:
 - analog: the connector has DVI analog pins
 - digital: the connector has DVI digital pins
 - dual-link: the connector has pins for DVI dual-link
+- hpd-gpios: HPD GPIO number
 
 Required nodes:
 - Video port for DVI input

+ 3 - 21
Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt

@@ -1,23 +1,3 @@
-Etnaviv DRM master device
-=========================
-
-The Etnaviv DRM master device is a virtual device needed to list all
-Vivante GPU cores that comprise the GPU subsystem.
-
-Required properties:
-- compatible: Should be one of
-    "fsl,imx-gpu-subsystem"
-    "marvell,dove-gpu-subsystem"
-- cores: Should contain a list of phandles pointing to Vivante GPU devices
-
-example:
-
-gpu-subsystem {
-	compatible = "fsl,imx-gpu-subsystem";
-	cores = <&gpu_2d>, <&gpu_3d>;
-};
-
-
 Vivante GPU core devices
 ========================
 
@@ -32,7 +12,9 @@ Required properties:
 - clocks: should contain one clock for entry in clock-names
   see Documentation/devicetree/bindings/clock/clock-bindings.txt
 - clock-names:
-   - "bus":    AXI/register clock
+   - "bus":    AXI/master interface clock
+   - "reg":    AHB/slave interface clock
+               (only required if GPU can gate slave interface independently)
    - "core":   GPU core clock
    - "shader": Shader clock (only required if GPU has feature PIPE_3D)
 

+ 20 - 6
Documentation/devicetree/bindings/display/msm/dsi.txt

@@ -7,8 +7,6 @@ Required properties:
 - reg: Physical base address and length of the registers of controller
 - reg-names: The names of register regions. The following regions are required:
   * "dsi_ctrl"
-- qcom,dsi-host-index: The ID of DSI controller hardware instance. This should
-  be 0 or 1, since we have 2 DSI controllers at most for now.
 - interrupts: The interrupt signal from the DSI block.
 - power-domains: Should be <&mmcc MDSS_GDSC>.
 - clocks: Phandles to device clocks.
@@ -22,6 +20,8 @@ Required properties:
   * "core"
   For DSIv2, we need an additional clock:
    * "src"
+  For DSI6G v2.0 onwards, we also need the clock:
+   * "byte_intf"
 - assigned-clocks: Parents of "byte" and "pixel" for the given platform.
 - assigned-clock-parents: The Byte clock and Pixel clock PLL outputs provided
   by a DSI PHY block. See [1] for details on clock bindings.
@@ -88,21 +88,35 @@ Required properties:
   * "qcom,dsi-phy-28nm-lp"
   * "qcom,dsi-phy-20nm"
   * "qcom,dsi-phy-28nm-8960"
-- reg: Physical base address and length of the registers of PLL, PHY and PHY
-  regulator
+  * "qcom,dsi-phy-14nm"
+  * "qcom,dsi-phy-10nm"
+- reg: Physical base address and length of the registers of PLL, PHY. Some
+  revisions require the PHY regulator base address, whereas others require the
+  PHY lane base address. See below for each PHY revision.
 - reg-names: The names of register regions. The following regions are required:
+  For DSI 28nm HPM/LP/8960 PHYs and 20nm PHY:
   * "dsi_pll"
   * "dsi_phy"
   * "dsi_phy_regulator"
+  For DSI 14nm and 10nm PHYs:
+  * "dsi_pll"
+  * "dsi_phy"
+  * "dsi_phy_lane"
 - clock-cells: Must be 1. The DSI PHY block acts as a clock provider, creating
   2 clocks: A byte clock (index 0), and a pixel clock (index 1).
-- qcom,dsi-phy-index: The ID of DSI PHY hardware instance. This should
-  be 0 or 1, since we have 2 DSI PHYs at most for now.
 - power-domains: Should be <&mmcc MDSS_GDSC>.
 - clocks: Phandles to device clocks. See [1] for details on clock bindings.
 - clock-names: the following clocks are required:
   * "iface"
+  For 28nm HPM/LP, 28nm 8960 PHYs:
+- vddio-supply: phandle to vdd-io regulator device node
+  For 20nm PHY:
 - vddio-supply: phandle to vdd-io regulator device node
+- vcca-supply: phandle to vcca regulator device node
+  For 14nm PHY:
+- vcca-supply: phandle to vcca regulator device node
+  For 10nm PHY:
+- vdds-supply: phandle to vdds regulator device node
 
 Optional properties:
 - qcom,dsi-phy-regulator-ldo-mode: Boolean value indicating if the LDO mode PHY

+ 31 - 0
Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt

@@ -0,0 +1,31 @@
+ARM Versatile TFT Panels
+
+These panels are connected to the daughterboards found on the
+ARM Versatile reference designs.
+
+This device node must appear as a child to a "syscon"-compatible
+node.
+
+Required properties:
+- compatible: should be "arm,versatile-tft-panel"
+
+Required subnodes:
+- port: see display/panel/panel-common.txt, graph.txt
+
+
+Example:
+
+sysreg@0 {
+	compatible = "arm,versatile-sysreg", "syscon", "simple-mfd";
+	reg = <0x00000 0x1000>;
+
+	panel: display@0 {
+		compatible = "arm,versatile-tft-panel";
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&foo>;
+			};
+		};
+	};
+};

+ 12 - 0
Documentation/devicetree/bindings/display/panel/auo,g104sn02.txt

@@ -0,0 +1,12 @@
+AU Optronics Corporation 10.4" (800x600) color TFT LCD panel
+
+Required properties:
+- compatible: should be "auo,g104sn02"
+- power-supply: as specified in the base binding
+
+Optional properties:
+- backlight: as specified in the base binding
+- enable-gpios: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.

+ 5 - 0
Documentation/devicetree/bindings/display/panel/display-timing.txt

@@ -80,6 +80,11 @@ The parameters are defined as:
   |          |        v                            |          |       |
   +----------+-------------------------------------+----------+-------+
 
+Note: In addition to being used as subnode(s) of display-timings, the timing
+      subnode may also be used on its own. This is appropriate if only one mode
+      need be conveyed. In this case, the node should be named 'panel-timing'.
+
+
 Example:
 
 	display-timings {

+ 25 - 0
Documentation/devicetree/bindings/display/panel/koe,tx31d200vm0baa.txt

@@ -0,0 +1,25 @@
+Kaohsiung Opto-Electronics. TX31D200VM0BAA 12.3" HSXGA LVDS panel
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
+
+Required properties:
+- compatible: should be "koe,tx31d200vm0baa"
+
+Optional properties:
+- backlight: phandle of the backlight device attached to the panel
+
+Optional nodes:
+- Video port for LVDS panel input.
+
+Example:
+	panel {
+		compatible = "koe,tx31d200vm0baa";
+		backlight = <&backlight_lvds>;
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&lvds0_out>;
+			};
+		};
+	};

+ 2 - 0
Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.txt

@@ -9,6 +9,7 @@ Required properties:
 
 Optional properties:
   - reset-gpios: a GPIO spec for the reset pin (active low).
+  - power-supply: phandle of the regulator that provides the supply voltage.
 
 Example:
 &dsi {
@@ -17,5 +18,6 @@ Example:
 		compatible = "orisetech,otm8009a";
 		reg = <0>;
 		reset-gpios = <&gpioh 7 GPIO_ACTIVE_LOW>;
+		power-supply = <&v1v8>;
 	};
 };

+ 25 - 0
Documentation/devicetree/bindings/display/panel/raydium,rm68200.txt

@@ -0,0 +1,25 @@
+Raydium Semiconductor Corporation RM68200 5.5" 720p MIPI-DSI TFT LCD panel
+
+The Raydium Semiconductor Corporation RM68200 is a 5.5" 720x1280 TFT LCD
+panel connected using a MIPI-DSI video interface.
+
+Required properties:
+  - compatible: "raydium,rm68200"
+  - reg: the virtual channel number of a DSI peripheral
+
+Optional properties:
+  - reset-gpios: a GPIO spec for the reset pin (active low).
+  - power-supply: phandle of the regulator that provides the supply voltage.
+  - backlight: phandle of the backlight device attached to the panel.
+
+Example:
+&dsi {
+	...
+	panel@0 {
+		compatible = "raydium,rm68200";
+		reg = <0>;
+		reset-gpios = <&gpiof 15 GPIO_ACTIVE_LOW>;
+		power-supply = <&v1v8>;
+		backlight = <&pwm_backlight>;
+	};
+};

+ 4 - 0
Documentation/devicetree/bindings/display/panel/simple-panel.txt

@@ -1,4 +1,8 @@
 Simple display panel
+====================
+
+panel node
+----------
 
 Required properties:
 - power-supply: See panel-common.txt

+ 15 - 20
Documentation/devicetree/bindings/display/renesas,du.txt

@@ -13,13 +13,10 @@ Required Properties:
     - "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU
     - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU
     - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU
+    - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU
+    - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU
 
-  - reg: A list of base address and length of each memory resource, one for
-    each entry in the reg-names property.
-  - reg-names: Name of the memory resources. The DU requires one memory
-    resource for the DU core (named "du") and one memory resource for each
-    LVDS encoder (named "lvds.x" with "x" being the LVDS controller numerical
-    index).
+  - reg: the memory-mapped I/O registers base address and length
 
   - interrupt-parent: phandle of the parent interrupt controller.
   - interrupts: Interrupt specifiers for the DU interrupts.
@@ -29,14 +26,13 @@ Required Properties:
   - clock-names: Name of the clocks. This property is model-dependent.
     - R8A7779 uses a single functional clock. The clock doesn't need to be
       named.
-    - All other DU instances use one functional clock per channel and one
-      clock per LVDS encoder (if available). The functional clocks must be
-      named "du.x" with "x" being the channel numerical index. The LVDS clocks
-      must be named "lvds.x" with "x" being the LVDS encoder numerical index.
-    - In addition to the functional and encoder clocks, all DU versions also
-      support externally supplied pixel clocks. Those clocks are optional.
-      When supplied they must be named "dclkin.x" with "x" being the input
-      clock numerical index.
+    - All other DU instances use one functional clock per channel The
+      functional clocks must be named "du.x" with "x" being the channel
+      numerical index.
+    - In addition to the functional clocks, all DU versions also support
+      externally supplied pixel clocks. Those clocks are optional. When
+      supplied they must be named "dclkin.x" with "x" being the input clock
+      numerical index.
 
   - vsps: A list of phandle and channel index tuples to the VSPs that handle
     the memory interfaces for the DU channels. The phandle identifies the VSP
@@ -63,15 +59,15 @@ corresponding to each DU output.
  R8A7794 (R-Car E2)   DPAD 0         DPAD 1         -              -
  R8A7795 (R-Car H3)   DPAD 0         HDMI 0         HDMI 1         LVDS 0
  R8A7796 (R-Car M3-W) DPAD 0         HDMI 0         LVDS 0         -
+ R8A77970 (R-Car V3M) DPAD 0         LVDS 0         -              -
+ R8A77995 (R-Car D3)  DPAD 0         LVDS 0         LVDS 1         -
 
 
 Example: R8A7795 (R-Car H3) ES2.0 DU
 
 	du: display@feb00000 {
 		compatible = "renesas,du-r8a7795";
-		reg = <0 0xfeb00000 0 0x80000>,
-		      <0 0xfeb90000 0 0x14>;
-		reg-names = "du", "lvds.0";
+		reg = <0 0xfeb00000 0 0x80000>;
 		interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>,
@@ -79,9 +75,8 @@ Example: R8A7795 (R-Car H3) ES2.0 DU
 		clocks = <&cpg CPG_MOD 724>,
 			 <&cpg CPG_MOD 723>,
 			 <&cpg CPG_MOD 722>,
-			 <&cpg CPG_MOD 721>,
-			 <&cpg CPG_MOD 727>;
-		clock-names = "du.0", "du.1", "du.2", "du.3", "lvds.0";
+			 <&cpg CPG_MOD 721>;
+		clock-names = "du.0", "du.1", "du.2", "du.3";
 		vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd0 1>;
 
 		ports {

+ 74 - 0
Documentation/devicetree/bindings/display/rockchip/cdn-dp-rockchip.txt

@@ -0,0 +1,74 @@
+Rockchip RK3399 specific extensions to the cdn Display Port
+================================
+
+Required properties:
+- compatible: must be "rockchip,rk3399-cdn-dp"
+
+- reg: physical base address of the controller and length
+
+- clocks: from common clock binding: handle to dp clock.
+
+- clock-names: from common clock binding:
+	       Required elements: "core-clk" "pclk" "spdif" "grf"
+
+- resets : a list of phandle + reset specifier pairs
+- reset-names : string of reset names
+		Required elements: "apb", "core", "dptx", "spdif"
+- power-domains : power-domain property defined with a phandle
+		  to respective power domain.
+- assigned-clocks: main clock, should be <&cru SCLK_DP_CORE>
+- assigned-clock-rates : the DP core clk frequency, shall be: 100000000
+
+- rockchip,grf: phandle to the GRF syscon; the driver needs it to program GRF registers on this SoC.
+
+- ports: contains a port node with endpoint definitions as defined in
+	 Documentation/devicetree/bindings/media/video-interfaces.txt. The
+	 port contains 2 endpoints, connecting to the outputs of the VOPs.
+
+- phys: from general PHY binding: the phandle for the PHY device.
+
+- extcon: extcon specifier for the Power Delivery
+
+- #sound-dai-cells: must be 1 if your system uses 2 DAIs: I2S and SPDIF
+
+-------------------------------------------------------------------------------
+
+Example:
+	cdn_dp: dp@fec00000 {
+		compatible = "rockchip,rk3399-cdn-dp";
+		reg = <0x0 0xfec00000 0x0 0x100000>;
+		interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_DP_CORE>, <&cru PCLK_DP_CTRL>,
+			 <&cru SCLK_SPDIF_REC_DPTX>, <&cru PCLK_VIO_GRF>;
+		clock-names = "core-clk", "pclk", "spdif", "grf";
+		assigned-clocks = <&cru SCLK_DP_CORE>;
+		assigned-clock-rates = <100000000>;
+		power-domains = <&power RK3399_PD_HDCP>;
+		phys = <&tcphy0_dp>, <&tcphy1_dp>;
+		resets = <&cru SRST_DPTX_SPDIF_REC>;
+		reset-names = "spdif";
+		extcon = <&fusb0>, <&fusb1>;
+		rockchip,grf = <&grf>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#sound-dai-cells = <1>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			dp_in: port {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				dp_in_vopb: endpoint@0 {
+					reg = <0>;
+					remote-endpoint = <&vopb_out_dp>;
+				};
+
+				dp_in_vopl: endpoint@1 {
+					reg = <1>;
+					remote-endpoint = <&vopl_out_dp>;
+				};
+			};
+		};
+	};

+ 1 - 1
Documentation/devicetree/bindings/display/st,stm32-ltdc.txt

@@ -98,7 +98,7 @@ Example 2: DSI panel
 			compatible = "st,stm32-dsi";
 			reg = <0x40016c00 0x800>;
 			clocks = <&rcc 1 CLK_F469_DSI>, <&clk_hse>;
-			clock-names = "ref", "pclk";
+			clock-names = "pclk", "ref";
 			resets = <&rcc STM32F4_APB2_RESET(DSI)>;
 			reset-names = "apb";
 

+ 95 - 9
Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt

@@ -64,6 +64,56 @@ Required properties:
     first port should be the input endpoint. The second should be the
     output, usually to an HDMI connector.
 
+DWC HDMI TX Encoder
+-------------------
+
+The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
+with Allwinner's own PHY IP. It supports audio and video outputs and CEC.
+
+These DT bindings follow the Synopsys DWC HDMI TX bindings defined in
+Documentation/devicetree/bindings/display/bridge/dw_hdmi.txt with the
+following device-specific properties.
+
+Required properties:
+
+  - compatible: value must be one of:
+    * "allwinner,sun8i-a83t-dw-hdmi"
+  - reg: base address and size of memory-mapped region
+  - reg-io-width: See dw_hdmi.txt. Shall be 1.
+  - interrupts: HDMI interrupt number
+  - clocks: phandles to the clocks feeding the HDMI encoder
+    * iahb: the HDMI bus clock
+    * isfr: the HDMI register clock
+    * tmds: TMDS clock
+  - clock-names: the clock names mentioned above
+  - resets: phandle to the reset controller
+  - reset-names: must be "ctrl"
+  - phys: phandle to the DWC HDMI PHY
+  - phy-names: must be "phy"
+
+  - ports: A ports node with endpoint definitions as defined in
+    Documentation/devicetree/bindings/media/video-interfaces.txt. The
+    first port should be the input endpoint. The second should be the
+    output, usually to an HDMI connector.
+
+DWC HDMI PHY
+------------
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun8i-a83t-hdmi-phy
+    * allwinner,sun8i-h3-hdmi-phy
+  - reg: base address and size of memory-mapped region
+  - clocks: phandles to the clocks feeding the HDMI PHY
+    * bus: the HDMI PHY interface clock
+    * mod: the HDMI PHY module clock
+  - clock-names: the clock names mentioned above
+  - resets: phandle to the reset controller driving the PHY
+  - reset-names: must be "phy"
+
+H3 HDMI PHY requires additional clock:
+  - pll-0: parent of phy clock
+
 TV Encoder
 ----------
 
@@ -94,24 +144,29 @@ Required properties:
    * allwinner,sun7i-a20-tcon
    * allwinner,sun8i-a33-tcon
    * allwinner,sun8i-a83t-tcon-lcd
+   * allwinner,sun8i-a83t-tcon-tv
    * allwinner,sun8i-v3s-tcon
+   * allwinner,sun9i-a80-tcon-lcd
+   * allwinner,sun9i-a80-tcon-tv
  - reg: base address and size of memory-mapped region
  - interrupts: interrupt associated to this IP
- - clocks: phandles to the clocks feeding the TCON. Three are needed:
+ - clocks: phandles to the clocks feeding the TCON.
    - 'ahb': the interface clocks
-   - 'tcon-ch0': The clock driving the TCON channel 0
+   - 'tcon-ch0': The clock driving the TCON channel 0, if supported
  - resets: phandles to the reset controllers driving the encoder
-   - "lcd": the reset line for the TCON channel 0
+   - "lcd": the reset line for the TCON
+   - "edp": the reset line for the eDP block (A80 only)
 
  - clock-names: the clock names mentioned above
  - reset-names: the reset names mentioned above
- - clock-output-names: Name of the pixel clock created
+ - clock-output-names: Name of the pixel clock created, if TCON supports
+   channel 0.
 
 - ports: A ports node with endpoint definitions as defined in
   Documentation/devicetree/bindings/media/video-interfaces.txt. The
   first port should be the input endpoint, the second one the output
 
-  The output may have multiple endpoints. The TCON has two channels,
+  The output may have multiple endpoints. TCON can have 1 or 2 channels,
   usually with the first channel being used for the panels interfaces
   (RGB, LVDS, etc.), and the second being used for the outputs that
   require another controller (TV Encoder, HDMI, etc.). The endpoints
@@ -119,11 +174,13 @@ Required properties:
   channel the endpoint is associated to. If that property is not
   present, the endpoint number will be used as the channel number.
 
-On SoCs other than the A33 and V3s, there is one more clock required:
+For TCONs with channel 0, there is one more clock required:
+   - 'tcon-ch0': The clock driving the TCON channel 0
+For TCONs with channel 1, there is one more clock required:
    - 'tcon-ch1': The clock driving the TCON channel 1
 
-On SoCs that support LVDS (all SoCs but the A13, H3, H5 and V3s), you
-need one more reset line:
+When the TCON supports LVDS (all TCONs except the TV TCON on the A83T and
+those found in the A13, H3, H5 and V3s SoCs), you need one more reset line:
    - 'lvds': The reset line driving the LVDS logic
 
 And on the A23, A31, A31s and A33, you need one more clock line:
@@ -134,7 +191,7 @@ DRC
 ---
 
 The DRC (Dynamic Range Controller), found in the latest Allwinner SoCs
-(A31, A23, A33), allows to dynamically adjust pixel
+(A31, A23, A33, A80), allows to dynamically adjust pixel
 brightness/contrast based on histogram measurements for LCD content
 adaptive backlight control.
 
@@ -144,6 +201,7 @@ Required properties:
     * allwinner,sun6i-a31-drc
     * allwinner,sun6i-a31s-drc
     * allwinner,sun8i-a33-drc
+    * allwinner,sun9i-a80-drc
   - reg: base address and size of the memory-mapped region.
   - interrupts: interrupt associated to this IP
   - clocks: phandles to the clocks feeding the DRC
@@ -170,6 +228,7 @@ Required properties:
     * allwinner,sun6i-a31-display-backend
     * allwinner,sun7i-a20-display-backend
     * allwinner,sun8i-a33-display-backend
+    * allwinner,sun9i-a80-display-backend
   - reg: base address and size of the memory-mapped region.
   - interrupts: interrupt associated to this IP
   - clocks: phandles to the clocks feeding the frontend and backend
@@ -191,6 +250,28 @@ On the A33, some additional properties are required:
   - resets and reset-names need to have a phandle to the SAT bus
     resets, whose name will be "sat"
 
+DEU
+---
+
+The DEU (Detail Enhancement Unit), found in the Allwinner A80 SoC,
+can sharpen the display content in both luma and chroma channels.
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun9i-a80-deu
+  - reg: base address and size of the memory-mapped region.
+  - interrupts: interrupt associated to this IP
+  - clocks: phandles to the clocks feeding the DEU
+    * ahb: the DEU interface clock
+    * mod: the DEU module clock
+    * ram: the DEU DRAM clock
+  - clock-names: the clock names mentioned above
+  - resets: phandles to the reset line driving the DEU
+
+- ports: A ports node with endpoint definitions as defined in
+  Documentation/devicetree/bindings/media/video-interfaces.txt. The
+  first port should be the input endpoints, the second one the outputs
+
 Display Engine Frontend
 -----------------------
 
@@ -204,6 +285,7 @@ Required properties:
     * allwinner,sun6i-a31-display-frontend
     * allwinner,sun7i-a20-display-frontend
     * allwinner,sun8i-a33-display-frontend
+    * allwinner,sun9i-a80-display-frontend
   - reg: base address and size of the memory-mapped region.
   - interrupts: interrupt associated to this IP
   - clocks: phandles to the clocks feeding the frontend and backend
@@ -226,6 +308,8 @@ supported.
 Required properties:
   - compatible: value must be one of:
     * allwinner,sun8i-a83t-de2-mixer-0
+    * allwinner,sun8i-a83t-de2-mixer-1
+    * allwinner,sun8i-h3-de2-mixer-0
     * allwinner,sun8i-v3s-de2-mixer
   - reg: base address and size of the memory-mapped region.
   - clocks: phandles to the clocks feeding the mixer
@@ -256,7 +340,9 @@ Required properties:
     * allwinner,sun7i-a20-display-engine
     * allwinner,sun8i-a33-display-engine
     * allwinner,sun8i-a83t-display-engine
+    * allwinner,sun8i-h3-display-engine
     * allwinner,sun8i-v3s-display-engine
+    * allwinner,sun9i-a80-display-engine
 
   - allwinner,pipelines: list of phandle to the display engine
     frontends (DE 1.0) or mixers (DE 2.0) available.

+ 1 - 0
Documentation/devicetree/bindings/vendor-prefixes.txt

@@ -104,6 +104,7 @@ eeti	eGalax_eMPIA Technology Inc
 elan	Elan Microelectronic Corp.
 embest	Shenzhen Embest Technology Co., Ltd.
 emmicro	EM Microelectronic
+emtrion	emtrion GmbH
 energymicro	Silicon Laboratories (formerly Energy Micro AS)
 engicam	Engicam S.r.l.
 epcos	EPCOS AG

+ 2 - 2
Documentation/devicetree/overlay-notes.txt

@@ -87,8 +87,8 @@ Overlay in-kernel API
 
 The API is quite easy to use.
 
-1. Call of_overlay_apply() to create and apply an overlay changeset. The return
-value is an error or a cookie identifying this overlay.
+1. Call of_overlay_fdt_apply() to create and apply an overlay changeset. The
+return value is an error or a cookie identifying this overlay.
 
 2. Call of_overlay_remove() to remove and cleanup the overlay changeset
 previously created via the call to of_overlay_apply(). Removal of an overlay
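
A minimal C sketch of the new API described above (illustrative only, not part
of this series; the demo_* helpers are hypothetical). It assumes the caller
already has a flattened overlay blob in memory:

	#include <linux/of.h>

	/* Apply an overlay blob; *ovcs_id receives the cookie on success. */
	static int demo_apply_overlay(const void *overlay_fdt,
				      u32 overlay_fdt_size, int *ovcs_id)
	{
		int ret;

		ret = of_overlay_fdt_apply(overlay_fdt, overlay_fdt_size,
					   ovcs_id);
		if (ret)
			return ret;	/* *ovcs_id is not valid on error */

		/* ... devices created by the overlay can now be used ... */
		return 0;
	}

	/* Tear down the changeset identified by the cookie from apply. */
	static void demo_remove_overlay(int *ovcs_id)
	{
		of_overlay_remove(ovcs_id);
	}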

+ 21 - 0
Documentation/gpu/drivers.rst

@@ -0,0 +1,21 @@
+========================
+GPU Driver Documentation
+========================
+
+.. toctree::
+
+   i915
+   meson
+   pl111
+   tegra
+   tinydrm
+   tve200
+   vc4
+   bridge/dw-hdmi
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`

+ 6 - 2
Documentation/gpu/drm-kms.rst

@@ -286,6 +286,9 @@ Atomic Mode Setting Function Reference
 .. kernel-doc:: drivers/gpu/drm/drm_atomic.c
    :export:
 
+.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
+   :internal:
+
 CRTC Abstraction
 ================
 
@@ -547,8 +550,9 @@ Explicit Fencing Properties
 Existing KMS Properties
 -----------------------
 
-The following table gives description of drm properties exposed by
-various modules/drivers.
+The following table gives description of drm properties exposed by various
+modules/drivers. Because this table is very unwieldy, do not add any new
+properties here. Instead document them in a section above.
 
 .. csv-table::
    :header-rows: 1

+ 1 - 8
Documentation/gpu/index.rst

@@ -10,16 +10,9 @@ Linux GPU Driver Developer's Guide
    drm-kms
    drm-kms-helpers
    drm-uapi
-   i915
-   meson
-   pl111
-   tegra
-   tinydrm
-   tve200
-   vc4
+   drivers
    vga-switcheroo
    vgaarbiter
-   bridge/dw-hdmi
    todo
 
 .. only::  subproject and html

+ 0 - 1
Documentation/gpu/kms-properties.csv

@@ -1,5 +1,4 @@
 Owner Module/Drivers,Group,Property Name,Type,Property Values,Object attached,Description/Restrictions
-,,“scaling mode”,ENUM,"{ ""None"", ""Full"", ""Center"", ""Full aspect"" }",Connector,"Supported by: amdgpu, gma500, i915, nouveau and radeon."
 ,DVI-I,“subconnector”,ENUM,"{ “Unknown”, “DVI-D”, “DVI-A” }",Connector,TBD
 ,,“select subconnector”,ENUM,"{ “Automatic”, “DVI-D”, “DVI-A” }",Connector,TBD
 ,TV,“subconnector”,ENUM,"{ ""Unknown"", ""Composite"", ""SVIDEO"", ""Component"", ""SCART"" }",Connector,TBD

+ 17 - 0
Documentation/gpu/todo.rst

@@ -212,6 +212,16 @@ probably use drm_fb_helper_fbdev_teardown().
 
 Contact: Maintainer of the driver you plan to convert
 
+idr_init_base()
+---------------
+
+DRM core&drivers uses a lot of idr (integer lookup directories) for mapping
+userspace IDs to internal objects, and in most places ID=0 means NULL and hence
+is never used. Switching to idr_init_base() for these would make the idr more
+efficient.
+
+Contact: Daniel Vetter
+
 Core refactorings
 =================
 
@@ -440,5 +450,12 @@ See drivers/gpu/drm/amd/display/TODO for tasks.
 
 Contact: Harry Wentland, Alex Deucher
 
+i915
+----
+
+- Our early/late pm callbacks could be removed in favour of using
+  device_link_add to model the dependency between i915 and snd_had. See
+  https://dri.freedesktop.org/docs/drm/driver-api/device_link.html
+
 Outside DRM
 ===========
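
To make the idr_init_base() item above concrete, a minimal sketch of such a
conversion (illustrative only, not taken from any driver; the demo_* names are
hypothetical). Telling the IDR that valid IDs start at 1 stops it from
tracking the never-used slot 0:

	#include <linux/gfp.h>
	#include <linux/idr.h>
	#include <linux/spinlock.h>

	struct demo_file_priv {
		struct idr object_idr;	/* userspace handle -> object */
		spinlock_t idr_lock;
	};

	static void demo_open(struct demo_file_priv *fpriv)
	{
		/* Was: idr_init(&fpriv->object_idr); handle 0 never used. */
		idr_init_base(&fpriv->object_idr, 1);
		spin_lock_init(&fpriv->idr_lock);
	}

	static int demo_handle_create(struct demo_file_priv *fpriv, void *obj)
	{
		int handle;

		idr_preload(GFP_KERNEL);
		spin_lock(&fpriv->idr_lock);
		/* Allocate from 1 so that 0 keeps meaning "no object". */
		handle = idr_alloc(&fpriv->object_idr, obj, 1, 0, GFP_NOWAIT);
		spin_unlock(&fpriv->idr_lock);
		idr_preload_end();

		return handle;	/* >= 1 on success, negative errno on failure */
	}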

+ 11 - 1
MAINTAINERS

@@ -766,6 +766,8 @@ F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
 F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -4459,6 +4461,13 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 S:	Supported
 F:	drivers/gpu/drm/pl111/
 
+DRM DRIVER FOR ARM VERSATILE TFT PANELS
+M:	Linus Walleij <linus.walleij@linaro.org>
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+S:	Maintained
+F:	drivers/gpu/drm/panel/panel-arm-versatile.c
+F:	Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt
+
 DRM DRIVER FOR AST SERVER GRAPHICS CHIPS
 M:	Dave Airlie <airlied@redhat.com>
 S:	Odd Fixes
@@ -4613,8 +4622,8 @@ F:	include/uapi/drm/
 F:	include/linux/vga*
 
 DRM DRIVERS AND MISC GPU PATCHES
-M:	Daniel Vetter <daniel.vetter@intel.com>
 M:	Gustavo Padovan <gustavo@padovan.org>
+M:	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
 M:	Sean Paul <seanpaul@chromium.org>
 W:	https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
 S:	Maintained
@@ -4740,6 +4749,7 @@ F:	drivers/gpu/drm/rcar-du/
 F:	drivers/gpu/drm/shmobile/
 F:	include/linux/platform_data/shmob_drm.h
 F:	Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
+F:	Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
 F:	Documentation/devicetree/bindings/display/renesas,du.txt
 
 DRM DRIVERS FOR ROCKCHIP

+ 1 - 1
arch/x86/kernel/devicetree.c

@@ -259,7 +259,7 @@ static void __init dtb_apic_setup(void)
 	dtb_ioapic_setup();
 }
 
-#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_OF_EARLY_FLATTREE
 static void __init x86_flattree_get_config(void)
 {
 	u32 size, map_len;

+ 1 - 0
drivers/dma-buf/dma-fence.c

@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)
 
 	trace_dma_fence_destroy(fence);
 
+	/* Failed to signal before release, could be a refcounting issue */
 	WARN_ON(!list_empty(&fence->cb_list));
 
 	if (fence->ops->release)
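
The new comment in short: a fence has to be signalled before its last
reference is dropped, otherwise dma_fence_release() trips the WARN_ON because
callbacks may still be pending on cb_list. A minimal lifecycle sketch
(illustrative only; the demo_* fence and its ops are hypothetical):

	#include <linux/dma-fence.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	static const char *demo_driver_name(struct dma_fence *f)
	{
		return "demo";
	}

	static const char *demo_timeline_name(struct dma_fence *f)
	{
		return "demo-timeline";
	}

	static bool demo_enable_signaling(struct dma_fence *f)
	{
		return true;	/* signalled directly in demo_fence_lifecycle() */
	}

	static const struct dma_fence_ops demo_fence_ops = {
		.get_driver_name   = demo_driver_name,
		.get_timeline_name = demo_timeline_name,
		.enable_signaling  = demo_enable_signaling,
		.wait              = dma_fence_default_wait,
	};

	static DEFINE_SPINLOCK(demo_fence_lock);

	static void demo_fence_lifecycle(void)
	{
		struct dma_fence *fence;

		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
		if (!fence)
			return;

		dma_fence_init(fence, &demo_fence_ops, &demo_fence_lock,
			       dma_fence_context_alloc(1), 1);

		/* ... publish the fence and do the work it tracks ... */

		dma_fence_signal(fence);	/* signal first ... */
		dma_fence_put(fence);		/* ... then drop the last reference */
	}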

+ 22 - 9
drivers/dma-buf/reservation.c

@@ -374,8 +374,9 @@ EXPORT_SYMBOL(reservation_object_copy_fences);
  * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
  * the required size, and must be freed by caller)
  *
- * RETURNS
- * Zero or -errno
+ * Retrieve all fences from the reservation object. If the pointer for the
+ * exclusive fence is not specified the fence is put into the array of the
+ * shared fences as well. Returns either zero or -ENOMEM.
  */
 int reservation_object_get_fences_rcu(struct reservation_object *obj,
 				      struct dma_fence **pfence_excl,
@@ -389,8 +390,8 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 
 	do {
 		struct reservation_object_list *fobj;
-		unsigned seq;
-		unsigned int i;
+		unsigned int i, seq;
+		size_t sz = 0;
 
 		shared_count = i = 0;
 
@@ -402,9 +403,14 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 			goto unlock;
 
 		fobj = rcu_dereference(obj->fence);
-		if (fobj) {
+		if (fobj)
+			sz += sizeof(*shared) * fobj->shared_max;
+
+		if (!pfence_excl && fence_excl)
+			sz += sizeof(*shared);
+
+		if (sz) {
 			struct dma_fence **nshared;
-			size_t sz = sizeof(*shared) * fobj->shared_max;
 
 			nshared = krealloc(shared, sz,
 					   GFP_NOWAIT | __GFP_NOWARN);
@@ -420,13 +426,19 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 				break;
 			}
 			shared = nshared;
-			shared_count = fobj->shared_count;
-
+			shared_count = fobj ? fobj->shared_count : 0;
 			for (i = 0; i < shared_count; ++i) {
 				shared[i] = rcu_dereference(fobj->shared[i]);
 				if (!dma_fence_get_rcu(shared[i]))
 					break;
 			}
+
+			if (!pfence_excl && fence_excl) {
+				shared[i] = fence_excl;
+				fence_excl = NULL;
+				++i;
+				++shared_count;
+			}
 		}
 
 		if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
@@ -448,7 +460,8 @@ unlock:
 
 	*pshared_count = shared_count;
 	*pshared = shared;
-	*pfence_excl = fence_excl;
+	if (pfence_excl)
+		*pfence_excl = fence_excl;
 
 	return ret;
 }
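
A short usage sketch of the behaviour documented in the new kernel-doc above
(illustrative only; demo_wait_all_fences() is hypothetical). Passing NULL for
pfence_excl folds the exclusive fence into the returned array, so a caller
that treats all fences uniformly needs a single loop:

	#include <linux/dma-fence.h>
	#include <linux/reservation.h>
	#include <linux/slab.h>

	static int demo_wait_all_fences(struct reservation_object *resv)
	{
		struct dma_fence **fences;
		unsigned int i, count;
		int ret;

		/* NULL pfence_excl: any exclusive fence ends up in fences[]. */
		ret = reservation_object_get_fences_rcu(resv, NULL,
							&count, &fences);
		if (ret)
			return ret;	/* zero or -ENOMEM, as documented */

		for (i = 0; i < count; i++) {
			dma_fence_wait(fences[i], false);
			dma_fence_put(fences[i]);
		}
		kfree(fences);	/* array was krealloc()'d by the helper */
		return 0;
	}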

+ 5 - 5
drivers/dma-buf/sw_sync.c

@@ -43,14 +43,14 @@
  * timelines.
  *
  * Fences can be created with SW_SYNC_IOC_CREATE_FENCE ioctl with struct
- * sw_sync_ioctl_create_fence as parameter.
+ * sw_sync_create_fence_data as parameter.
  *
  * To increment the timeline counter, SW_SYNC_IOC_INC ioctl should be used
  * with the increment as u32. This will update the last signaled value
  * from the timeline and signal any fence that has a seqno smaller or equal
  * to it.
  *
- * struct sw_sync_ioctl_create_fence
+ * struct sw_sync_create_fence_data
  * @value:	the seqno to initialise the fence with
  * @name:	the name of the new sync point
  * @fence:	return the fd of the new sync_file with the created fence
@@ -235,10 +235,10 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 
 /**
  * sync_pt_create() - creates a sync pt
- * @parent:	fence's parent sync_timeline
- * @inc:	value of the fence
+ * @obj:	parent sync_timeline
+ * @value:	value of the fence
  *
- * Creates a new sync_pt as a child of @parent.  @size bytes will be
+ * Creates a new sync_pt (fence) as a child of @parent.  @size bytes will be
  * allocated allowing for implementation specific data to be kept after
  * the generic sync_timeline struct. Returns the sync_pt object or
  * NULL in case of error.

+ 0 - 1
drivers/gpu/drm/amd/acp/include/acp_gfx_if.h

@@ -25,7 +25,6 @@
 #define _ACP_GFX_IF_H
 
 #include <linux/types.h>
-#include "cgs_linux.h"
 #include "cgs_common.h"
 
 int amd_acp_hw_init(struct cgs_device *cgs_device,

+ 4 - 4
drivers/gpu/drm/amd/amdgpu/Makefile

@@ -30,7 +30,6 @@ FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
 ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
-	-I$(FULL_AMD_PATH)/scheduler \
 	-I$(FULL_AMD_PATH)/powerplay/inc \
 	-I$(FULL_AMD_PATH)/acp/include \
 	-I$(FULL_AMD_DISPLAY_PATH) \
@@ -63,7 +62,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
 
 # add GMC block
 amdgpu-y += \
@@ -88,8 +87,7 @@ amdgpu-y += \
 
 # add SMC block
 amdgpu-y += \
-	amdgpu_dpm.o \
-	amdgpu_powerplay.o
+	amdgpu_dpm.o
 
 # add DCE block
 amdgpu-y += \
@@ -130,6 +128,8 @@ amdgpu-y += \
 # add amdkfd interfaces
 amdgpu-y += \
 	 amdgpu_amdkfd.o \
+	 amdgpu_amdkfd_fence.o \
+	 amdgpu_amdkfd_gpuvm.o \
 	 amdgpu_amdkfd_gfx_v8.o
 
 # add cgs

+ 36 - 114
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -68,6 +68,7 @@
 #include "amdgpu_vce.h"
 #include "amdgpu_vcn.h"
 #include "amdgpu_mn.h"
+#include "amdgpu_gmc.h"
 #include "amdgpu_dm.h"
 #include "amdgpu_virt.h"
 #include "amdgpu_gart.h"
@@ -127,6 +128,7 @@ extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
+extern int amdgpu_emu_mode;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -179,10 +181,6 @@ extern int amdgpu_cik_support;
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
 
-/* GPU RESET flags */
-#define AMDGPU_RESET_INFO_VRAM_LOST  (1 << 0)
-#define AMDGPU_RESET_INFO_FULLRESET  (1 << 1)
-
 struct amdgpu_device;
 struct amdgpu_ib;
 struct amdgpu_cs_parser;
@@ -318,13 +316,6 @@ struct amdgpu_vm_pte_funcs {
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
 			  uint32_t incr);
-
-	/* maximum nums of PTEs/PDEs in a single operation */
-	uint32_t	set_max_nums_pte_pde;
-
-	/* number of dw to reserve per operation */
-	unsigned	set_pte_pde_num_dw;
-
 	/* for linear pte/pde updates without addr mapping */
 	void (*set_pte_pde)(struct amdgpu_ib *ib,
 			    uint64_t pe,
@@ -332,28 +323,6 @@ struct amdgpu_vm_pte_funcs {
 			    uint32_t incr, uint64_t flags);
 };
 
-/* provided by the gmc block */
-struct amdgpu_gart_funcs {
-	/* flush the vm tlb via mmio */
-	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
-			      uint32_t vmid);
-	/* write pte/pde updates using the cpu */
-	int (*set_pte_pde)(struct amdgpu_device *adev,
-			   void *cpu_pt_addr, /* cpu addr of page table */
-			   uint32_t gpu_page_idx, /* pte/pde to update */
-			   uint64_t addr, /* addr to write into pte/pde */
-			   uint64_t flags); /* access flags */
-	/* enable/disable PRT support */
-	void (*set_prt)(struct amdgpu_device *adev, bool enable);
-	/* set pte flags based per asic */
-	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
-				     uint32_t flags);
-	/* get the pde for a given mc addr */
-	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
-			   u64 *dst, u64 *flags);
-	uint32_t (*get_invalidate_req)(unsigned int vmid);
-};
-
 /* provided by the ih block */
 struct amdgpu_ih_funcs {
 	/* ring read/write ptr handling, called from interrupt context */
@@ -370,14 +339,6 @@ struct amdgpu_ih_funcs {
 bool amdgpu_get_bios(struct amdgpu_device *adev);
 bool amdgpu_read_bios(struct amdgpu_device *adev);
 
-/*
- * Dummy page
- */
-struct amdgpu_dummy_page {
-	struct page	*page;
-	dma_addr_t	addr;
-};
-
 /*
  * Clocks
  */
@@ -418,8 +379,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags);
-int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
-void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
@@ -480,7 +441,7 @@ struct amdgpu_sa_bo {
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
-			     u64 flags, bool kernel,
+			     u64 flags, enum ttm_bo_type type,
 			     struct reservation_object *resv,
 			     struct drm_gem_object **obj);
 
@@ -493,56 +454,6 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 int amdgpu_fence_slab_init(void);
 void amdgpu_fence_slab_fini(void);
 
-/*
- * VMHUB structures, functions & helpers
- */
-struct amdgpu_vmhub {
-	uint32_t	ctx0_ptb_addr_lo32;
-	uint32_t	ctx0_ptb_addr_hi32;
-	uint32_t	vm_inv_eng0_req;
-	uint32_t	vm_inv_eng0_ack;
-	uint32_t	vm_context0_cntl;
-	uint32_t	vm_l2_pro_fault_status;
-	uint32_t	vm_l2_pro_fault_cntl;
-};
-
-/*
- * GPU MC structures, functions & helpers
- */
-struct amdgpu_mc {
-	resource_size_t		aper_size;
-	resource_size_t		aper_base;
-	resource_size_t		agp_base;
-	/* for some chips with <= 32MB we need to lie
-	 * about vram size near mc fb location */
-	u64			mc_vram_size;
-	u64			visible_vram_size;
-	u64			gart_size;
-	u64			gart_start;
-	u64			gart_end;
-	u64			vram_start;
-	u64			vram_end;
-	unsigned		vram_width;
-	u64			real_vram_size;
-	int			vram_mtrr;
-	u64                     mc_mask;
-	const struct firmware   *fw;	/* MC firmware */
-	uint32_t                fw_version;
-	struct amdgpu_irq_src	vm_fault;
-	uint32_t		vram_type;
-	uint32_t                srbm_soft_reset;
-	bool			prt_warning;
-	uint64_t		stolen_size;
-	/* apertures */
-	u64					shared_aperture_start;
-	u64					shared_aperture_end;
-	u64					private_aperture_start;
-	u64					private_aperture_end;
-	/* protects concurrent invalidation */
-	spinlock_t		invalidate_lock;
-	bool			translate_further;
-};
-
 /*
  * GPU doorbell structures, functions & helpers
  */
@@ -1125,8 +1036,9 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
-	unsigned		vmid;
 	uint64_t		vm_pd_addr;
+	unsigned		vmid;
+	unsigned		pasid;
 	uint32_t		gds_base, gds_size;
 	uint32_t		gws_base, gws_size;
 	uint32_t		oa_base, oa_size;
@@ -1169,8 +1081,6 @@ struct amdgpu_wb {
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
 
-void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
-
 /*
  * SDMA
  */
@@ -1288,6 +1198,11 @@ struct amdgpu_asic_funcs {
 	void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
 	/* get config memsize register */
 	u32 (*get_config_memsize)(struct amdgpu_device *adev);
+	/* flush hdp write queue */
+	void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+	/* invalidate hdp read cache */
+	void (*invalidate_hdp)(struct amdgpu_device *adev,
+			       struct amdgpu_ring *ring);
 };
 
 /*
@@ -1431,7 +1346,7 @@ struct amdgpu_nbio_funcs {
 	u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
 	u32 (*get_rev_id)(struct amdgpu_device *adev);
 	void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
-	void (*hdp_flush)(struct amdgpu_device *adev);
+	void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 	u32 (*get_memsize)(struct amdgpu_device *adev);
 	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
 				    bool use_doorbell, int doorbell_index);
@@ -1478,9 +1393,7 @@ enum amd_hw_ip_block_type {
 #define HWIP_MAX_INSTANCE	6
 
 struct amd_powerplay {
-	struct cgs_device *cgs_device;
 	void *pp_handle;
-	const struct amd_ip_funcs *ip_funcs;
 	const struct amd_pm_funcs *pp_funcs;
 };
 
@@ -1504,6 +1417,7 @@ struct amdgpu_device {
 	const struct amdgpu_asic_funcs	*asic_funcs;
 	bool				shutdown;
 	bool				need_dma32;
+	bool				need_swiotlb;
 	bool				accel_working;
 	struct work_struct		reset_work;
 	struct notifier_block		acpi_nb;
@@ -1573,9 +1487,9 @@ struct amdgpu_device {
 	struct amdgpu_clock            clock;
 
 	/* MC */
-	struct amdgpu_mc		mc;
+	struct amdgpu_gmc		gmc;
 	struct amdgpu_gart		gart;
-	struct amdgpu_dummy_page	dummy_page;
+	dma_addr_t			dummy_page_addr;
 	struct amdgpu_vm_manager	vm_manager;
 	struct amdgpu_vmhub             vmhub[AMDGPU_MAX_VMHUBS];
 
@@ -1714,6 +1628,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    uint32_t acc_flags);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
 
@@ -1725,6 +1642,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
 
+int emu_soc_asic_init(struct amdgpu_device *adev);
+
 /*
  * Registers read & write functions.
  */
@@ -1735,6 +1654,9 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
 
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
 #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
 #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
 #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
@@ -1837,13 +1759,17 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
-#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
-#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
+#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
+#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
+#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
@@ -1856,11 +1782,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
 #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
-#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
@@ -1870,7 +1796,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
 #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
-#define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc))
 #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
 #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
 #define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
@@ -1893,20 +1818,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job* job, bool force);
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
-void amdgpu_update_display_priority(struct amdgpu_device *adev);
+void amdgpu_display_update_priority(struct amdgpu_device *adev);
 
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 void amdgpu_device_vram_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc, u64 base);
+				 struct amdgpu_gmc *mc, u64 base);
 void amdgpu_device_gart_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc);
+				 struct amdgpu_gmc *mc);
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
-void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
-int amdgpu_ttm_init(struct amdgpu_device *adev);
-void amdgpu_ttm_fini(struct amdgpu_device *adev);
 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 					     const u32 *registers,
 					     const u32 array_size);
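
The amdgpu.h hunks above fold the old gart.gart_funcs callbacks into the new per-device gmc (graphics memory controller) function table and add 8-bit MMIO accessors (RREG8/WREG8). A minimal sketch of how one of the new wrapper macros resolves through that table, assuming the gmc_funcs pointers have already been filled in by the ASIC-specific GMC code; the helper name below is illustrative only, not part of the patch:

	/* Illustrative sketch: amdgpu_gmc_flush_gpu_tlb(adev, vmid) is just an
	 * indirect call through the per-device callback table, so common code
	 * never needs to know which GMC generation is present.
	 */
	static inline void example_gmc_flush_tlb(struct amdgpu_device *adev,
						 unsigned int vmid)
	{
		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid);
	}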

+ 119 - 41
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -30,6 +30,8 @@
 const struct kgd2kfd_calls *kgd2kfd;
 bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
+static const unsigned int compute_vmid_bitmap = 0xFF00;
+
 int amdgpu_amdkfd_init(void)
 {
 	int ret;
@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
 #else
 	ret = -ENOENT;
 #endif
+	amdgpu_amdkfd_gpuvm_init_mem_limits();
 
 	return ret;
 }
@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
+	case CHIP_HAWAII:
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
 		break;
 #endif
 	case CHIP_CARRIZO:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 		break;
 	default:
@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 	int last_valid_bit;
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
-			.compute_vmid_bitmap = 0xFF00,
+			.compute_vmid_bitmap = compute_vmid_bitmap,
 			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
-			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
+			.gpuvm_size = min(adev->vm_manager.max_pfn
+					  << AMDGPU_GPU_PAGE_SHIFT,
+					  AMDGPU_VA_HOLE_START),
+			.drm_render_minor = adev->ddev->render->index
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
@@ -204,20 +216,14 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **cpu_ptr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+	struct amdgpu_bo *bo = NULL;
 	int r;
+	uint64_t gpu_addr_tmp = 0;
+	void *cpu_ptr_tmp = NULL;
 
-	BUG_ON(kgd == NULL);
-	BUG_ON(gpu_addr == NULL);
-	BUG_ON(cpu_ptr == NULL);
-
-	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if ((*mem) == NULL)
-		return -ENOMEM;
-
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
-			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0,
-			     &(*mem)->bo);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
+			     NULL, &bo);
 	if (r) {
 		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
@@ -225,54 +231,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	}
 
 	/* map the buffer */
-	r = amdgpu_bo_reserve((*mem)->bo, true);
+	r = amdgpu_bo_reserve(bo, true);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 		goto allocate_mem_reserve_bo_failed;
 	}
 
-	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
-				&(*mem)->gpu_addr);
+	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
+				&gpu_addr_tmp);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 		goto allocate_mem_pin_bo_failed;
 	}
-	*gpu_addr = (*mem)->gpu_addr;
 
-	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
 	if (r) {
 		dev_err(adev->dev,
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 		goto allocate_mem_kmap_bo_failed;
 	}
-	*cpu_ptr = (*mem)->cpu_ptr;
 
-	amdgpu_bo_unreserve((*mem)->bo);
+	*mem_obj = bo;
+	*gpu_addr = gpu_addr_tmp;
+	*cpu_ptr = cpu_ptr_tmp;
+
+	amdgpu_bo_unreserve(bo);
 
 	return 0;
 
 allocate_mem_kmap_bo_failed:
-	amdgpu_bo_unpin((*mem)->bo);
+	amdgpu_bo_unpin(bo);
 allocate_mem_pin_bo_failed:
-	amdgpu_bo_unreserve((*mem)->bo);
+	amdgpu_bo_unreserve(bo);
 allocate_mem_reserve_bo_failed:
-	amdgpu_bo_unref(&(*mem)->bo);
+	amdgpu_bo_unref(&bo);
 
 	return r;
 }
 
 void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 {
-	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
-
-	BUG_ON(mem == NULL);
+	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
 
-	amdgpu_bo_reserve(mem->bo, true);
-	amdgpu_bo_kunmap(mem->bo);
-	amdgpu_bo_unpin(mem->bo);
-	amdgpu_bo_unreserve(mem->bo);
-	amdgpu_bo_unref(&(mem->bo));
-	kfree(mem);
+	amdgpu_bo_reserve(bo, true);
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&(bo));
 }
 
 void get_local_mem_info(struct kgd_dev *kgd,
@@ -281,24 +286,29 @@ void get_local_mem_info(struct kgd_dev *kgd,
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
 					     ~((1ULL << 32) - 1);
-	resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;
+	resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size;
 
 	memset(mem_info, 0, sizeof(*mem_info));
-	if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
-		mem_info->local_mem_size_public = adev->mc.visible_vram_size;
-		mem_info->local_mem_size_private = adev->mc.real_vram_size -
-				adev->mc.visible_vram_size;
+	if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) {
+		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+				adev->gmc.visible_vram_size;
 	} else {
 		mem_info->local_mem_size_public = 0;
-		mem_info->local_mem_size_private = adev->mc.real_vram_size;
+		mem_info->local_mem_size_private = adev->gmc.real_vram_size;
 	}
-	mem_info->vram_width = adev->mc.vram_width;
+	mem_info->vram_width = adev->gmc.vram_width;
 
 	pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
-			&adev->mc.aper_base, &aper_limit,
+			&adev->gmc.aper_base, &aper_limit,
 			mem_info->local_mem_size_public,
 			mem_info->local_mem_size_private);
 
+	if (amdgpu_emu_mode == 1) {
+		mem_info->mem_clk_max = 100;
+		return;
+	}
+
 	if (amdgpu_sriov_vf(adev))
 		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
 	else
@@ -319,6 +329,9 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 	/* the sclk is in quanta of 10kHz */
+	if (amdgpu_emu_mode == 1)
+		return 100;
+
 	if (amdgpu_sriov_vf(adev))
 		return adev->clock.default_sclk / 100;
 
@@ -354,3 +367,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
 
 	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 }
+
+int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+				uint32_t vmid, uint64_t gpu_addr,
+				uint32_t *ib_cmd, uint32_t ib_len)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	struct amdgpu_ring *ring;
+	struct dma_fence *f = NULL;
+	int ret;
+
+	switch (engine) {
+	case KGD_ENGINE_MEC1:
+		ring = &adev->gfx.compute_ring[0];
+		break;
+	case KGD_ENGINE_SDMA1:
+		ring = &adev->sdma.instance[0].ring;
+		break;
+	case KGD_ENGINE_SDMA2:
+		ring = &adev->sdma.instance[1].ring;
+		break;
+	default:
+		pr_err("Invalid engine in IB submission: %d\n", engine);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
+	if (ret)
+		goto err;
+
+	ib = &job->ibs[0];
+	memset(ib, 0, sizeof(struct amdgpu_ib));
+
+	ib->gpu_addr = gpu_addr;
+	ib->ptr = ib_cmd;
+	ib->length_dw = ib_len;
+	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
+	job->vmid = vmid;
+
+	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
+	if (ret) {
+		DRM_ERROR("amdgpu: failed to schedule IB.\n");
+		goto err_ib_sched;
+	}
+
+	ret = dma_fence_wait(f, false);
+
+err_ib_sched:
+	dma_fence_put(f);
+	amdgpu_job_free(job);
+err:
+	return ret;
+}
+
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
+{
+	if (adev->kfd) {
+		if ((1 << vmid) & compute_vmid_bitmap)
+			return true;
+	}
+
+	return false;
+}
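
The fixed compute_vmid_bitmap of 0xFF00 partitions the 16 hardware VMIDs between graphics and KFD: bits 8 through 15 are set, so amdgpu_amdkfd_is_kfd_vmid() reports true only for VMIDs 8-15. A small illustrative check under the same assumption; the function name is made up for the example:

	/* Illustrative sketch: 0xFF00 has bits 8..15 set, so KFD owns
	 * VMIDs 8-15 while the graphics side keeps VMIDs 0-7.
	 */
	static bool example_vmid_is_kfd(u32 vmid)
	{
		const u32 compute_vmid_bitmap = 0xFF00;

		return (1u << vmid) & compute_vmid_bitmap; /* true for 8..15 */
	}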

+ 96 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -26,15 +26,71 @@
 #define AMDGPU_AMDKFD_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/mm.h>
 #include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_vm.h"
+
+extern const struct kgd2kfd_calls *kgd2kfd;
 
 struct amdgpu_device;
 
+struct kfd_bo_va_list {
+	struct list_head bo_list;
+	struct amdgpu_bo_va *bo_va;
+	void *kgd_dev;
+	bool is_mapped;
+	uint64_t va;
+	uint64_t pte_flags;
+};
+
 struct kgd_mem {
+	struct mutex lock;
 	struct amdgpu_bo *bo;
-	uint64_t gpu_addr;
-	void *cpu_ptr;
+	struct list_head bo_va_list;
+	/* protected by amdkfd_process_info.lock */
+	struct ttm_validate_buffer validate_list;
+	struct ttm_validate_buffer resv_list;
+	uint32_t domain;
+	unsigned int mapped_to_gpu_memory;
+	uint64_t va;
+
+	uint32_t mapping_flags;
+
+	struct amdkfd_process_info *process_info;
+
+	struct amdgpu_sync sync;
+
+	bool aql_queue;
+};
+
+/* KFD Memory Eviction */
+struct amdgpu_amdkfd_fence {
+	struct dma_fence base;
+	struct mm_struct *mm;
+	spinlock_t lock;
+	char timeline_name[TASK_COMM_LEN];
+};
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+						       struct mm_struct *mm);
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+
+struct amdkfd_process_info {
+	/* List head of all VMs that belong to a KFD process */
+	struct list_head vm_list_head;
+	/* List head for all KFD BOs that belong to a KFD process. */
+	struct list_head kfd_bo_list;
+	/* Lock to protect kfd_bo_list */
+	struct mutex lock;
+
+	/* Number of VMs */
+	unsigned int n_vms;
+	/* Eviction Fence */
+	struct amdgpu_amdkfd_fence *eviction_fence;
 };
 
 int amdgpu_amdkfd_init(void);
@@ -48,9 +104,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
+int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+				uint32_t vmid, uint64_t gpu_addr,
+				uint32_t *ib_cmd, uint32_t ib_len);
+
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
 
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
+
 /* Shared API */
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
@@ -79,4 +141,36 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 		valid;							\
 	})
 
+/* GPUVM API */
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+					  void **process_info,
+					  struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+					   struct file *filp,
+					   void **vm, void **process_info,
+					   struct dma_fence **ef);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm);
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+		struct kgd_dev *kgd, uint64_t va, uint64_t size,
+		void *vm, struct kgd_mem **mem,
+		uint64_t *offset, uint32_t flags);
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+int amdgpu_amdkfd_gpuvm_sync_memory(
+		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+		struct kgd_mem *mem, void **kptr, uint64_t *size);
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
+					    struct dma_fence **ef);
+
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
+
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
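
Taken together, the GPUVM entry points declared above give KFD a full life cycle for process GPU address spaces and buffers. A plausible call sequence is sketched here with placeholder kgd, va and size values and with error handling omitted; the real caller is the amdkfd process and queue code, not this snippet:

	void *vm, *process_info;
	struct dma_fence *ef;
	struct kgd_mem *mem;
	uint64_t offset;

	/* create a per-process GPU VM and its shared process_info */
	amdgpu_amdkfd_gpuvm_create_process_vm(kgd, &vm, &process_info, &ef);

	/* allocate a writable VRAM buffer and map it into that VM */
	amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, vm, &mem,
						&offset,
						ALLOC_MEM_FLAGS_VRAM |
						ALLOC_MEM_FLAGS_WRITABLE);
	amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
	amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true);

	/* ... user queues run against the mapping ... */

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kgd, mem);
	amdgpu_amdkfd_gpuvm_destroy_process_vm(kgd, vm);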

+ 179 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c

@@ -0,0 +1,179 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/stacktrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include "amdgpu_amdkfd.h"
+
+static const struct dma_fence_ops amdkfd_fence_ops;
+static atomic_t fence_seq = ATOMIC_INIT(0);
+
+/* Eviction Fence
+ * Fence helper functions to deal with KFD memory eviction.
+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
+ *  evicted unless all the user queues for that process are evicted.
+ *
+ * All the BOs in a process share an eviction fence. When process X wants
+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
+ * by calling ttm_bo_driver->eviction_valuable().
+ *
+ * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
+ *  to process X. Otherwise, it will return true to indicate BO can be
+ *  evicted by TTM.
+ *
+ * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
+ * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
+ * --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
+ *
+ * GPU Scheduler (amd_sched_main) - sets up a callback (fence_add_callback) to
+ *  notify when the BO is free to move. fence_add_callback --> enable_signaling
+ *  --> amdgpu_amdkfd_fence.enable_signaling
+ *
+ * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
+ * user queues and signal the fence. The work item will also start another
+ * delayed work item to restore BOs.
+ */
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+						       struct mm_struct *mm)
+{
+	struct amdgpu_amdkfd_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (fence == NULL)
+		return NULL;
+
+	/* This reference gets released in amdkfd_fence_release */
+	mmgrab(mm);
+	fence->mm = mm;
+	get_task_comm(fence->timeline_name, current);
+	spin_lock_init(&fence->lock);
+
+	dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
+		   context, atomic_inc_return(&fence_seq));
+
+	return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence;
+
+	if (!f)
+		return NULL;
+
+	fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+	if (fence && f->ops == &amdkfd_fence_ops)
+		return fence;
+
+	return NULL;
+}
+
+static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
+{
+	return "amdgpu_amdkfd_fence";
+}
+
+static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	return fence->timeline_name;
+}
+
+/**
+ * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
+ *  a KFD BO and schedules a job to move the BO.
+ *  If the fence is already signaled, return true.
+ *  If the fence is not signaled, schedule an evict-KFD-process work item.
+ */
+static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	if (!fence)
+		return false;
+
+	if (dma_fence_is_signaled(f))
+		return true;
+
+	if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
+		return true;
+
+	return false;
+}
+
+/**
+ * amdkfd_fence_release - callback run when the fence can be freed
+ *
+ * @fence: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * Drops the mm_struct reference and RCU schedules freeing up the fence.
+ */
+static void amdkfd_fence_release(struct dma_fence *f)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	/* Unconditionally signal the fence. The process is getting
+	 * terminated.
+	 */
+	if (WARN_ON(!fence))
+		return; /* Not an amdgpu_amdkfd_fence */
+
+	mmdrop(fence->mm);
+	kfree_rcu(f, rcu);
+}
+
+/**
+ * amdkfd_fence_check_mm - Check whether @mm is the same as that of the
+ *  fence @f; returns true if so, false otherwise.
+ *
+ * @f: [IN] fence
+ * @mm: [IN] mm that needs to be verified
+ */
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+	if (!fence)
+		return false;
+	else if (fence->mm == mm)
+		return true;
+
+	return false;
+}
+
+static const struct dma_fence_ops amdkfd_fence_ops = {
+	.get_driver_name = amdkfd_fence_get_driver_name,
+	.get_timeline_name = amdkfd_fence_get_timeline_name,
+	.enable_signaling = amdkfd_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = dma_fence_default_wait,
+	.release = amdkfd_fence_release,
+};
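
The fence above only becomes useful once it is attached to a buffer's reservation object; from then on, any attempt by TTM to evict that buffer has to pass through enable_signaling() and thus through the KFD eviction path. A minimal sketch of that attachment, following the same pattern init_kfd_vm() uses further down; bo is a placeholder for an already created amdgpu_bo and error handling is omitted:

	/* Illustrative sketch: create the per-process eviction fence and add
	 * it as a shared fence on a BO's reservation object.
	 */
	struct amdgpu_amdkfd_fence *ef;

	ef = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
					current->mm);
	if (!ef)
		return -ENOMEM;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_fence(bo, &ef->base, true);	/* shared fence: blocks eviction */
	amdgpu_bo_unreserve(bo);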

+ 68 - 13
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 							uint8_t vmid);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
@@ -196,12 +199,26 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_get_offset = kgd_address_watch_get_offset,
 	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
-	.write_vmid_invalidate_request = write_vmid_invalidate_request,
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.get_cu_info = get_cu_info,
-	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+	.invalidate_tlbs = invalidate_tlbs,
+	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+	.submit_ib = amdgpu_amdkfd_submit_ib,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -787,14 +804,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
 	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 }
 
 static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -812,8 +822,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 	const union amdgpu_firmware_header *hdr;
 
-	BUG_ON(kgd == NULL);
-
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
@@ -866,3 +874,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	return hdr->common.ucode_version;
 }
 
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+			uint32_t page_table_base)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("trying to set page table base for wrong VMID\n");
+		return;
+	}
+	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int vmid;
+	unsigned int tmp;
+
+	for (vmid = 0; vmid < 16; vmid++) {
+		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+			continue;
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("non kfd vmid\n");
+		return 0;
+	}
+
+	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	RREG32(mmVM_INVALIDATE_RESPONSE);
+	return 0;
+}

+ 69 - 14
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 				uint32_t queue_id);
 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 				unsigned int utimeout);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static int kgd_address_watch_disable(struct kgd_dev *kgd);
 static int kgd_address_watch_execute(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
@@ -157,12 +159,26 @@ static const struct kfd2kgd_calls kfd2kgd = {
 			get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
-	.write_vmid_invalidate_request = write_vmid_invalidate_request,
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.get_cu_info = get_cu_info,
-	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+	.invalidate_tlbs = invalidate_tlbs,
+	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+	.submit_ib = amdgpu_amdkfd_submit_ib,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -704,14 +720,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
 	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 }
 
 static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -775,8 +784,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 	const union amdgpu_firmware_header *hdr;
 
-	BUG_ON(kgd == NULL);
-
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
@@ -828,3 +835,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	/* Only 12 bits in use */
 	return hdr->common.ucode_version;
 }
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("trying to set page table base for wrong VMID\n");
+		return;
+	}
+	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int vmid;
+	unsigned int tmp;
+
+	for (vmid = 0; vmid < 16; vmid++) {
+		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+			continue;
+
+		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+			RREG32(mmVM_INVALIDATE_RESPONSE);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("non kfd vmid %d\n", vmid);
+		return -EINVAL;
+	}
+
+	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+	RREG32(mmVM_INVALIDATE_RESPONSE);
+	return 0;
+}

+ 1577 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -0,0 +1,1577 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/list.h>
+#include <drm/drmP.h>
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_amdkfd.h"
+
+/* Special VM and GART address alignment needed for VI pre-Fiji due to
+ * a HW bug.
+ */
+#define VI_BO_SIZE_ALIGN (0x8000)
+
+/* Impose limit on how much memory KFD can use */
+static struct {
+	uint64_t max_system_mem_limit;
+	int64_t system_mem_used;
+	spinlock_t mem_limit_lock;
+} kfd_mem_limit;
+
+/* Struct used for amdgpu_amdkfd_bo_validate */
+struct amdgpu_vm_parser {
+	uint32_t        domain;
+	bool            wait;
+};
+
+static const char * const domain_bit_to_string[] = {
+		"CPU",
+		"GTT",
+		"VRAM",
+		"GDS",
+		"GWS",
+		"OA"
+};
+
+#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+
+
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+	return (struct amdgpu_device *)kgd;
+}
+
+static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
+		struct kgd_mem *mem)
+{
+	struct kfd_bo_va_list *entry;
+
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list)
+		if (entry->bo_va->base.vm == avm)
+			return false;
+
+	return true;
+}
+
+/* Set memory usage limits. Currently, the limits are:
+ *  System (kernel) memory - 3/8 of system RAM
+ */
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+	struct sysinfo si;
+	uint64_t mem;
+
+	si_meminfo(&si);
+	mem = si.totalram - si.totalhigh;
+	mem *= si.mem_unit;
+
+	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
+	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
+	pr_debug("Kernel memory limit %lluM\n",
+		(kfd_mem_limit.max_system_mem_limit >> 20));
+}
+
+static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
+					      uint64_t size, u32 domain)
+{
+	size_t acc_size;
+	int ret = 0;
+
+	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+				       sizeof(struct amdgpu_bo));
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
+			kfd_mem_limit.max_system_mem_limit) {
+			ret = -ENOMEM;
+			goto err_no_mem;
+		}
+		kfd_mem_limit.system_mem_used += (acc_size + size);
+	}
+err_no_mem:
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+	return ret;
+}
+
+static void unreserve_system_mem_limit(struct amdgpu_device *adev,
+				       uint64_t size, u32 domain)
+{
+	size_t acc_size;
+
+	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+				       sizeof(struct amdgpu_bo));
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	if (domain == AMDGPU_GEM_DOMAIN_GTT)
+		kfd_mem_limit.system_mem_used -= (acc_size + size);
+	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+		  "kfd system memory accounting unbalanced");
+
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+{
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+		kfd_mem_limit.system_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+	}
+	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+		  "kfd system memory accounting unbalanced");
+
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
+ *  reservation object.
+ *
+ * @bo: [IN] Remove eviction fence(s) from this BO
+ * @ef: [IN] If ef is specified, then this eviction fence is removed if it
+ *  is present in the shared list.
+ * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
+ *  from BO's reservation object shared list.
+ * @ef_count: [OUT] Number of fences in ef_list.
+ *
+ * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
+ *  called to restore the eviction fences and to avoid a memory leak. This is
+ *  useful for shared BOs.
+ * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
+ */
+static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
+					struct amdgpu_amdkfd_fence *ef,
+					struct amdgpu_amdkfd_fence ***ef_list,
+					unsigned int *ef_count)
+{
+	struct reservation_object_list *fobj;
+	struct reservation_object *resv;
+	unsigned int i = 0, j = 0, k = 0, shared_count;
+	unsigned int count = 0;
+	struct amdgpu_amdkfd_fence **fence_list;
+
+	if (!ef && !ef_list)
+		return -EINVAL;
+
+	if (ef_list) {
+		*ef_list = NULL;
+		*ef_count = 0;
+	}
+
+	resv = bo->tbo.resv;
+	fobj = reservation_object_get_list(resv);
+
+	if (!fobj)
+		return 0;
+
+	preempt_disable();
+	write_seqcount_begin(&resv->seq);
+
+	/* Go through all the shared fences in the reservation object. If
+	 * ef is specified and it exists in the list, remove it and reduce the
+	 * count. If ef is not specified, then get the count of eviction fences
+	 * present.
+	 */
+	shared_count = fobj->shared_count;
+	for (i = 0; i < shared_count; ++i) {
+		struct dma_fence *f;
+
+		f = rcu_dereference_protected(fobj->shared[i],
+					      reservation_object_held(resv));
+
+		if (ef) {
+			if (f->context == ef->base.context) {
+				dma_fence_put(f);
+				fobj->shared_count--;
+			} else {
+				RCU_INIT_POINTER(fobj->shared[j++], f);
+			}
+		} else if (to_amdgpu_amdkfd_fence(f))
+			count++;
+	}
+	write_seqcount_end(&resv->seq);
+	preempt_enable();
+
+	if (ef || !count)
+		return 0;
+
+	/* Alloc memory for count number of eviction fence pointers. Fill the
+	 * ef_list array and ef_count
+	 */
+	fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
+			     GFP_KERNEL);
+	if (!fence_list)
+		return -ENOMEM;
+
+	preempt_disable();
+	write_seqcount_begin(&resv->seq);
+
+	j = 0;
+	for (i = 0; i < shared_count; ++i) {
+		struct dma_fence *f;
+		struct amdgpu_amdkfd_fence *efence;
+
+		f = rcu_dereference_protected(fobj->shared[i],
+			reservation_object_held(resv));
+
+		efence = to_amdgpu_amdkfd_fence(f);
+		if (efence) {
+			fence_list[k++] = efence;
+			fobj->shared_count--;
+		} else {
+			RCU_INIT_POINTER(fobj->shared[j++], f);
+		}
+	}
+
+	write_seqcount_end(&resv->seq);
+	preempt_enable();
+
+	*ef_list = fence_list;
+	*ef_count = k;
+
+	return 0;
+}
+
+/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
+ *  reservation object.
+ *
+ * @bo: [IN] Add eviction fences to this BO
+ * @ef_list: [IN] List of eviction fences to be added
+ * @ef_count: [IN] Number of fences in ef_list.
+ *
+ * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
+ *  function.
+ */
+static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
+				struct amdgpu_amdkfd_fence **ef_list,
+				unsigned int ef_count)
+{
+	int i;
+
+	if (!ef_list || !ef_count)
+		return;
+
+	for (i = 0; i < ef_count; i++) {
+		amdgpu_bo_fence(bo, &ef_list[i]->base, true);
+		/* Re-adding the fence takes an additional reference. Drop that
+		 * reference.
+		 */
+		dma_fence_put(&ef_list[i]->base);
+	}
+
+	kfree(ef_list);
+}
+
+static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
+				     bool wait)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	int ret;
+
+	if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
+		 "Called with userptr BO"))
+		return -EINVAL;
+
+	amdgpu_ttm_placement_from_domain(bo, domain);
+
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		goto validate_fail;
+	if (wait) {
+		struct amdgpu_amdkfd_fence **ef_list;
+		unsigned int ef_count;
+
+		ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
+							  &ef_count);
+		if (ret)
+			goto validate_fail;
+
+		ttm_bo_wait(&bo->tbo, false, false);
+		amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
+	}
+
+validate_fail:
+	return ret;
+}
+
+static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
+{
+	struct amdgpu_vm_parser *p = param;
+
+	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
+}
+
+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
+ *
+ * Page directories are not updated here because huge page handling
+ * during page table updates can invalidate page directory entries
+ * again. Page directories are only updated after updating page
+ * tables.
+ */
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+{
+	struct amdgpu_bo *pd = vm->root.base.bo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+	struct amdgpu_vm_parser param;
+	uint64_t addr, flags = AMDGPU_PTE_VALID;
+	int ret;
+
+	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	param.wait = false;
+
+	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
+					&param);
+	if (ret) {
+		pr_err("amdgpu: failed to validate PT BOs\n");
+		return ret;
+	}
+
+	ret = amdgpu_amdkfd_validate(&param, pd);
+	if (ret) {
+		pr_err("amdgpu: failed to validate PD\n");
+		return ret;
+	}
+
+	addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
+	vm->pd_phys_addr = addr;
+
+	if (vm->use_cpu_for_update) {
+		ret = amdgpu_bo_kmap(pd, NULL);
+		if (ret) {
+			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+			 struct dma_fence *f)
+{
+	int ret = amdgpu_sync_fence(adev, sync, f, false);
+
+	/* Sync objects can't handle multiple GPUs (contexts) updating
+	 * sync->last_vm_update. Fortunately we don't need it for
+	 * KFD's purposes, so we can just drop that fence.
+	 */
+	if (sync->last_vm_update) {
+		dma_fence_put(sync->last_vm_update);
+		sync->last_vm_update = NULL;
+	}
+
+	return ret;
+}
+
+static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
+{
+	struct amdgpu_bo *pd = vm->root.base.bo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+	int ret;
+
+	ret = amdgpu_vm_update_directories(adev, vm);
+	if (ret)
+		return ret;
+
+	return sync_vm_fence(adev, sync, vm->last_update);
+}
+
+/* add_bo_to_vm - Add a BO to a VM
+ *
+ * Everything that needs to be done only once when a BO is first added
+ * to a VM. It can later be mapped and unmapped many times without
+ * repeating these steps.
+ *
+ * 1. Allocate and initialize BO VA entry data structure
+ * 2. Add BO to the VM
+ * 3. Determine ASIC-specific PTE flags
+ * 4. Alloc page tables and directories if needed
+ * 4a.  Validate new page tables and directories
+ */
+static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+		struct amdgpu_vm *vm, bool is_aql,
+		struct kfd_bo_va_list **p_bo_va_entry)
+{
+	int ret;
+	struct kfd_bo_va_list *bo_va_entry;
+	struct amdgpu_bo *pd = vm->root.base.bo;
+	struct amdgpu_bo *bo = mem->bo;
+	uint64_t va = mem->va;
+	struct list_head *list_bo_va = &mem->bo_va_list;
+	unsigned long bo_size = bo->tbo.mem.size;
+
+	if (!va) {
+		pr_err("Invalid VA when adding BO to VM\n");
+		return -EINVAL;
+	}
+
+	if (is_aql)
+		va += bo_size;
+
+	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
+	if (!bo_va_entry)
+		return -ENOMEM;
+
+	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+			va + bo_size, vm);
+
+	/* Add BO to VM internal data structures*/
+	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+	if (!bo_va_entry->bo_va) {
+		ret = -EINVAL;
+		pr_err("Failed to add BO object to VM. ret == %d\n",
+				ret);
+		goto err_vmadd;
+	}
+
+	bo_va_entry->va = va;
+	bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
+							 mem->mapping_flags);
+	bo_va_entry->kgd_dev = (void *)adev;
+	list_add(&bo_va_entry->bo_list, list_bo_va);
+
+	if (p_bo_va_entry)
+		*p_bo_va_entry = bo_va_entry;
+
+	/* Allocate new page tables if needed and validate
+	 * them. Clearing new page tables and validating them needs to wait
+	 * on move fences. We don't want that to trigger the eviction
+	 * fence, so remove it temporarily.
+	 */
+	amdgpu_amdkfd_remove_eviction_fence(pd,
+					vm->process_info->eviction_fence,
+					NULL, NULL);
+
+	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
+	if (ret) {
+		pr_err("Failed to allocate pts, err=%d\n", ret);
+		goto err_alloc_pts;
+	}
+
+	ret = vm_validate_pt_pd_bos(vm);
+	if (ret) {
+		pr_err("validate_pt_pd_bos() failed\n");
+		goto err_alloc_pts;
+	}
+
+	/* Add the eviction fence back */
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
+
+	return 0;
+
+err_alloc_pts:
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
+	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
+	list_del(&bo_va_entry->bo_list);
+err_vmadd:
+	kfree(bo_va_entry);
+	return ret;
+}
+
+static void remove_bo_from_vm(struct amdgpu_device *adev,
+		struct kfd_bo_va_list *entry, unsigned long size)
+{
+	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
+			entry->va,
+			entry->va + size, entry);
+	amdgpu_vm_bo_rmv(adev, entry->bo_va);
+	list_del(&entry->bo_list);
+	kfree(entry);
+}
+
+static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
+				struct amdkfd_process_info *process_info)
+{
+	struct ttm_validate_buffer *entry = &mem->validate_list;
+	struct amdgpu_bo *bo = mem->bo;
+
+	INIT_LIST_HEAD(&entry->head);
+	entry->shared = true;
+	entry->bo = &bo->tbo;
+	mutex_lock(&process_info->lock);
+	list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	mutex_unlock(&process_info->lock);
+}
+
+/* Reserving a BO and its page table BOs must happen atomically to
+ * avoid deadlocks. Some operations update multiple VMs at once. Track
+ * all the reservation info in a context structure. Optionally a sync
+ * object can track VM updates.
+ */
+struct bo_vm_reservation_context {
+	struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
+	unsigned int n_vms;		    /* Number of VMs reserved	    */
+	struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
+	struct ww_acquire_ctx ticket;	    /* Reservation ticket	    */
+	struct list_head list, duplicates;  /* BO lists			    */
+	struct amdgpu_sync *sync;	    /* Pointer to sync object	    */
+	bool reserved;			    /* Whether BOs are reserved	    */
+};
+
+enum bo_vm_match {
+	BO_VM_NOT_MAPPED = 0,	/* Match VMs where a BO is not mapped */
+	BO_VM_MAPPED,		/* Match VMs where a BO is mapped     */
+	BO_VM_ALL,		/* Match all VMs a BO was added to    */
+};
+
+/**
+ * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ */
+static int reserve_bo_and_vm(struct kgd_mem *mem,
+			      struct amdgpu_vm *vm,
+			      struct bo_vm_reservation_context *ctx)
+{
+	struct amdgpu_bo *bo = mem->bo;
+	int ret;
+
+	WARN_ON(!vm);
+
+	ctx->reserved = false;
+	ctx->n_vms = 1;
+	ctx->sync = &mem->sync;
+
+	INIT_LIST_HEAD(&ctx->list);
+	INIT_LIST_HEAD(&ctx->duplicates);
+
+	ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
+	if (!ctx->vm_pd)
+		return -ENOMEM;
+
+	ctx->kfd_bo.robj = bo;
+	ctx->kfd_bo.priority = 0;
+	ctx->kfd_bo.tv.bo = &bo->tbo;
+	ctx->kfd_bo.tv.shared = true;
+	ctx->kfd_bo.user_pages = NULL;
+	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
+
+	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
+
+	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
+				     false, &ctx->duplicates);
+	if (!ret)
+		ctx->reserved = true;
+	else {
+		pr_err("Failed to reserve buffers in ttm\n");
+		kfree(ctx->vm_pd);
+		ctx->vm_pd = NULL;
+	}
+
+	return ret;
+}
+
+/**
+ * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
+ * are used. Otherwise, only the given VM associated with the BO is reserved.
+ * @map_type: the mapping status that will be used to filter the VMs.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ *
+ * Returns 0 for success, negative for failure.
+ */
+static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
+				struct amdgpu_vm *vm, enum bo_vm_match map_type,
+				struct bo_vm_reservation_context *ctx)
+{
+	struct amdgpu_bo *bo = mem->bo;
+	struct kfd_bo_va_list *entry;
+	unsigned int i;
+	int ret;
+
+	ctx->reserved = false;
+	ctx->n_vms = 0;
+	ctx->vm_pd = NULL;
+	ctx->sync = &mem->sync;
+
+	INIT_LIST_HEAD(&ctx->list);
+	INIT_LIST_HEAD(&ctx->duplicates);
+
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if ((vm && vm != entry->bo_va->base.vm) ||
+			(entry->is_mapped != map_type
+			&& map_type != BO_VM_ALL))
+			continue;
+
+		ctx->n_vms++;
+	}
+
+	if (ctx->n_vms != 0) {
+		ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
+				     GFP_KERNEL);
+		if (!ctx->vm_pd)
+			return -ENOMEM;
+	}
+
+	ctx->kfd_bo.robj = bo;
+	ctx->kfd_bo.priority = 0;
+	ctx->kfd_bo.tv.bo = &bo->tbo;
+	ctx->kfd_bo.tv.shared = true;
+	ctx->kfd_bo.user_pages = NULL;
+	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
+
+	i = 0;
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if ((vm && vm != entry->bo_va->base.vm) ||
+			(entry->is_mapped != map_type
+			&& map_type != BO_VM_ALL))
+			continue;
+
+		amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
+				&ctx->vm_pd[i]);
+		i++;
+	}
+
+	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
+				     false, &ctx->duplicates);
+	if (!ret)
+		ctx->reserved = true;
+	else
+		pr_err("Failed to reserve buffers in ttm.\n");
+
+	if (ret) {
+		kfree(ctx->vm_pd);
+		ctx->vm_pd = NULL;
+	}
+
+	return ret;
+}
+
+/**
+ * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
+ * @ctx: Reservation context to unreserve
+ * @wait: Optionally wait for a sync object representing pending VM updates
+ * @intr: Whether the wait is interruptible
+ *
+ * Also frees any resources allocated in
+ * reserve_bo_and_(cond_)vm(s). Returns the status from
+ * amdgpu_sync_wait.
+ */
+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
+				 bool wait, bool intr)
+{
+	int ret = 0;
+
+	if (wait)
+		ret = amdgpu_sync_wait(ctx->sync, intr);
+
+	if (ctx->reserved)
+		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
+	kfree(ctx->vm_pd);
+
+	ctx->sync = NULL;
+
+	ctx->reserved = false;
+	ctx->vm_pd = NULL;
+
+	return ret;
+}
+
+static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
+				struct kfd_bo_va_list *entry,
+				struct amdgpu_sync *sync)
+{
+	struct amdgpu_bo_va *bo_va = entry->bo_va;
+	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdgpu_bo *pd = vm->root.base.bo;
+
+	/* Remove eviction fence from PD (and thereby from PTs too as
+	 * they share the resv. object). Otherwise during PT update
+	 * job (see amdgpu_vm_bo_update_mapping), eviction fence would
+	 * get added to job->sync object and job execution would
+	 * trigger the eviction fence.
+	 */
+	amdgpu_amdkfd_remove_eviction_fence(pd,
+					    vm->process_info->eviction_fence,
+					    NULL, NULL);
+	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+
+	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+
+	/* Add the eviction fence back */
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
+
+	sync_vm_fence(adev, sync, bo_va->last_pt_update);
+
+	return 0;
+}
+
+static int update_gpuvm_pte(struct amdgpu_device *adev,
+		struct kfd_bo_va_list *entry,
+		struct amdgpu_sync *sync)
+{
+	int ret;
+	struct amdgpu_vm *vm;
+	struct amdgpu_bo_va *bo_va;
+	struct amdgpu_bo *bo;
+
+	bo_va = entry->bo_va;
+	vm = bo_va->base.vm;
+	bo = bo_va->base.bo;
+
+	/* Update the page tables  */
+	ret = amdgpu_vm_bo_update(adev, bo_va, false);
+	if (ret) {
+		pr_err("amdgpu_vm_bo_update failed\n");
+		return ret;
+	}
+
+	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
+}
+
+static int map_bo_to_gpuvm(struct amdgpu_device *adev,
+		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+{
+	int ret;
+
+	/* Set virtual address for the allocation */
+	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
+			       amdgpu_bo_size(entry->bo_va->base.bo),
+			       entry->pte_flags);
+	if (ret) {
+		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
+				entry->va, ret);
+		return ret;
+	}
+
+	ret = update_gpuvm_pte(adev, entry, sync);
+	if (ret) {
+		pr_err("update_gpuvm_pte() failed\n");
+		goto update_gpuvm_pte_failed;
+	}
+
+	return 0;
+
+update_gpuvm_pte_failed:
+	unmap_bo_from_gpuvm(adev, entry, sync);
+	return ret;
+}
+
+static int process_validate_vms(struct amdkfd_process_info *process_info)
+{
+	struct amdgpu_vm *peer_vm;
+	int ret;
+
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		ret = vm_validate_pt_pd_bos(peer_vm);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int process_update_pds(struct amdkfd_process_info *process_info,
+			      struct amdgpu_sync *sync)
+{
+	struct amdgpu_vm *peer_vm;
+	int ret;
+
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		ret = vm_update_pds(peer_vm, sync);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
+		       struct dma_fence **ef)
+{
+	struct amdkfd_process_info *info = NULL;
+	int ret;
+
+	if (!*process_info) {
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
+		if (!info)
+			return -ENOMEM;
+
+		mutex_init(&info->lock);
+		INIT_LIST_HEAD(&info->vm_list_head);
+		INIT_LIST_HEAD(&info->kfd_bo_list);
+
+		info->eviction_fence =
+			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+						   current->mm);
+		if (!info->eviction_fence) {
+			pr_err("Failed to create eviction fence\n");
+			ret = -ENOMEM;
+			goto create_evict_fence_fail;
+		}
+
+		*process_info = info;
+		*ef = dma_fence_get(&info->eviction_fence->base);
+	}
+
+	vm->process_info = *process_info;
+
+	/* Validate page directory and attach eviction fence */
+	ret = amdgpu_bo_reserve(vm->root.base.bo, true);
+	if (ret)
+		goto reserve_pd_fail;
+	ret = vm_validate_pt_pd_bos(vm);
+	if (ret) {
+		pr_err("validate_pt_pd_bos() failed\n");
+		goto validate_pd_fail;
+	}
+	ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
+	if (ret)
+		goto wait_pd_fail;
+	amdgpu_bo_fence(vm->root.base.bo,
+			&vm->process_info->eviction_fence->base, true);
+	amdgpu_bo_unreserve(vm->root.base.bo);
+
+	/* Update process info */
+	mutex_lock(&vm->process_info->lock);
+	list_add_tail(&vm->vm_list_node,
+			&(vm->process_info->vm_list_head));
+	vm->process_info->n_vms++;
+	mutex_unlock(&vm->process_info->lock);
+
+	return 0;
+
+wait_pd_fail:
+validate_pd_fail:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+reserve_pd_fail:
+	vm->process_info = NULL;
+	if (info) {
+		/* Two fence references: one in info and one in *ef */
+		dma_fence_put(&info->eviction_fence->base);
+		dma_fence_put(*ef);
+		*ef = NULL;
+		*process_info = NULL;
+create_evict_fence_fail:
+		mutex_destroy(&info->lock);
+		kfree(info);
+	}
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+					  void **process_info,
+					  struct dma_fence **ef)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *new_vm;
+	int ret;
+
+	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+	if (!new_vm)
+		return -ENOMEM;
+
+	/* Initialize AMDGPU part of the VM */
+	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	if (ret) {
+		pr_err("Failed init vm ret %d\n", ret);
+		goto amdgpu_vm_init_fail;
+	}
+
+	/* Initialize KFD part of the VM and process info */
+	ret = init_kfd_vm(new_vm, process_info, ef);
+	if (ret)
+		goto init_kfd_vm_fail;
+
+	*vm = (void *) new_vm;
+
+	return 0;
+
+init_kfd_vm_fail:
+	amdgpu_vm_fini(adev, new_vm);
+amdgpu_vm_init_fail:
+	kfree(new_vm);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+					   struct file *filp,
+					   void **vm, void **process_info,
+					   struct dma_fence **ef)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct drm_file *drm_priv = filp->private_data;
+	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
+	struct amdgpu_vm *avm = &drv_priv->vm;
+	int ret;
+
+	/* Already a compute VM? */
+	if (avm->process_info)
+		return -EINVAL;
+
+	/* Convert VM into a compute VM */
+	ret = amdgpu_vm_make_compute(adev, avm);
+	if (ret)
+		return ret;
+
+	/* Initialize KFD part of the VM and process info */
+	ret = init_kfd_vm(avm, process_info, ef);
+	if (ret)
+		return ret;
+
+	*vm = (void *)avm;
+
+	return 0;
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm)
+{
+	struct amdkfd_process_info *process_info = vm->process_info;
+	struct amdgpu_bo *pd = vm->root.base.bo;
+
+	if (!process_info)
+		return;
+
+	/* Release eviction fence from PD */
+	amdgpu_bo_reserve(pd, false);
+	amdgpu_bo_fence(pd, NULL, false);
+	amdgpu_bo_unreserve(pd);
+
+	/* Update process info */
+	mutex_lock(&process_info->lock);
+	process_info->n_vms--;
+	list_del(&vm->vm_list_node);
+	mutex_unlock(&process_info->lock);
+
+	/* Release per-process resources when last compute VM is destroyed */
+	if (!process_info->n_vms) {
+		WARN_ON(!list_empty(&process_info->kfd_bo_list));
+
+		dma_fence_put(&process_info->eviction_fence->base);
+		mutex_destroy(&process_info->lock);
+		kfree(process_info);
+	}
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (WARN_ON(!kgd || !vm))
+		return;
+
+	pr_debug("Destroying process vm %p\n", vm);
+
+	/* Release the VM context */
+	amdgpu_vm_fini(adev, avm);
+	kfree(vm);
+}
+
+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+{
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+}
+
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+		struct kgd_dev *kgd, uint64_t va, uint64_t size,
+		void *vm, struct kgd_mem **mem,
+		uint64_t *offset, uint32_t flags)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_bo *bo;
+	int byte_align;
+	u32 alloc_domain;
+	u64 alloc_flags;
+	uint32_t mapping_flags;
+	int ret;
+
+	/*
+	 * Check on which domain to allocate BO
+	 */
+	if (flags & ALLOC_MEM_FLAGS_VRAM) {
+		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
+		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
+			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
+			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
+		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_flags = 0;
+	} else {
+		return -EINVAL;
+	}
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	mutex_init(&(*mem)->lock);
+	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
+
+	/* Workaround for AQL queue wraparound bug. Map the same
+	 * memory twice. That means we only actually allocate half
+	 * the memory.
+	 */
+	if ((*mem)->aql_queue)
+		size = size >> 1;
+
+	/* Workaround for TLB bug on older VI chips */
+	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
+			adev->asic_type != CHIP_FIJI &&
+			adev->asic_type != CHIP_POLARIS10 &&
+			adev->asic_type != CHIP_POLARIS11) ?
+			VI_BO_SIZE_ALIGN : 1;
+
+	mapping_flags = AMDGPU_VM_PAGE_READABLE;
+	if (flags & ALLOC_MEM_FLAGS_WRITABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+	if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+	if (flags & ALLOC_MEM_FLAGS_COHERENT)
+		mapping_flags |= AMDGPU_VM_MTYPE_UC;
+	else
+		mapping_flags |= AMDGPU_VM_MTYPE_NC;
+	(*mem)->mapping_flags = mapping_flags;
+
+	amdgpu_sync_create(&(*mem)->sync);
+
+	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+	if (ret) {
+		pr_debug("Insufficient system memory\n");
+		goto err_reserve_system_mem;
+	}
+
+	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
+			va, size, domain_string(alloc_domain));
+
+	ret = amdgpu_bo_create(adev, size, byte_align,
+				alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+	if (ret) {
+		pr_debug("Failed to create BO on domain %s. ret %d\n",
+				domain_string(alloc_domain), ret);
+		goto err_bo_create;
+	}
+	bo->kfd_bo = *mem;
+	(*mem)->bo = bo;
+
+	(*mem)->va = va;
+	(*mem)->domain = alloc_domain;
+	(*mem)->mapped_to_gpu_memory = 0;
+	(*mem)->process_info = avm->process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+
+	if (offset)
+		*offset = amdgpu_bo_mmap_offset(bo);
+
+	return 0;
+
+err_bo_create:
+	unreserve_system_mem_limit(adev, size, alloc_domain);
+err_reserve_system_mem:
+	mutex_destroy(&(*mem)->lock);
+	kfree(*mem);
+	return ret;
+}
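
A minimal sketch (the VA, size and helper name are placeholders, not part of the patch) of an allocation call using the flags handled above:

/* Hypothetical sketch: allocate a writable VRAM BO for a compute VM.
 * ALLOC_MEM_FLAGS_VRAM selects AMDGPU_GEM_DOMAIN_VRAM; without
 * ALLOC_MEM_FLAGS_PUBLIC the BO is created CPU-inaccessible.
 */
static int kfd_alloc_vram_example(struct kgd_dev *kgd, void *vm,
				  struct kgd_mem **mem)
{
	uint64_t mmap_offset;
	uint32_t flags = ALLOC_MEM_FLAGS_VRAM | ALLOC_MEM_FLAGS_WRITABLE;

	return amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, 0x1000000ULL,
						       2ULL << 20, vm, mem,
						       &mmap_offset, flags);
}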
+
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	unsigned long bo_size = mem->bo->tbo.mem.size;
+	struct kfd_bo_va_list *entry, *tmp;
+	struct bo_vm_reservation_context ctx;
+	struct ttm_validate_buffer *bo_list_entry;
+	int ret;
+
+	mutex_lock(&mem->lock);
+
+	if (mem->mapped_to_gpu_memory > 0) {
+		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
+				mem->va, bo_size);
+		mutex_unlock(&mem->lock);
+		return -EBUSY;
+	}
+
+	mutex_unlock(&mem->lock);
+	/* lock is not needed after this, since mem is unused and will
+	 * be freed anyway
+	 */
+
+	/* Make sure restore workers don't access the BO any more */
+	bo_list_entry = &mem->validate_list;
+	mutex_lock(&process_info->lock);
+	list_del(&bo_list_entry->head);
+	mutex_unlock(&process_info->lock);
+
+	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+	if (unlikely(ret))
+		return ret;
+
+	/* The eviction fence should be removed by the last unmap.
+	 * TODO: Log an error condition if the bo still has the eviction fence
+	 * attached
+	 */
+	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+					process_info->eviction_fence,
+					NULL, NULL);
+	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
+		mem->va + bo_size * (1 + mem->aql_queue));
+
+	/* Remove from VM internal data structures */
+	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
+		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
+				entry, bo_size);
+
+	ret = unreserve_bo_and_vms(&ctx, false, false);
+
+	/* Free the sync object */
+	amdgpu_sync_free(&mem->sync);
+
+	/* Free the BO */
+	amdgpu_bo_unref(&mem->bo);
+	mutex_destroy(&mem->lock);
+	kfree(mem);
+
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	int ret;
+	struct amdgpu_bo *bo;
+	uint32_t domain;
+	struct kfd_bo_va_list *entry;
+	struct bo_vm_reservation_context ctx;
+	struct kfd_bo_va_list *bo_va_entry = NULL;
+	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
+	unsigned long bo_size;
+
+	/* Make sure restore is not running concurrently.
+	 */
+	mutex_lock(&mem->process_info->lock);
+
+	mutex_lock(&mem->lock);
+
+	bo = mem->bo;
+
+	if (!bo) {
+		pr_err("Invalid BO when mapping memory to GPU\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	domain = mem->domain;
+	bo_size = bo->tbo.mem.size;
+
+	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
+			mem->va,
+			mem->va + bo_size * (1 + mem->aql_queue),
+			vm, domain_string(domain));
+
+	ret = reserve_bo_and_vm(mem, vm, &ctx);
+	if (unlikely(ret))
+		goto out;
+
+	if (check_if_add_bo_to_vm(avm, mem)) {
+		ret = add_bo_to_vm(adev, mem, avm, false,
+				&bo_va_entry);
+		if (ret)
+			goto add_bo_to_vm_failed;
+		if (mem->aql_queue) {
+			ret = add_bo_to_vm(adev, mem, avm,
+					true, &bo_va_entry_aql);
+			if (ret)
+				goto add_bo_to_vm_failed_aql;
+		}
+	} else {
+		ret = vm_validate_pt_pd_bos(avm);
+		if (unlikely(ret))
+			goto add_bo_to_vm_failed;
+	}
+
+	if (mem->mapped_to_gpu_memory == 0) {
+		/* Validate BO only once. The eviction fence gets added to BO
+		 * the first time it is mapped. Validate will wait for all
+		 * background evictions to complete.
+		 */
+		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+		if (ret) {
+			pr_debug("Validate failed\n");
+			goto map_bo_to_gpuvm_failed;
+		}
+	}
+
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
+			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
+					entry->va, entry->va + bo_size,
+					entry);
+
+			ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+			if (ret) {
+				pr_err("Failed to map bo to gpuvm\n");
+				goto map_bo_to_gpuvm_failed;
+			}
+
+			ret = vm_update_pds(vm, ctx.sync);
+			if (ret) {
+				pr_err("Failed to update page directories\n");
+				goto map_bo_to_gpuvm_failed;
+			}
+
+			entry->is_mapped = true;
+			mem->mapped_to_gpu_memory++;
+			pr_debug("\t INC mapping count %d\n",
+					mem->mapped_to_gpu_memory);
+		}
+	}
+
+	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
+		amdgpu_bo_fence(bo,
+				&avm->process_info->eviction_fence->base,
+				true);
+	ret = unreserve_bo_and_vms(&ctx, false, false);
+
+	goto out;
+
+map_bo_to_gpuvm_failed:
+	if (bo_va_entry_aql)
+		remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
+add_bo_to_vm_failed_aql:
+	if (bo_va_entry)
+		remove_bo_from_vm(adev, bo_va_entry, bo_size);
+add_bo_to_vm_failed:
+	unreserve_bo_and_vms(&ctx, false, false);
+out:
+	mutex_unlock(&mem->process_info->lock);
+	mutex_unlock(&mem->lock);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdkfd_process_info *process_info =
+		((struct amdgpu_vm *)vm)->process_info;
+	unsigned long bo_size = mem->bo->tbo.mem.size;
+	struct kfd_bo_va_list *entry;
+	struct bo_vm_reservation_context ctx;
+	int ret;
+
+	mutex_lock(&mem->lock);
+
+	ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
+	if (unlikely(ret))
+		goto out;
+	/* If no VMs were reserved, it means the BO wasn't actually mapped */
+	if (ctx.n_vms == 0) {
+		ret = -EINVAL;
+		goto unreserve_out;
+	}
+
+	ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
+	if (unlikely(ret))
+		goto unreserve_out;
+
+	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
+		mem->va,
+		mem->va + bo_size * (1 + mem->aql_queue),
+		vm);
+
+	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
+			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
+					entry->va,
+					entry->va + bo_size,
+					entry);
+
+			ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
+			if (ret == 0) {
+				entry->is_mapped = false;
+			} else {
+				pr_err("failed to unmap VA 0x%llx\n",
+						mem->va);
+				goto unreserve_out;
+			}
+
+			mem->mapped_to_gpu_memory--;
+			pr_debug("\t DEC mapping count %d\n",
+					mem->mapped_to_gpu_memory);
+		}
+	}
+
+	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
+	 * required.
+	 */
+	if (mem->mapped_to_gpu_memory == 0 &&
+	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
+		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+						process_info->eviction_fence,
+						    NULL, NULL);
+
+unreserve_out:
+	unreserve_bo_and_vms(&ctx, false, false);
+out:
+	mutex_unlock(&mem->lock);
+	return ret;
+}
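
A minimal sketch (the helper name and error handling are hypothetical) of a map/sync/unmap sequence built from the two functions above plus amdgpu_amdkfd_gpuvm_sync_memory() below:

/* Hypothetical sketch: map a BO into one GPUVM, wait for the page-table
 * updates to complete, then unmap it again.
 */
static int kfd_map_unmap_example(struct kgd_dev *kgd, struct kgd_mem *mem,
				 void *vm)
{
	int ret;

	ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
	if (ret)
		return ret;

	/* Wait (interruptibly) for the PT/PD updates queued while mapping. */
	ret = amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true);

	/* ... GPU work referencing the mapping would run here ... */

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);
	return ret;
}

Note that amdgpu_amdkfd_gpuvm_free_memory_of_gpu() above refuses to free a BO that is still mapped (mapped_to_gpu_memory > 0) and returns -EBUSY instead, so the unmap must happen before the free.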
+
+int amdgpu_amdkfd_gpuvm_sync_memory(
+		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
+{
+	struct amdgpu_sync sync;
+	int ret;
+
+	amdgpu_sync_create(&sync);
+
+	mutex_lock(&mem->lock);
+	amdgpu_sync_clone(&mem->sync, &sync);
+	mutex_unlock(&mem->lock);
+
+	ret = amdgpu_sync_wait(&sync, intr);
+	amdgpu_sync_free(&sync);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+		struct kgd_mem *mem, void **kptr, uint64_t *size)
+{
+	int ret;
+	struct amdgpu_bo *bo = mem->bo;
+
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		pr_err("userptr can't be mapped to kernel\n");
+		return -EINVAL;
+	}
+
+	/* Delete kgd_mem from kfd_bo_list to avoid re-validating
+	 * this BO when BOs are restored after an eviction.
+	 */
+	mutex_lock(&mem->process_info->lock);
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("Failed to reserve bo. ret %d\n", ret);
+		goto bo_reserve_failed;
+	}
+
+	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+	if (ret) {
+		pr_err("Failed to pin bo. ret %d\n", ret);
+		goto pin_failed;
+	}
+
+	ret = amdgpu_bo_kmap(bo, kptr);
+	if (ret) {
+		pr_err("Failed to map bo to kernel. ret %d\n", ret);
+		goto kmap_failed;
+	}
+
+	amdgpu_amdkfd_remove_eviction_fence(
+		bo, mem->process_info->eviction_fence, NULL, NULL);
+	list_del_init(&mem->validate_list.head);
+
+	if (size)
+		*size = amdgpu_bo_size(bo);
+
+	amdgpu_bo_unreserve(bo);
+
+	mutex_unlock(&mem->process_info->lock);
+	return 0;
+
+kmap_failed:
+	amdgpu_bo_unpin(bo);
+pin_failed:
+	amdgpu_bo_unreserve(bo);
+bo_reserve_failed:
+	mutex_unlock(&mem->process_info->lock);
+
+	return ret;
+}
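
A minimal sketch (the helper name is hypothetical) of how a GTT BO might be pinned and CPU-mapped with the function above:

/* Hypothetical sketch: obtain a kernel CPU mapping of a GTT BO, e.g. for a
 * structure the driver needs to read directly.
 */
static int kfd_kmap_gtt_bo_example(struct kgd_dev *kgd, struct kgd_mem *mem)
{
	void *cpu_ptr;
	uint64_t bo_size;
	int ret;

	ret = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kgd, mem,
						       &cpu_ptr, &bo_size);
	if (ret)
		return ret;

	pr_debug("kernel mapping %p, size 0x%llx\n", cpu_ptr, bo_size);
	return 0;
}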
+
+/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
+ *   KFD process identified by process_info
+ *
+ * @process_info: amdkfd_process_info of the KFD process
+ *
+ * After memory eviction, the restore thread calls this function. It must be
+ * called while the process is still valid. BO restore involves:
+ *
+ * 1.  Release the old eviction fence and create a new one
+ * 2.  Get two copies of the PD BO list from all the VMs. Keep one copy as pd_list.
+ * 3.  Use the second PD list and kfd_bo_list to create a list (ctx.list) of
+ *     BOs that need to be reserved.
+ * 4.  Reserve all the BOs
+ * 5.  Validate the PD and PT BOs.
+ * 6.  Validate all KFD BOs using kfd_bo_list, map them and add the new fence
+ * 7.  Add the fence to all PD and PT BOs.
+ * 8.  Unreserve all BOs
+ */
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+{
+	struct amdgpu_bo_list_entry *pd_bo_list;
+	struct amdkfd_process_info *process_info = info;
+	struct amdgpu_vm *peer_vm;
+	struct kgd_mem *mem;
+	struct bo_vm_reservation_context ctx;
+	struct amdgpu_amdkfd_fence *new_fence;
+	int ret = 0, i;
+	struct list_head duplicate_save;
+	struct amdgpu_sync sync_obj;
+
+	INIT_LIST_HEAD(&duplicate_save);
+	INIT_LIST_HEAD(&ctx.list);
+	INIT_LIST_HEAD(&ctx.duplicates);
+
+	pd_bo_list = kcalloc(process_info->n_vms,
+			     sizeof(struct amdgpu_bo_list_entry),
+			     GFP_KERNEL);
+	if (!pd_bo_list)
+		return -ENOMEM;
+
+	i = 0;
+	mutex_lock(&process_info->lock);
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			vm_list_node)
+		amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
+
+	/* Reserve all BOs and page tables/directory. Add all BOs from
+	 * kfd_bo_list to ctx.list
+	 */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+			    validate_list.head) {
+
+		list_add_tail(&mem->resv_list.head, &ctx.list);
+		mem->resv_list.bo = mem->validate_list.bo;
+		mem->resv_list.shared = mem->validate_list.shared;
+	}
+
+	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
+				     false, &duplicate_save);
+	if (ret) {
+		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
+		goto ttm_reserve_fail;
+	}
+
+	amdgpu_sync_create(&sync_obj);
+
+	/* Validate PDs and PTs */
+	ret = process_validate_vms(process_info);
+	if (ret)
+		goto validate_map_fail;
+
+	/* Wait for PD/PTs validate to finish */
+	/* FIXME: I think this isn't needed */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		struct amdgpu_bo *bo = peer_vm->root.base.bo;
+
+		ttm_bo_wait(&bo->tbo, false, false);
+	}
+
+	/* Validate BOs and map them to GPUVM (update VM page tables). */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+			    validate_list.head) {
+
+		struct amdgpu_bo *bo = mem->bo;
+		uint32_t domain = mem->domain;
+		struct kfd_bo_va_list *bo_va_entry;
+
+		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
+		if (ret) {
+			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
+			goto validate_map_fail;
+		}
+
+		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
+				    bo_list) {
+			ret = update_gpuvm_pte((struct amdgpu_device *)
+					      bo_va_entry->kgd_dev,
+					      bo_va_entry,
+					      &sync_obj);
+			if (ret) {
+				pr_debug("Memory eviction: update PTE failed. Try again\n");
+				goto validate_map_fail;
+			}
+		}
+	}
+
+	/* Update page directories */
+	ret = process_update_pds(process_info, &sync_obj);
+	if (ret) {
+		pr_debug("Memory eviction: update PDs failed. Try again\n");
+		goto validate_map_fail;
+	}
+
+	amdgpu_sync_wait(&sync_obj, false);
+
+	/* Release the old eviction fence and create a new one. A fence only
+	 * goes from unsignaled to signaled, so it cannot be reused.
+	 * Use the context and mm from the old fence.
+	 */
+	new_fence = amdgpu_amdkfd_fence_create(
+				process_info->eviction_fence->base.context,
+				process_info->eviction_fence->mm);
+	if (!new_fence) {
+		pr_err("Failed to create eviction fence\n");
+		ret = -ENOMEM;
+		goto validate_map_fail;
+	}
+	dma_fence_put(&process_info->eviction_fence->base);
+	process_info->eviction_fence = new_fence;
+	*ef = dma_fence_get(&new_fence->base);
+
+	/* Wait for validate to finish and attach new eviction fence */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+		validate_list.head)
+		ttm_bo_wait(&mem->bo->tbo, false, false);
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+		validate_list.head)
+		amdgpu_bo_fence(mem->bo,
+			&process_info->eviction_fence->base, true);
+
+	/* Attach eviction fence to PD / PT BOs */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		struct amdgpu_bo *bo = peer_vm->root.base.bo;
+
+		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
+	}
+
+validate_map_fail:
+	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
+	amdgpu_sync_free(&sync_obj);
+ttm_reserve_fail:
+	mutex_unlock(&process_info->lock);
+	kfree(pd_bo_list);
+	return ret;
+}
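
A minimal sketch (the worker wrapper is hypothetical) of how a restore worker might call the function above and take over the new eviction fence it returns:

/* Hypothetical sketch: run a restore pass and replace the caller's fence
 * reference with the newly created eviction fence.
 */
static void kfd_restore_worker_example(void *process_info,
				       struct dma_fence **process_ef)
{
	struct dma_fence *new_ef = NULL;
	int ret;

	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(process_info, &new_ef);
	if (ret) {
		pr_debug("restore failed (%d), will retry later\n", ret);
		return;
	}

	/* Drop our reference to the old fence and keep the new one. */
	dma_fence_put(*process_ef);
	*process_ef = new_ef;
}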

+ 91 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c

@@ -114,6 +114,9 @@ union igp_info {
 	struct atom_integrated_system_info_v1_11 v11;
 };
 
+union umc_info {
+	struct atom_umc_info_v3_1 v31;
+};
 /*
  * Return vram width from integrated system info table, if available,
  * or 0 if not.
@@ -143,6 +146,94 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
+					       int atom_mem_type)
+{
+	int vram_type;
+
+	if (adev->flags & AMD_IS_APU) {
+		switch (atom_mem_type) {
+		case Ddr2MemType:
+		case LpDdr2MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR2;
+			break;
+		case Ddr3MemType:
+		case LpDdr3MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR3;
+			break;
+		case Ddr4MemType:
+		case LpDdr4MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR4;
+			break;
+		default:
+			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+			break;
+		}
+	} else {
+		switch (atom_mem_type) {
+		case ATOM_DGPU_VRAM_TYPE_GDDR5:
+			vram_type = AMDGPU_VRAM_TYPE_GDDR5;
+			break;
+		case ATOM_DGPU_VRAM_TYPE_HBM:
+			vram_type = AMDGPU_VRAM_TYPE_HBM;
+			break;
+		default:
+			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+			break;
+		}
+	}
+
+	return vram_type;
+}
+/*
+ * Return vram type from either integrated system info table
+ * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
+ */
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	int index;
+	u16 data_offset, size;
+	union igp_info *igp_info;
+	union umc_info *umc_info;
+	u8 frev, crev;
+	u8 mem_type;
+
+	if (adev->flags & AMD_IS_APU)
+		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+						    integratedsysteminfo);
+	else
+		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+						    umc_info);
+	if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+					  index, &size,
+					  &frev, &crev, &data_offset)) {
+		if (adev->flags & AMD_IS_APU) {
+			igp_info = (union igp_info *)
+				(mode_info->atom_context->bios + data_offset);
+			switch (crev) {
+			case 11:
+				mem_type = igp_info->v11.memorytype;
+				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+			default:
+				return 0;
+			}
+		} else {
+			umc_info = (union umc_info *)
+				(mode_info->atom_context->bios + data_offset);
+			switch (crev) {
+			case 1:
+				mem_type = umc_info->v31.vram_type;
+				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+			default:
+				return 0;
+			}
+		}
+	}
+
+	return 0;
+}
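
A minimal sketch (the wrapper is hypothetical) of querying the vram type during init; per the comment above, 0 (AMDGPU_VRAM_TYPE_UNKNOWN) means the tables did not report a type:

static void example_query_vram_type(struct amdgpu_device *adev)
{
	int vram_type = amdgpu_atomfirmware_get_vram_type(adev);

	if (vram_type == AMDGPU_VRAM_TYPE_UNKNOWN)
		pr_info("vram type not reported by the VBIOS tables\n");
	else
		pr_info("vram type %d reported by the VBIOS tables\n", vram_type);
}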
+
 union firmware_info {
 	struct atom_firmware_info_v3_1 v31;
 };
@@ -151,10 +242,6 @@ union smu_info {
 	struct atom_smu_info_v3_1 v31;
 };
 
-union umc_info {
-	struct atom_umc_info_v3_1 v31;
-};
-
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
 {
 	struct amdgpu_mode_info *mode_info = &adev->mode_info;

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h

@@ -28,6 +28,7 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
 
 #endif

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c

@@ -569,6 +569,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
 	{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0, 0, 0, 0, 0 },
 };
 

+ 4 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c

@@ -80,8 +80,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 	int time;
 
 	n = AMDGPU_BENCHMARK_ITERATIONS;
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL,
-			     NULL, 0, &sobj);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, sdomain, 0,
+			     ttm_bo_type_kernel, NULL, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
@@ -93,8 +93,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
 	if (r) {
 		goto out_cleanup;
 	}
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL,
-			     NULL, 0, &dobj);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
+			     ttm_bo_type_kernel, NULL, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}
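
The two hunks above reflect the reworked amdgpu_bo_create() signature used throughout this merge: (adev, size, byte_align, domain, flags, bo type, resv, &bo). A minimal sketch of the new call order (values are placeholders):

static int example_create_gtt_bo(struct amdgpu_device *adev,
				 struct amdgpu_bo **bo)
{
	/* One page, page-aligned, in GTT, no special flags, kernel-owned BO. */
	return amdgpu_bo_create(adev, PAGE_SIZE, PAGE_SIZE,
				AMDGPU_GEM_DOMAIN_GTT, 0,
				ttm_bo_type_kernel, NULL, bo);
}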

+ 4 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c

@@ -233,8 +233,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 	for (i = 0; i < list->num_entries; i++) {
 		unsigned priority = list->array[i].priority;
 
-		list_add_tail(&list->array[i].tv.head,
-			      &bucket[priority]);
+		if (!list->array[i].robj->parent)
+			list_add_tail(&list->array[i].tv.head,
+				      &bucket[priority]);
+
 		list->array[i].user_pages = NULL;
 	}
 

+ 9 - 541
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

@@ -24,12 +24,10 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/acpi.h>
 #include <drm/drmP.h>
 #include <linux/firmware.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
-#include "cgs_linux.h"
 #include "atom.h"
 #include "amdgpu_ucode.h"
 
@@ -42,152 +40,6 @@ struct amdgpu_cgs_device {
 	struct amdgpu_device *adev =					\
 		((struct amdgpu_cgs_device *)cgs_device)->adev
 
-static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device,
-			int (*call_back_func)(struct amd_pp_init *, void **))
-{
-	CGS_FUNC_ADEV;
-	struct amd_pp_init pp_init;
-	struct amd_powerplay *amd_pp;
-
-	if (call_back_func == NULL)
-		return NULL;
-
-	amd_pp = &(adev->powerplay);
-	pp_init.chip_family = adev->family;
-	pp_init.chip_id = adev->asic_type;
-	pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
-	pp_init.feature_mask = amdgpu_pp_feature_mask;
-	pp_init.device = cgs_device;
-	if (call_back_func(&pp_init, &(amd_pp->pp_handle)))
-		return NULL;
-
-	return adev->powerplay.pp_handle;
-}
-
-static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
-				    enum cgs_gpu_mem_type type,
-				    uint64_t size, uint64_t align,
-				    cgs_handle_t *handle)
-{
-	CGS_FUNC_ADEV;
-	uint16_t flags = 0;
-	int ret = 0;
-	uint32_t domain = 0;
-	struct amdgpu_bo *obj;
-
-	/* fail if the alignment is not a power of 2 */
-	if (((align != 1) && (align & (align - 1)))
-	    || size == 0 || align == 0)
-		return -EINVAL;
-
-
-	switch(type) {
-	case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-		flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		break;
-	case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
-	case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-		flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		break;
-	case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
-		domain = AMDGPU_GEM_DOMAIN_GTT;
-		break;
-	case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
-		flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-		domain = AMDGPU_GEM_DOMAIN_GTT;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-
-	*handle = 0;
-
-	ret = amdgpu_bo_create(adev, size, align, true, domain, flags,
-			       NULL, NULL, 0, &obj);
-	if (ret) {
-		DRM_ERROR("(%d) bo create failed\n", ret);
-		return ret;
-	}
-	*handle = (cgs_handle_t)obj;
-
-	return ret;
-}
-
-static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
-	if (obj) {
-		int r = amdgpu_bo_reserve(obj, true);
-		if (likely(r == 0)) {
-			amdgpu_bo_kunmap(obj);
-			amdgpu_bo_unpin(obj);
-			amdgpu_bo_unreserve(obj);
-		}
-		amdgpu_bo_unref(&obj);
-
-	}
-	return 0;
-}
-
-static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
-				   uint64_t *mcaddr)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
-	WARN_ON_ONCE(obj->placement.num_placement > 1);
-
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
-
-static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	r = amdgpu_bo_unpin(obj);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
-
-static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
-				   void **map)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	r = amdgpu_bo_kmap(obj, map);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
-
-static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
-	int r;
-	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-	r = amdgpu_bo_reserve(obj, true);
-	if (unlikely(r != 0))
-		return r;
-	amdgpu_bo_kunmap(obj);
-	amdgpu_bo_unreserve(obj);
-	return r;
-}
 
 static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
 {
@@ -329,109 +181,6 @@ static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigne
 		adev->mode_info.atom_context, table, args);
 }
 
-struct cgs_irq_params {
-	unsigned src_id;
-	cgs_irq_source_set_func_t set;
-	cgs_irq_handler_func_t handler;
-	void *private_data;
-};
-
-static int cgs_set_irq_state(struct amdgpu_device *adev,
-			     struct amdgpu_irq_src *src,
-			     unsigned type,
-			     enum amdgpu_interrupt_state state)
-{
-	struct cgs_irq_params *irq_params =
-		(struct cgs_irq_params *)src->data;
-	if (!irq_params)
-		return -EINVAL;
-	if (!irq_params->set)
-		return -EINVAL;
-	return irq_params->set(irq_params->private_data,
-			       irq_params->src_id,
-			       type,
-			       (int)state);
-}
-
-static int cgs_process_irq(struct amdgpu_device *adev,
-			   struct amdgpu_irq_src *source,
-			   struct amdgpu_iv_entry *entry)
-{
-	struct cgs_irq_params *irq_params =
-		(struct cgs_irq_params *)source->data;
-	if (!irq_params)
-		return -EINVAL;
-	if (!irq_params->handler)
-		return -EINVAL;
-	return irq_params->handler(irq_params->private_data,
-				   irq_params->src_id,
-				   entry->iv_entry);
-}
-
-static const struct amdgpu_irq_src_funcs cgs_irq_funcs = {
-	.set = cgs_set_irq_state,
-	.process = cgs_process_irq,
-};
-
-static int amdgpu_cgs_add_irq_source(void *cgs_device,
-				     unsigned client_id,
-				     unsigned src_id,
-				     unsigned num_types,
-				     cgs_irq_source_set_func_t set,
-				     cgs_irq_handler_func_t handler,
-				     void *private_data)
-{
-	CGS_FUNC_ADEV;
-	int ret = 0;
-	struct cgs_irq_params *irq_params;
-	struct amdgpu_irq_src *source =
-		kzalloc(sizeof(struct amdgpu_irq_src), GFP_KERNEL);
-	if (!source)
-		return -ENOMEM;
-	irq_params =
-		kzalloc(sizeof(struct cgs_irq_params), GFP_KERNEL);
-	if (!irq_params) {
-		kfree(source);
-		return -ENOMEM;
-	}
-	source->num_types = num_types;
-	source->funcs = &cgs_irq_funcs;
-	irq_params->src_id = src_id;
-	irq_params->set = set;
-	irq_params->handler = handler;
-	irq_params->private_data = private_data;
-	source->data = (void *)irq_params;
-	ret = amdgpu_irq_add_id(adev, client_id, src_id, source);
-	if (ret) {
-		kfree(irq_params);
-		kfree(source);
-	}
-
-	return ret;
-}
-
-static int amdgpu_cgs_irq_get(void *cgs_device, unsigned client_id,
-			      unsigned src_id, unsigned type)
-{
-	CGS_FUNC_ADEV;
-
-	if (!adev->irq.client[client_id].sources)
-		return -EINVAL;
-
-	return amdgpu_irq_get(adev, adev->irq.client[client_id].sources[src_id], type);
-}
-
-static int amdgpu_cgs_irq_put(void *cgs_device, unsigned client_id,
-			      unsigned src_id, unsigned type)
-{
-	CGS_FUNC_ADEV;
-
-	if (!adev->irq.client[client_id].sources)
-		return -EINVAL;
-
-	return amdgpu_irq_put(adev, adev->irq.client[client_id].sources[src_id], type);
-}
-
 static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
 				  enum amd_ip_block_type block_type,
 				  enum amd_clockgating_state state)
@@ -801,11 +550,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 				else
 					strcpy(fw_name, "amdgpu/vega10_smc.bin");
 				break;
-			case CHIP_CARRIZO:
-			case CHIP_STONEY:
-			case CHIP_RAVEN:
-				adev->pm.fw_version = info->version;
-				return 0;
+			case CHIP_VEGA12:
+				strcpy(fw_name, "amdgpu/vega12_smc.bin");
+				break;
 			default:
 				DRM_ERROR("SMC firmware not supported\n");
 				return -EINVAL;
@@ -857,61 +604,6 @@ static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
 	return amdgpu_sriov_vf(adev);
 }
 
-static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
-					struct cgs_system_info *sys_info)
-{
-	CGS_FUNC_ADEV;
-
-	if (NULL == sys_info)
-		return -ENODEV;
-
-	if (sizeof(struct cgs_system_info) != sys_info->size)
-		return -ENODEV;
-
-	switch (sys_info->info_id) {
-	case CGS_SYSTEM_INFO_ADAPTER_BDF_ID:
-		sys_info->value = adev->pdev->devfn | (adev->pdev->bus->number << 8);
-		break;
-	case CGS_SYSTEM_INFO_PCIE_GEN_INFO:
-		sys_info->value = adev->pm.pcie_gen_mask;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_MLW:
-		sys_info->value = adev->pm.pcie_mlw_mask;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_DEV:
-		sys_info->value = adev->pdev->device;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_REV:
-		sys_info->value = adev->pdev->revision;
-		break;
-	case CGS_SYSTEM_INFO_CG_FLAGS:
-		sys_info->value = adev->cg_flags;
-		break;
-	case CGS_SYSTEM_INFO_PG_FLAGS:
-		sys_info->value = adev->pg_flags;
-		break;
-	case CGS_SYSTEM_INFO_GFX_CU_INFO:
-		sys_info->value = adev->gfx.cu_info.number;
-		break;
-	case CGS_SYSTEM_INFO_GFX_SE_INFO:
-		sys_info->value = adev->gfx.config.max_shader_engines;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_SUB_SYS_ID:
-		sys_info->value = adev->pdev->subsystem_device;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID:
-		sys_info->value = adev->pdev->subsystem_vendor;
-		break;
-	case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN:
-		sys_info->value = adev->pdev->devfn;
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
 static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 					  struct cgs_display_info *info)
 {
@@ -922,12 +614,9 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 		return -EINVAL;
 
 	mode_info = info->mode_info;
-	if (mode_info) {
+	if (mode_info)
 		/* if the displays are off, vblank time is max */
 		mode_info->vblank_time_us = 0xffffffff;
-		/* always set the reference clock */
-		mode_info->ref_clock = adev->clock.spll.reference_freq;
-	}
 
 	if (!amdgpu_device_has_dc_support(adev)) {
 		struct amdgpu_crtc *amdgpu_crtc;
@@ -953,6 +642,11 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 								(amdgpu_crtc->v_border * 2);
 					mode_info->vblank_time_us = vblank_lines * line_time_us;
 					mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
+					/* we have issues with mclk switching with refresh rates
+					 * over 120 Hz on the non-DC code.
+					 */
+					if (mode_info->refresh_rate > 120)
+						mode_info->vblank_time_us = 0;
 					mode_info = NULL;
 				}
 			}
@@ -977,223 +671,7 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena
 	return 0;
 }
 
-/** \brief evaluate acpi namespace object, handle or pathname must be valid
- *  \param cgs_device
- *  \param info input/output arguments for the control method
- *  \return status
- */
-
-#if defined(CONFIG_ACPI)
-static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
-				    struct cgs_acpi_method_info *info)
-{
-	CGS_FUNC_ADEV;
-	acpi_handle handle;
-	struct acpi_object_list input;
-	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *params, *obj;
-	uint8_t name[5] = {'\0'};
-	struct cgs_acpi_method_argument *argument;
-	uint32_t i, count;
-	acpi_status status;
-	int result;
-
-	handle = ACPI_HANDLE(&adev->pdev->dev);
-	if (!handle)
-		return -ENODEV;
-
-	memset(&input, 0, sizeof(struct acpi_object_list));
-
-	/* validate input info */
-	if (info->size != sizeof(struct cgs_acpi_method_info))
-		return -EINVAL;
-
-	input.count = info->input_count;
-	if (info->input_count > 0) {
-		if (info->pinput_argument == NULL)
-			return -EINVAL;
-		argument = info->pinput_argument;
-		for (i = 0; i < info->input_count; i++) {
-			if (((argument->type == ACPI_TYPE_STRING) ||
-			     (argument->type == ACPI_TYPE_BUFFER)) &&
-			    (argument->pointer == NULL))
-				return -EINVAL;
-			argument++;
-		}
-	}
-
-	if (info->output_count > 0) {
-		if (info->poutput_argument == NULL)
-			return -EINVAL;
-		argument = info->poutput_argument;
-		for (i = 0; i < info->output_count; i++) {
-			if (((argument->type == ACPI_TYPE_STRING) ||
-				(argument->type == ACPI_TYPE_BUFFER))
-				&& (argument->pointer == NULL))
-				return -EINVAL;
-			argument++;
-		}
-	}
-
-	/* The path name passed to acpi_evaluate_object should be null terminated */
-	if ((info->field & CGS_ACPI_FIELD_METHOD_NAME) != 0) {
-		strncpy(name, (char *)&(info->name), sizeof(uint32_t));
-		name[4] = '\0';
-	}
-
-	/* parse input parameters */
-	if (input.count > 0) {
-		input.pointer = params =
-				kzalloc(sizeof(union acpi_object) * input.count, GFP_KERNEL);
-		if (params == NULL)
-			return -EINVAL;
-
-		argument = info->pinput_argument;
-
-		for (i = 0; i < input.count; i++) {
-			params->type = argument->type;
-			switch (params->type) {
-			case ACPI_TYPE_INTEGER:
-				params->integer.value = argument->value;
-				break;
-			case ACPI_TYPE_STRING:
-				params->string.length = argument->data_length;
-				params->string.pointer = argument->pointer;
-				break;
-			case ACPI_TYPE_BUFFER:
-				params->buffer.length = argument->data_length;
-				params->buffer.pointer = argument->pointer;
-				break;
-			default:
-				break;
-			}
-			params++;
-			argument++;
-		}
-	}
-
-	/* parse output info */
-	count = info->output_count;
-	argument = info->poutput_argument;
-
-	/* evaluate the acpi method */
-	status = acpi_evaluate_object(handle, name, &input, &output);
-
-	if (ACPI_FAILURE(status)) {
-		result = -EIO;
-		goto free_input;
-	}
-
-	/* return the output info */
-	obj = output.pointer;
-
-	if (count > 1) {
-		if ((obj->type != ACPI_TYPE_PACKAGE) ||
-			(obj->package.count != count)) {
-			result = -EIO;
-			goto free_obj;
-		}
-		params = obj->package.elements;
-	} else
-		params = obj;
-
-	if (params == NULL) {
-		result = -EIO;
-		goto free_obj;
-	}
-
-	for (i = 0; i < count; i++) {
-		if (argument->type != params->type) {
-			result = -EIO;
-			goto free_obj;
-		}
-		switch (params->type) {
-		case ACPI_TYPE_INTEGER:
-			argument->value = params->integer.value;
-			break;
-		case ACPI_TYPE_STRING:
-			if ((params->string.length != argument->data_length) ||
-				(params->string.pointer == NULL)) {
-				result = -EIO;
-				goto free_obj;
-			}
-			strncpy(argument->pointer,
-				params->string.pointer,
-				params->string.length);
-			break;
-		case ACPI_TYPE_BUFFER:
-			if (params->buffer.pointer == NULL) {
-				result = -EIO;
-				goto free_obj;
-			}
-			memcpy(argument->pointer,
-				params->buffer.pointer,
-				argument->data_length);
-			break;
-		default:
-			break;
-		}
-		argument++;
-		params++;
-	}
-
-	result = 0;
-free_obj:
-	kfree(obj);
-free_input:
-	kfree((void *)input.pointer);
-	return result;
-}
-#else
-static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
-				struct cgs_acpi_method_info *info)
-{
-	return -EIO;
-}
-#endif
-
-static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
-					uint32_t acpi_method,
-					uint32_t acpi_function,
-					void *pinput, void *poutput,
-					uint32_t output_count,
-					uint32_t input_size,
-					uint32_t output_size)
-{
-	struct cgs_acpi_method_argument acpi_input[2] = { {0}, {0} };
-	struct cgs_acpi_method_argument acpi_output = {0};
-	struct cgs_acpi_method_info info = {0};
-
-	acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
-	acpi_input[0].data_length = sizeof(uint32_t);
-	acpi_input[0].value = acpi_function;
-
-	acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
-	acpi_input[1].data_length = input_size;
-	acpi_input[1].pointer = pinput;
-
-	acpi_output.type = CGS_ACPI_TYPE_BUFFER;
-	acpi_output.data_length = output_size;
-	acpi_output.pointer = poutput;
-
-	info.size = sizeof(struct cgs_acpi_method_info);
-	info.field = CGS_ACPI_FIELD_METHOD_NAME | CGS_ACPI_FIELD_INPUT_ARGUMENT_COUNT;
-	info.input_count = 2;
-	info.name = acpi_method;
-	info.pinput_argument = acpi_input;
-	info.output_count = output_count;
-	info.poutput_argument = &acpi_output;
-
-	return amdgpu_cgs_acpi_eval_object(cgs_device, &info);
-}
-
 static const struct cgs_ops amdgpu_cgs_ops = {
-	.alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
-	.free_gpu_mem = amdgpu_cgs_free_gpu_mem,
-	.gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem,
-	.gunmap_gpu_mem = amdgpu_cgs_gunmap_gpu_mem,
-	.kmap_gpu_mem = amdgpu_cgs_kmap_gpu_mem,
-	.kunmap_gpu_mem = amdgpu_cgs_kunmap_gpu_mem,
 	.read_register = amdgpu_cgs_read_register,
 	.write_register = amdgpu_cgs_write_register,
 	.read_ind_register = amdgpu_cgs_read_ind_register,
@@ -1208,18 +686,9 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	.set_clockgating_state = amdgpu_cgs_set_clockgating_state,
 	.get_active_displays_info = amdgpu_cgs_get_active_displays_info,
 	.notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
-	.call_acpi_method = amdgpu_cgs_call_acpi_method,
-	.query_system_info = amdgpu_cgs_query_system_info,
 	.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
 	.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
 	.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
-	.register_pp_handle = amdgpu_cgs_register_pp_handle,
-};
-
-static const struct cgs_os_ops amdgpu_cgs_os_ops = {
-	.add_irq_source = amdgpu_cgs_add_irq_source,
-	.irq_get = amdgpu_cgs_irq_get,
-	.irq_put = amdgpu_cgs_irq_put
 };
 
 struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
@@ -1233,7 +702,6 @@ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
 	}
 
 	cgs_device->base.ops = &amdgpu_cgs_ops;
-	cgs_device->base.os_ops = &amdgpu_cgs_os_ops;
 	cgs_device->adev = adev;
 
 	return (struct cgs_device *)cgs_device;

+ 6 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

@@ -878,7 +878,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
 		ret = connector_status_disconnected;
 
 	if (amdgpu_connector->ddc_bus)
-		dret = amdgpu_ddc_probe(amdgpu_connector, false);
+		dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
 	if (dret) {
 		amdgpu_connector->detected_by_load = false;
 		amdgpu_connector_free_edid(connector);
@@ -1003,7 +1003,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 	}
 
 	if (amdgpu_connector->ddc_bus)
-		dret = amdgpu_ddc_probe(amdgpu_connector, false);
+		dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
 	if (dret) {
 		amdgpu_connector->detected_by_load = false;
 		amdgpu_connector_free_edid(connector);
@@ -1410,7 +1410,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 			/* setup ddc on the bridge */
 			amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
 			/* bridge chips are always aux */
-			if (amdgpu_ddc_probe(amdgpu_connector, true)) /* try DDC */
+			/* try DDC */
+			if (amdgpu_display_ddc_probe(amdgpu_connector, true))
 				ret = connector_status_connected;
 			else if (amdgpu_connector->dac_load_detect) { /* try load detection */
 				const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
@@ -1430,7 +1431,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 					ret = connector_status_connected;
 			} else {
 				/* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */
-				if (amdgpu_ddc_probe(amdgpu_connector, false))
+				if (amdgpu_display_ddc_probe(amdgpu_connector,
+							     false))
 					ret = connector_status_connected;
 			}
 		}

+ 11 - 11
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -257,7 +257,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 		return;
 	}
 
-	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
+	total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
 	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
@@ -302,8 +302,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 
 	/* Do the same for visible VRAM if half of it is free */
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
-		u64 total_vis_vram = adev->mc.visible_vram_size;
+	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 
@@ -346,8 +346,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
-		.allow_reserved_eviction = false,
-		.resv = bo->tbo.resv
+		.resv = bo->tbo.resv,
+		.flags = 0
 	};
 	uint32_t domain;
 	int r;
@@ -359,7 +359,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
 	if (p->bytes_moved < p->bytes_moved_threshold) {
-		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
 			 * visible VRAM if we've depleted our allowance to do
@@ -381,9 +381,9 @@ retry:
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
 	p->bytes_moved += ctx.bytes_moved;
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		p->bytes_moved_vis += ctx.bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -437,9 +437,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
 		update_bytes_moved_vis =
-			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
+			bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
@@ -542,7 +542,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	while (1) {

+ 12 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

@@ -767,10 +767,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+
+	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+	return 0;
+}
+
 static const struct drm_info_list amdgpu_debugfs_list[] = {
 	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
 	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
-	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}
+	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
+	{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
 };
 
 int amdgpu_debugfs_init(struct amdgpu_device *adev)

+ 589 - 118
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -59,6 +59,7 @@
 #include "amdgpu_pm.h"
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS		2000
@@ -83,10 +84,21 @@ static const char *amdgpu_asic_name[] = {
 	"POLARIS11",
 	"POLARIS12",
 	"VEGA10",
+	"VEGA12",
 	"RAVEN",
 	"LAST",
 };
 
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+
+/**
+ * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device is a dGPU with HG/PX power control,
+ * otherwise returns false.
+ */
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
@@ -99,6 +111,15 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 /*
  * MMIO register access helper functions.
  */
+/**
+ * amdgpu_mm_rreg - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			uint32_t acc_flags)
 {
@@ -121,6 +142,58 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 	return ret;
 }
 
+/*
+ * MMIO register byte read helper function
+ * @offset: byte offset from MMIO start
+ */
+
+/**
+ * amdgpu_mm_rreg8 - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ *
+ * Returns the 8 bit value from the offset specified.
+ */
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
+	if (offset < adev->rmmio_size)
+		return (readb(adev->rmmio + offset));
+	BUG();
+}
+
+/*
+ * MMIO register byte write helper function
+ * @offset: byte offset from MMIO start
+ * @value: the value to be written to the register
+ */
+/**
+ * amdgpu_mm_wreg8 - write to a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ * @value: 8 bit value to write
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
+	if (offset < adev->rmmio_size)
+		writeb(value, adev->rmmio + offset);
+	else
+		BUG();
+}
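
A minimal sketch (the offset is a placeholder, not a real register) of a read-modify-write on a single MMIO byte using the new helpers:

static void example_rmw_mmio_byte(struct amdgpu_device *adev,
				  uint32_t byte_offset)
{
	uint8_t val = amdgpu_mm_rreg8(adev, byte_offset);

	val |= 0x1;	/* set bit 0 */
	amdgpu_mm_wreg8(adev, byte_offset, val);
}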
+
+/**
+ * amdgpu_mm_wreg - write to a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ *
+ * Writes the value specified to the offset specified.
+ */
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    uint32_t acc_flags)
 {
@@ -149,6 +222,14 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 	}
 }
 
+/**
+ * amdgpu_io_rreg - read an IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 {
 	if ((reg * 4) < adev->rio_mem_size)
@@ -159,6 +240,15 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 	}
 }
 
+/**
+ * amdgpu_io_wreg - write to an IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ *
+ * Writes the value specified to the offset specified.
+ */
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
@@ -327,6 +417,14 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 	BUG();
 }
 
+/**
+ * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Allocates a scratch page of VRAM for use by various things in the
+ * driver.
+ */
 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 {
 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
@@ -336,6 +434,13 @@ static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 				       (void **)&adev->vram_scratch.ptr);
 }
 
+/**
+ * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Frees the VRAM scratch page.
+ */
 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
 {
 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
@@ -377,6 +482,14 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 	}
 }
 
+/**
+ * amdgpu_device_pci_config_reset - reset the GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resets the GPU using the pci config reset sequence.
+ * Only applicable to asics prior to vega10.
+ */
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
 {
 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
@@ -537,6 +650,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 
 /**
  * amdgpu_device_vram_location - try to find VRAM location
+ *
  * @adev: amdgpu device structure holding all necessary informations
  * @mc: memory controller structure holding memory informations
  * @base: base address at which to put VRAM
@@ -545,7 +659,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
  * as parameter.
  */
 void amdgpu_device_vram_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc, u64 base)
+				 struct amdgpu_gmc *mc, u64 base)
 {
 	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
 
@@ -560,6 +674,7 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
 
 /**
  * amdgpu_device_gart_location - try to find GTT location
+ *
  * @adev: amdgpu device structure holding all necessary informations
  * @mc: memory controller structure holding memory informations
  *
@@ -571,11 +686,11 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
  * FIXME: when reducing GTT size align new size on power of 2.
  */
 void amdgpu_device_gart_location(struct amdgpu_device *adev,
-				 struct amdgpu_mc *mc)
+				 struct amdgpu_gmc *mc)
 {
 	u64 size_af, size_bf;
 
-	size_af = adev->mc.mc_mask - mc->vram_end;
+	size_af = adev->gmc.mc_mask - mc->vram_end;
 	size_bf = mc->vram_start;
 	if (size_bf > size_af) {
 		if (mc->gart_size > size_bf) {
@@ -609,7 +724,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
  */
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 {
-	u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size);
+	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 	struct pci_bus *root;
 	struct resource *res;
@@ -746,6 +861,16 @@ static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
+/**
+ * amdgpu_device_check_block_size - validate the vm block size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm block size specified via module parameter.
+ * The vm block size defines the number of bits in the page table versus the
+ * page directory. A page is 4KB, so we have a 12-bit offset; a page table
+ * uses at least 9 bits, and the remaining bits are in the page directory.
+ */
 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 {
 	/* defines number of bits in page table versus page directory,
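
As a quick worked example of the comment above: with the minimum of 9 bits per page table and a 4KB page (12 offset bits), one page table covers 2^(9+12) bytes = 2MB of address space; every extra bit in the block size doubles that coverage and leaves one bit fewer for the page directory.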
@@ -761,6 +886,14 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 	}
 }
 
+/**
+ * amdgpu_device_check_vm_size - validate the vm size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm size in GB specified via module parameter.
+ * The VM size is the size of the GPU virtual memory space in GB.
+ */
 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 {
 	/* no need to check the default value */
@@ -830,6 +963,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
 		amdgpu_lockup_timeout = 10000;
 	}
+
+	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 }
 
 /**
@@ -893,6 +1028,17 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
 	.can_switch = amdgpu_switcheroo_can_switch,
 };
 
+/**
+ * amdgpu_device_ip_set_clockgating_state - set the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: clockgating state (gate or ungate)
+ *
+ * Sets the requested clockgating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
 int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state)
@@ -915,6 +1061,17 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
 	return r;
 }
 
+/**
+ * amdgpu_device_ip_set_powergating_state - set the PG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: powergating state (gate or ungate)
+ *
+ * Sets the requested powergating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
 int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_powergating_state state)
@@ -937,6 +1094,17 @@ int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
 	return r;
 }
 
+/**
+ * amdgpu_device_ip_get_clockgating_state - get the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clockgating feature flags
+ *
+ * Walks the list of IPs on the device and updates the clockgating
+ * flags for each IP.
+ * Updates @flags with the feature flags for each hardware IP where
+ * clockgating is enabled.
+ */
 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
 					    u32 *flags)
 {
@@ -950,6 +1118,15 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
 	}
 }
 
+/**
+ * amdgpu_device_ip_wait_for_idle - wait for idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Waits for the requested hardware IP to be idle.
+ * Returns 0 for success or a negative error code on failure.
+ */
 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
 				   enum amd_ip_block_type block_type)
 {
@@ -969,6 +1146,15 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
 
 }
 
+/**
+ * amdgpu_device_ip_is_idle - is the hardware IP idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is idle or not.
+ * Returns true if the IP is idle, false if not.
+ */
 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
 			      enum amd_ip_block_type block_type)
 {
@@ -984,6 +1170,15 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
 
 }
 
+/**
+ * amdgpu_device_ip_get_ip_block - get a hw IP pointer
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Returns a pointer to the hardware IP block structure
+ * if it exists for the asic, otherwise NULL.
+ */
 struct amdgpu_ip_block *
 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
 			      enum amd_ip_block_type type)
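A short usage sketch for this lookup; the wrapper below is hypothetical, and the
ip->version->major access simply mirrors how callers inspect a block's version:

	/* Hypothetical caller: look up the GFX block and check its major
	 * version before taking a generation-specific path. */
	static bool example_has_gfx9(struct amdgpu_device *adev)
	{
		struct amdgpu_ip_block *ip =
			amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);

		return ip && ip->version->major == 9;
	}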
@@ -1037,7 +1232,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 	if (!ip_block_version)
 		return -EINVAL;
 
-	DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks,
+	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
 		  ip_block_version->funcs->name);
 
 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -1045,6 +1240,18 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * amdgpu_device_enable_virtual_display - enable virtual display feature
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enables the virtual display feature if the user has enabled it via
+ * the module parameter virtual_display.  This feature provides a virtual
+ * display hardware on headless boards or in virtualized environments.
+ * This function parses and validates the configuration string specified by
+ * the user and configures the virtual display configuration (number of
+ * virtual connectors, crtcs, etc.) specified.
+ */
 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 {
 	adev->enable_virtual_display = false;
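For reference, the string parsed here comes from the virtual_display module
parameter, a semicolon-separated list of "<pci bus id>,<number of crtcs>"
entries; the bus address below is only an example:

	modprobe amdgpu virtual_display=0000:26:00.0,2

This would give the device at 0000:26:00.0 two virtual crtcs/connectors and
replace its real display IP with the virtual DCE block.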
@@ -1090,6 +1297,16 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 	}
 }
 
+/**
+ * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Parses the asic configuration parameters specified in the gpu info
+ * firmware and makes them available to the driver for use in configuring
+ * the asic.
+ * Returns 0 on success, -EINVAL on failure.
+ */
 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 {
 	const char *chip_name;
@@ -1127,6 +1344,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 		chip_name = "vega10";
 		break;
+	case CHIP_VEGA12:
+		chip_name = "vega12";
+		break;
 	case CHIP_RAVEN:
 		chip_name = "raven";
 		break;
@@ -1188,6 +1408,16 @@ out:
 	return err;
 }
 
+/**
+ * amdgpu_device_ip_early_init - run early init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Early initialization pass for hardware IPs.  The hardware IPs that make
+ * up each asic are discovered and each IP's early_init callback is run.  This
+ * is the first stage in initializing the asic.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1240,8 +1470,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 			return r;
 		break;
 #endif
-	case  CHIP_VEGA10:
-	case  CHIP_RAVEN:
+	case CHIP_VEGA10:
+	case CHIP_VEGA12:
+	case CHIP_RAVEN:
 		if (adev->asic_type == CHIP_RAVEN)
 			adev->family = AMDGPU_FAMILY_RV;
 		else
@@ -1297,6 +1528,17 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_init - run init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main initialization pass for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the sw_init and hw_init callbacks
+ * are run.  sw_init initializes the software state associated with each IP
+ * and hw_init initializes the hardware associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1311,6 +1553,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			return r;
 		}
 		adev->ip_blocks[i].status.sw = true;
+
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_device_vram_scratch_init(adev);
@@ -1344,8 +1587,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
-		/* gmc hw init is done early */
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC)
+		if (adev->ip_blocks[i].status.hw)
 			continue;
 		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 		if (r) {
@@ -1364,27 +1606,61 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
+ * this function before a GPU reset.  If the value is retained after a
+ * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
+ */
 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
 {
 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
 }
 
+/**
+ * amdgpu_device_check_vram_lost - check if vram is valid
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Checks the reset magic value written to the gart pointer in VRAM.
+ * The driver calls this after a GPU reset to see if the contents of
+ * VRAM were lost or not.
+ * Returns true if VRAM is lost, false if not.
+ */
 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
 {
 	return !!memcmp(adev->gart.ptr, adev->reset_magic,
 			AMDGPU_RESET_MAGIC_NUM);
 }
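A simplified sketch of how these two helpers pair up around a reset; the real
ordering is spread across the init and recovery paths, and the wrapper below is
hypothetical:

	/* Write the magic before resetting, compare it afterwards.  If the
	 * bytes no longer match, the reset wiped VRAM and the shadowed
	 * buffers have to be restored. */
	static bool example_reset_and_check_vram(struct amdgpu_device *adev)
	{
		amdgpu_device_fill_reset_magic(adev);
		amdgpu_asic_reset(adev);
		return amdgpu_device_check_vram_lost(adev);
	}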
 
+/**
+ * amdgpu_device_ip_late_set_cg_state - late init for clockgating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass enabling clockgating for hardware IPs.
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * set_clockgating_state callbacks are run.  This stage is run late
+ * in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 {
 	int i = 0, r;
 
+	if (amdgpu_emu_mode == 1)
+		return 0;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
-		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 										     AMD_CG_STATE_GATE);
@@ -1398,6 +1674,18 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_late_init - run late init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the late_init callbacks are run.
+ * late_init covers any special initialization that an IP requires
+ * after all of the IPs have been initialized or something that needs to happen
+ * late in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 {
 	int i = 0, r;
@@ -1424,6 +1712,17 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run.  hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1433,7 +1732,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.hw)
 			continue;
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
+			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 										     AMD_CG_STATE_UNGATE);
@@ -1458,7 +1758,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 			continue;
 
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
-			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 										     AMD_CG_STATE_UNGATE);
@@ -1479,6 +1780,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = false;
 	}
 
+
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
@@ -1514,6 +1816,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_late_init_func_handler - work handler for clockgating
+ *
+ * @work: work_struct
+ *
+ * Work handler for amdgpu_device_ip_late_set_cg_state.  We put the
+ * clockgating setup into a worker thread to speed up driver init and
+ * resume from suspend.
+ */
 static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
@@ -1521,6 +1832,17 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 	amdgpu_device_ip_late_set_cg_state(adev);
 }
 
+/**
+ * amdgpu_device_ip_suspend - run suspend for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run.  suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1539,7 +1861,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* ungate blocks so that suspend can properly shut them down */
-		if (i != AMD_IP_BLOCK_TYPE_SMC) {
+		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
+			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
 										     AMD_CG_STATE_UNGATE);
 			if (r) {
@@ -1585,6 +1908,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
 
 			r = block->version->funcs->hw_init(adev);
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+			if (r)
+				return r;
 		}
 	}
 
@@ -1618,12 +1943,26 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 
 			r = block->version->funcs->hw_init(adev);
 			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+			if (r)
+				return r;
 		}
 	}
 
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * First resume function for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * COMMON, GMC, and IH.  resume puts the hardware into a functional state
+ * after a suspend and updates the software state as necessary.  This
+ * function is also used for restoring the GPU after a GPU reset.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1632,9 +1971,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type ==
-				AMD_IP_BLOCK_TYPE_IH) {
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
 			r = adev->ip_blocks[i].version->funcs->resume(adev);
 			if (r) {
 				DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -1647,6 +1985,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Second resume function for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+ * necessary.  This function is also used for restoring the GPU after a GPU
+ * reset.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1655,8 +2006,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
 			continue;
 		r = adev->ip_blocks[i].version->funcs->resume(adev);
 		if (r) {
@@ -1669,6 +2020,18 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main resume function for hardware IPs.  The hardware IPs
+ * are split into two resume functions because they are
+ * also used in recovering from a GPU reset and some additional
+ * steps need to be taken between them.  In this case (S3/S4) they are
+ * run sequentially.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
 	int r;
@@ -1681,6 +2044,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 	return r;
 }
 
+/**
+ * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Query the VBIOS data tables to determine if the board supports SR-IOV.
+ */
 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 {
 	if (amdgpu_sriov_vf(adev)) {
@@ -1697,6 +2067,14 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 	}
 }
 
+/**
+ * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
+ *
+ * @asic_type: AMD asic type
+ *
+ * Check if there is DC (new modesetting infrastructure) support for an asic.
+ * Returns true if DC has support, false if not.
+ */
 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 {
 	switch (asic_type) {
@@ -1704,6 +2082,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
 	case CHIP_KAVERI:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 	case CHIP_POLARIS11:
@@ -1714,10 +2094,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 #if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
 		return amdgpu_dc != 0;
 #endif
-	case CHIP_KABINI:
-	case CHIP_MULLINS:
-		return amdgpu_dc > 0;
 	case CHIP_VEGA10:
+	case CHIP_VEGA12:
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case CHIP_RAVEN:
 #endif
@@ -1771,14 +2149,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->flags = flags;
 	adev->asic_type = flags & AMD_ASIC_MASK;
 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
-	adev->mc.gart_size = 512 * 1024 * 1024;
+	if (amdgpu_emu_mode == 1)
+		adev->usec_timeout *= 2;
+	adev->gmc.gart_size = 512 * 1024 * 1024;
 	adev->accel_working = false;
 	adev->num_rings = 0;
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_num_rings = 0;
-	adev->gart.gart_funcs = NULL;
+	adev->gmc.gmc_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
@@ -1867,6 +2247,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (adev->rio_mem == NULL)
 		DRM_INFO("PCI I/O BAR is not found.\n");
 
+	amdgpu_device_get_pcie_info(adev);
+
 	/* early init functions */
 	r = amdgpu_device_ip_early_init(adev);
 	if (r)
@@ -1885,6 +2267,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (runtime)
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
+	if (amdgpu_emu_mode == 1) {
+		/* post the asic on emulation mode */
+		emu_soc_asic_init(adev);
+		goto fence_driver_init;
+	}
+
 	/* Read BIOS */
 	if (!amdgpu_get_bios(adev)) {
 		r = -EINVAL;
@@ -1937,6 +2325,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 			amdgpu_atombios_i2c_init(adev);
 	}
 
+fence_driver_init:
 	/* Fence driver */
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
@@ -1964,7 +2353,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		}
 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
-		amdgpu_device_ip_fini(adev);
 		goto failed;
 	}
 
@@ -2063,6 +2451,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
 	DRM_INFO("amdgpu: finishing device.\n");
 	adev->shutdown = true;
+	/* disable all interrupts */
+	amdgpu_irq_disable_all(adev);
 	if (adev->mode_info.mode_config_initialized){
 		if (!amdgpu_device_has_dc_support(adev))
 			drm_crtc_force_disable_all(adev->ddev);
@@ -2071,6 +2461,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	}
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
+	amdgpu_pm_sysfs_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_device_ip_fini(adev);
 	if (adev->firmware.gpu_info_fw) {
@@ -2082,7 +2473,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	/* free i2c buses */
 	if (!amdgpu_device_has_dc_support(adev))
 		amdgpu_i2c_fini(adev);
-	amdgpu_atombios_fini(adev);
+
+	if (amdgpu_emu_mode != 1)
+		amdgpu_atombios_fini(adev);
+
 	kfree(adev->bios);
 	adev->bios = NULL;
 	if (!pci_is_thunderbolt_attached(adev->pdev))
@@ -2096,7 +2490,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
 	amdgpu_device_doorbell_fini(adev);
-	amdgpu_pm_sysfs_fini(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 }
 
@@ -2325,6 +2718,16 @@ unlock:
 	return r;
 }
 
+/**
+ * amdgpu_device_ip_check_soft_reset - did soft reset succeed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and
+ * the check_soft_reset callbacks are run.  check_soft_reset determines
+ * if the asic is still hung or not.
+ * Returns true if any of the IPs are still in a hung state, false if not.
+ */
 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
 {
 	int i;
@@ -2347,6 +2750,17 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
 	return asic_hang;
 }
 
+/**
+ * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary for a soft reset to succeed.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
 {
 	int i, r = 0;
@@ -2365,6 +2779,15 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
+ * reset is necessary to recover.
+ * Returns true if a full asic reset is required, false if not.
+ */
 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
 {
 	int i;
@@ -2386,6 +2809,17 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
 	return false;
 }
 
+/**
+ * amdgpu_device_ip_soft_reset - do a soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * soft_reset callbacks are run if the block is hung.  soft_reset handles any
+ * IP specific hardware or software state changes that are necessary to soft
+ * reset the IP.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
 {
 	int i, r = 0;
@@ -2404,6 +2838,17 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_ip_post_soft_reset - clean up from soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary after the IP has been soft reset.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
 {
 	int i, r = 0;
@@ -2421,6 +2866,19 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring for the engine handling the buffer operations
+ * @bo: amdgpu_bo buffer whose shadow is being restored
+ * @fence: dma_fence associated with the operation
+ *
+ * Restores the VRAM buffer contents from the shadow in GTT.  Used to
+ * restore things like GPUVM page tables after a GPU reset where
+ * the contents of VRAM might be lost.
+ * Returns 0 on success, negative error code on failure.
+ */
 static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
 						  struct amdgpu_ring *ring,
 						  struct amdgpu_bo *bo,
@@ -2456,17 +2914,81 @@ err:
 	return r;
 }
 
-/*
+/**
+ * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
+ * restore things like GPUVM page tables after a GPU reset where
+ * the contents of VRAM might be lost.
+ * Returns 0 on success, 1 on failure.
+ */
+static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_bo *bo, *tmp;
+	struct dma_fence *fence = NULL, *next = NULL;
+	long r = 1;
+	int i = 0;
+	long tmo;
+
+	if (amdgpu_sriov_runtime(adev))
+		tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
+	else
+		tmo = msecs_to_jiffies(100);
+
+	DRM_INFO("recover vram bo from shadow start\n");
+	mutex_lock(&adev->shadow_list_lock);
+	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+		next = NULL;
+		amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
+		if (fence) {
+			r = dma_fence_wait_timeout(fence, false, tmo);
+			if (r == 0)
+				pr_err("wait fence %p[%d] timeout\n", fence, i);
+			else if (r < 0)
+				pr_err("wait fence %p[%d] interrupted\n", fence, i);
+			if (r < 1) {
+				dma_fence_put(fence);
+				fence = next;
+				break;
+			}
+			i++;
+		}
+
+		dma_fence_put(fence);
+		fence = next;
+	}
+	mutex_unlock(&adev->shadow_list_lock);
+
+	if (fence) {
+		r = dma_fence_wait_timeout(fence, false, tmo);
+		if (r == 0)
+			pr_err("wait fence %p[%d] timeout\n", fence, i);
+		else if (r < 0)
+			pr_err("wait fence %p[%d] interrupted\n", fence, i);
+
+	}
+	dma_fence_put(fence);
+
+	if (r > 0)
+		DRM_INFO("recover vram bo from shadow done\n");
+	else
+		DRM_ERROR("recover vram bo from shadow failed\n");
+
+	return (r > 0) ? 0 : 1;
+}
+
+/**
  * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
  *
  * @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
  *
  * attempt to do soft-reset or full-reset and reinitialize Asic
  * return 0 means successed otherwise failed
-*/
-static int amdgpu_device_reset(struct amdgpu_device *adev,
-			       uint64_t* reset_flags)
+ */
+static int amdgpu_device_reset(struct amdgpu_device *adev)
 {
 	bool need_full_reset, vram_lost = 0;
 	int r;
@@ -2481,7 +3003,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev,
 			DRM_INFO("soft reset failed, will fallback to full reset!\n");
 			need_full_reset = true;
 		}
-
 	}
 
 	if (need_full_reset) {
@@ -2530,28 +3051,21 @@ out:
 		}
 	}
 
-	if (reset_flags) {
-		if (vram_lost)
-			(*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
-
-		if (need_full_reset)
-			(*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
-	}
+	if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
+		r = amdgpu_device_handle_vram_lost(adev);
 
 	return r;
 }
 
-/*
+/**
  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
  *
  * @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
  *
  * do VF FLR and reinitialize Asic
  * return 0 means successed otherwise failed
-*/
+ */
 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
-				     uint64_t *reset_flags,
 				     bool from_hypervisor)
 {
 	int r;
@@ -2573,28 +3087,20 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 
 	/* now we are okay to resume SMC/CP/SDMA */
 	r = amdgpu_device_ip_reinit_late_sriov(adev);
+	amdgpu_virt_release_full_gpu(adev, true);
 	if (r)
 		goto error;
 
 	amdgpu_irq_gpu_reset_resume_helper(adev);
 	r = amdgpu_ib_ring_tests(adev);
-	if (r)
-		dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
 
-error:
-	/* release full control of GPU after ib test */
-	amdgpu_virt_release_full_gpu(adev, true);
-
-	if (reset_flags) {
-		if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-			(*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
-			atomic_inc(&adev->vram_lost_counter);
-		}
-
-		/* VF FLR or hotlink reset is always full-reset */
-		(*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
+	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+		atomic_inc(&adev->vram_lost_counter);
+		r = amdgpu_device_handle_vram_lost(adev);
 	}
 
+error:
+
 	return r;
 }
 
@@ -2612,7 +3118,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job, bool force)
 {
 	struct drm_atomic_state *state = NULL;
-	uint64_t reset_flags = 0;
 	int i, r, resched;
 
 	if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -2634,22 +3139,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
 	/* store modesetting */
 	if (amdgpu_device_has_dc_support(adev))
 		state = drm_atomic_helper_suspend(adev->ddev);
 
-	/* block scheduler */
+	/* block all schedulers and reset given job's ring */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		/* only focus on the ring hit timeout if &job not NULL */
+		kthread_park(ring->sched.thread);
+
 		if (job && job->ring->idx != i)
 			continue;
 
-		kthread_park(ring->sched.thread);
 		drm_sched_hw_job_reset(&ring->sched, &job->base);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2657,68 +3163,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	}
 
 	if (amdgpu_sriov_vf(adev))
-		r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true);
+		r = amdgpu_device_reset_sriov(adev, job ? false : true);
 	else
-		r = amdgpu_device_reset(adev, &reset_flags);
-
-	if (!r) {
-		if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
-			(reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) {
-			struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-			struct amdgpu_bo *bo, *tmp;
-			struct dma_fence *fence = NULL, *next = NULL;
-
-			DRM_INFO("recover vram bo from shadow\n");
-			mutex_lock(&adev->shadow_list_lock);
-			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
-				next = NULL;
-				amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
-				if (fence) {
-					r = dma_fence_wait(fence, false);
-					if (r) {
-						WARN(r, "recovery from shadow isn't completed\n");
-						break;
-					}
-				}
-
-				dma_fence_put(fence);
-				fence = next;
-			}
-			mutex_unlock(&adev->shadow_list_lock);
-			if (fence) {
-				r = dma_fence_wait(fence, false);
-				if (r)
-					WARN(r, "recovery from shadow isn't completed\n");
-			}
-			dma_fence_put(fence);
-		}
-
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
+		r = amdgpu_device_reset(adev);
 
-			if (!ring || !ring->sched.thread)
-				continue;
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
 
-			/* only focus on the ring hit timeout if &job not NULL */
-			if (job && job->ring->idx != i)
-				continue;
+		if (!ring || !ring->sched.thread)
+			continue;
 
+		/* only need to recover the scheduler of the given job's ring,
+		 * or all rings (in the case @job is NULL),
+		 * after the above reset has completed
+		 */
+		if ((!job || job->ring->idx == i) && !r)
 			drm_sched_job_recovery(&ring->sched);
-			kthread_unpark(ring->sched.thread);
-		}
-	} else {
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
 
-			if (!ring || !ring->sched.thread)
-				continue;
-
-			/* only focus on the ring hit timeout if &job not NULL */
-			if (job && job->ring->idx != i)
-				continue;
-
-			kthread_unpark(adev->rings[i]->sched.thread);
-		}
+		kthread_unpark(ring->sched.thread);
 	}
 
 	if (amdgpu_device_has_dc_support(adev)) {
@@ -2744,7 +3206,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	return r;
 }
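A sketch of how the reworked recovery above is typically entered; the timeout
handler below is hypothetical.  Passing the hung job limits scheduler recovery
to that job's ring, while a NULL job recovers all rings:

	/* Hypothetical trigger: a ring's job-timeout path asks the device to
	 * recover, handing over the offending job. */
	static void example_handle_timeout(struct amdgpu_job *job)
	{
		amdgpu_device_gpu_recover(job->ring->adev, job, false);
	}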
 
-void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
+/**
+ * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
+ * and lanes) of the slot the device is in. Handles APUs and
+ * virtualized environments where PCIE config space may not be available.
+ */
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
 	int ret;

+ 72 - 55
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c

@@ -29,6 +29,7 @@
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
 #include <asm/div64.h>
 
 #include <linux/pm_runtime.h>
@@ -36,7 +37,8 @@
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
 
-static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
+static void amdgpu_display_flip_callback(struct dma_fence *f,
+					 struct dma_fence_cb *cb)
 {
 	struct amdgpu_flip_work *work =
 		container_of(cb, struct amdgpu_flip_work, cb);
@@ -45,8 +47,8 @@ static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
 	schedule_work(&work->flip_work.work);
 }
 
-static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
-				     struct dma_fence **f)
+static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
+					     struct dma_fence **f)
 {
 	struct dma_fence *fence= *f;
 
@@ -55,14 +57,15 @@ static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
 
 	*f = NULL;
 
-	if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+	if (!dma_fence_add_callback(fence, &work->cb,
+				    amdgpu_display_flip_callback))
 		return true;
 
 	dma_fence_put(fence);
 	return false;
 }
 
-static void amdgpu_flip_work_func(struct work_struct *__work)
+static void amdgpu_display_flip_work_func(struct work_struct *__work)
 {
 	struct delayed_work *delayed_work =
 		container_of(__work, struct delayed_work, work);
@@ -76,20 +79,20 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 	unsigned i;
 	int vpos, hpos;
 
-	if (amdgpu_flip_handle_fence(work, &work->excl))
+	if (amdgpu_display_flip_handle_fence(work, &work->excl))
 		return;
 
 	for (i = 0; i < work->shared_count; ++i)
-		if (amdgpu_flip_handle_fence(work, &work->shared[i]))
+		if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
 			return;
 
 	/* Wait until we're out of the vertical blank period before the one
 	 * targeted by the flip
 	 */
 	if (amdgpu_crtc->enabled &&
-	    (amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
-					&vpos, &hpos, NULL, NULL,
-					&crtc->hwmode)
+	    (amdgpu_display_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
+						&vpos, &hpos, NULL, NULL,
+						&crtc->hwmode)
 	     & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
 	    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
 	    (int)(work->target_vblank -
@@ -117,7 +120,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 /*
  * Handle unpin events outside the interrupt handler proper.
  */
-static void amdgpu_unpin_work_func(struct work_struct *__work)
+static void amdgpu_display_unpin_work_func(struct work_struct *__work)
 {
 	struct amdgpu_flip_work *work =
 		container_of(__work, struct amdgpu_flip_work, unpin_work);
@@ -139,11 +142,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
 	kfree(work);
 }
 
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags, uint32_t target,
-				 struct drm_modeset_acquire_ctx *ctx)
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb,
+				struct drm_pending_vblank_event *event,
+				uint32_t page_flip_flags, uint32_t target,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -162,8 +165,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 	if (work == NULL)
 		return -ENOMEM;
 
-	INIT_DELAYED_WORK(&work->flip_work, amdgpu_flip_work_func);
-	INIT_WORK(&work->unpin_work, amdgpu_unpin_work_func);
+	INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func);
+	INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func);
 
 	work->event = event;
 	work->adev = adev;
@@ -189,7 +192,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 		goto cleanup;
 	}
 
-	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
+	r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
 	if (unlikely(r != 0)) {
 		DRM_ERROR("failed to pin new abo buffer before flip\n");
 		goto unreserve;
@@ -207,7 +210,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 	amdgpu_bo_unreserve(new_abo);
 
 	work->base = base;
-	work->target_vblank = target - drm_crtc_vblank_count(crtc) +
+	work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
 		amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
 
 	/* we borrow the event spin lock for protecting flip_wrok */
@@ -228,7 +231,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 	/* update crtc fb */
 	crtc->primary->fb = fb;
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-	amdgpu_flip_work_func(&work->flip_work.work);
+	amdgpu_display_flip_work_func(&work->flip_work.work);
 	return 0;
 
 pflip_cleanup:
@@ -254,8 +257,8 @@ cleanup:
 	return r;
 }
 
-int amdgpu_crtc_set_config(struct drm_mode_set *set,
-			   struct drm_modeset_acquire_ctx *ctx)
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+				   struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev;
 	struct amdgpu_device *adev;
@@ -352,7 +355,7 @@ static const char *hpd_names[6] = {
 	"HPD6",
 };
 
-void amdgpu_print_display_setup(struct drm_device *dev)
+void amdgpu_display_print_display_setup(struct drm_device *dev)
 {
 	struct drm_connector *connector;
 	struct amdgpu_connector *amdgpu_connector;
@@ -429,11 +432,11 @@ void amdgpu_print_display_setup(struct drm_device *dev)
 }
 
 /**
- * amdgpu_ddc_probe
+ * amdgpu_display_ddc_probe
  *
  */
-bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector,
-		       bool use_aux)
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+			      bool use_aux)
 {
 	u8 out = 0x0;
 	u8 buf[8];
@@ -479,7 +482,7 @@ bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector,
 	return true;
 }
 
-static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
+static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 
@@ -488,9 +491,10 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
 	kfree(amdgpu_fb);
 }
 
-static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb,
-						  struct drm_file *file_priv,
-						  unsigned int *handle)
+static int amdgpu_display_user_framebuffer_create_handle(
+			struct drm_framebuffer *fb,
+			struct drm_file *file_priv,
+			unsigned int *handle)
 {
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 
@@ -498,15 +502,28 @@ static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 }
 
 static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
-	.destroy = amdgpu_user_framebuffer_destroy,
-	.create_handle = amdgpu_user_framebuffer_create_handle,
+	.destroy = amdgpu_display_user_framebuffer_destroy,
+	.create_handle = amdgpu_display_user_framebuffer_create_handle,
 };
 
-int
-amdgpu_framebuffer_init(struct drm_device *dev,
-			struct amdgpu_framebuffer *rfb,
-			const struct drm_mode_fb_cmd2 *mode_cmd,
-			struct drm_gem_object *obj)
+uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
+{
+	uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+#if defined(CONFIG_DRM_AMD_DC)
+	if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
+	    adev->flags & AMD_IS_APU &&
+	    amdgpu_device_asic_has_dc_support(adev->asic_type))
+		domain |= AMDGPU_GEM_DOMAIN_GTT;
+#endif
+
+	return domain;
+}
+
+int amdgpu_display_framebuffer_init(struct drm_device *dev,
+				    struct amdgpu_framebuffer *rfb,
+				    const struct drm_mode_fb_cmd2 *mode_cmd,
+				    struct drm_gem_object *obj)
 {
 	int ret;
 	rfb->obj = obj;
@@ -520,9 +537,9 @@ amdgpu_framebuffer_init(struct drm_device *dev,
 }
 
 struct drm_framebuffer *
-amdgpu_user_framebuffer_create(struct drm_device *dev,
-			       struct drm_file *file_priv,
-			       const struct drm_mode_fb_cmd2 *mode_cmd)
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+				       struct drm_file *file_priv,
+				       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct amdgpu_framebuffer *amdgpu_fb;
@@ -547,7 +564,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
+	ret = amdgpu_display_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
 	if (ret) {
 		kfree(amdgpu_fb);
 		drm_gem_object_put_unlocked(obj);
@@ -558,7 +575,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
 }
 
 const struct drm_mode_config_funcs amdgpu_mode_funcs = {
-	.fb_create = amdgpu_user_framebuffer_create,
+	.fb_create = amdgpu_display_user_framebuffer_create,
 	.output_poll_changed = drm_fb_helper_output_poll_changed,
 };
 
@@ -580,7 +597,7 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
 	{ AMDGPU_FMT_DITHER_ENABLE, "on" },
 };
 
-int amdgpu_modeset_create_props(struct amdgpu_device *adev)
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
 {
 	int sz;
 
@@ -629,7 +646,7 @@ int amdgpu_modeset_create_props(struct amdgpu_device *adev)
 	return 0;
 }
 
-void amdgpu_update_display_priority(struct amdgpu_device *adev)
+void amdgpu_display_update_priority(struct amdgpu_device *adev)
 {
 	/* adjustment options for the display watermarks */
 	if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2))
@@ -639,7 +656,7 @@ void amdgpu_update_display_priority(struct amdgpu_device *adev)
 
 }
 
-static bool is_hdtv_mode(const struct drm_display_mode *mode)
+static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode)
 {
 	/* try and guess if this is a tv or a monitor */
 	if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */
@@ -651,9 +668,9 @@ static bool is_hdtv_mode(const struct drm_display_mode *mode)
 		return false;
 }
 
-bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
-				    const struct drm_display_mode *mode,
-				    struct drm_display_mode *adjusted_mode)
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+					const struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_encoder *encoder;
@@ -696,7 +713,7 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
 		    ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
 		     ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
 		      drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
-		      is_hdtv_mode(mode)))) {
+		      amdgpu_display_is_hdtv_mode(mode)))) {
 			if (amdgpu_encoder->underscan_hborder != 0)
 				amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
 			else
@@ -764,10 +781,10 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
  * unknown small number of scanlines wrt. real scanout position.
  *
  */
-int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
-			       unsigned int flags, int *vpos, int *hpos,
-			       ktime_t *stime, ktime_t *etime,
-			       const struct drm_display_mode *mode)
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+			unsigned int pipe, unsigned int flags, int *vpos,
+			int *hpos, ktime_t *stime, ktime_t *etime,
+			const struct drm_display_mode *mode)
 {
 	u32 vbl = 0, position = 0;
 	int vbl_start, vbl_end, vtotal, ret = 0;
@@ -859,7 +876,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 	return ret;
 }
 
-int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
 {
 	if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
 		return AMDGPU_CRTC_IRQ_NONE;

+ 4 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_display.h

@@ -23,9 +23,10 @@
 #ifndef __AMDGPU_DISPLAY_H__
 #define __AMDGPU_DISPLAY_H__
 
+uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
 struct drm_framebuffer *
-amdgpu_user_framebuffer_create(struct drm_device *dev,
-			       struct drm_file *file_priv,
-			       const struct drm_mode_fb_cmd2 *mode_cmd);
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+				       struct drm_file *file_priv,
+				       const struct drm_mode_fb_cmd2 *mode_cmd);
 
 #endif

+ 20 - 15
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h

@@ -265,9 +265,6 @@ enum amdgpu_pcie_gen {
 #define amdgpu_dpm_read_sensor(adev, idx, value, size) \
 		((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size)))
 
-#define amdgpu_dpm_get_temperature(adev) \
-		((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle))
-
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
 		((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)))
 
@@ -328,8 +325,8 @@ enum amdgpu_pcie_gen {
 #define amdgpu_dpm_set_mclk_od(adev, value) \
 		((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
 
-#define amdgpu_dpm_dispatch_task(adev, task_id, input, output)		\
-		((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output))
+#define amdgpu_dpm_dispatch_task(adev, task_id, user_state)		\
+		((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (user_state))
 
 #define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \
 		((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal)))
@@ -344,17 +341,9 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->reset_power_profile_state(\
 			(adev)->powerplay.pp_handle, request))
 
-#define amdgpu_dpm_get_power_profile_state(adev, query) \
-		((adev)->powerplay.pp_funcs->get_power_profile_state(\
-			(adev)->powerplay.pp_handle, query))
-
-#define amdgpu_dpm_set_power_profile_state(adev, request) \
-		((adev)->powerplay.pp_funcs->set_power_profile_state(\
-			(adev)->powerplay.pp_handle, request))
-
-#define amdgpu_dpm_switch_power_profile(adev, type) \
+#define amdgpu_dpm_switch_power_profile(adev, type, en) \
 		((adev)->powerplay.pp_funcs->switch_power_profile(\
-			(adev)->powerplay.pp_handle, type))
+			(adev)->powerplay.pp_handle, type, en))
 
 #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
 		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
@@ -366,6 +355,22 @@ enum amdgpu_pcie_gen {
 			(adev)->powerplay.pp_handle, virtual_addr_low, \
 			virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
 
+#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
+		((adev)->powerplay.pp_funcs->get_power_profile_mode(\
+			(adev)->powerplay.pp_handle, buf))
+
+#define amdgpu_dpm_set_power_profile_mode(adev, parameter, size) \
+		((adev)->powerplay.pp_funcs->set_power_profile_mode(\
+			(adev)->powerplay.pp_handle, parameter, size))
+
+#define amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, size) \
+		((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
+			(adev)->powerplay.pp_handle, type, parameter, size))
+
+#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \
+		((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \
+		(adev)->powerplay.pp_handle))
+
 struct amdgpu_dpm {
 	struct amdgpu_ps        *ps;
 	/* number of valid power states */

+ 30 - 13
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -73,9 +73,11 @@
  * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
  * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
  * - 3.23.0 - Add query for VRAM lost counter
+ * - 3.24.0 - Add high priority compute support for gfx9
+ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	23
+#define KMS_DRIVER_MINOR	25
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -119,7 +121,7 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffffffff;
+uint amdgpu_pp_feature_mask = 0xffffbfff;
 int amdgpu_ngg = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
@@ -129,6 +131,7 @@ int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
 int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
+int amdgpu_emu_mode = 0;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -281,9 +284,12 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
 
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
 
+MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
+module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
 
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -538,6 +544,12 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	/* Vega 12 */
+	{0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+	{0x1002, 0x69A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+	{0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+	{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+	{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
 	/* Raven */
 	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
 
@@ -576,6 +588,11 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	struct drm_device *dev;
 	unsigned long flags = ent->driver_data;
 	int ret, retry = 0;
+	bool supports_atomic = false;
+
+	if (!amdgpu_virtual_display &&
+	    amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+		supports_atomic = true;
 
 	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
 		DRM_INFO("This hardware requires experimental hardware support.\n"
@@ -596,6 +613,13 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
+	/* warn the user if they mix atomic and non-atomic capable GPUs */
+	if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
+		DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
+	/* support atomic early so the atomic debugfs stuff gets created */
+	if (supports_atomic)
+		kms_driver.driver_features |= DRIVER_ATOMIC;
+
 	dev = drm_dev_alloc(&kms_driver, &pdev->dev);
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
@@ -720,7 +744,6 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 	drm_kms_helper_poll_disable(drm_dev);
-	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_OFF);
 
 	ret = amdgpu_device_suspend(drm_dev, false, false);
 	pci_save_state(pdev);
@@ -757,7 +780,6 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 
 	ret = amdgpu_device_resume(drm_dev, false, false);
 	drm_kms_helper_poll_enable(drm_dev);
-	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	return 0;
 }
@@ -835,8 +857,8 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 				 ktime_t *stime, ktime_t *etime,
 				 const struct drm_display_mode *mode)
 {
-	return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
-					  stime, etime, mode);
+	return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
+						  stime, etime, mode);
 }
 
 static struct drm_driver kms_driver = {
@@ -854,9 +876,6 @@ static struct drm_driver kms_driver = {
 	.disable_vblank = amdgpu_disable_vblank_kms,
 	.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
 	.get_scanout_position = amdgpu_get_crtc_scanout_position,
-	.irq_preinstall = amdgpu_irq_preinstall,
-	.irq_postinstall = amdgpu_irq_postinstall,
-	.irq_uninstall = amdgpu_irq_uninstall,
 	.irq_handler = amdgpu_irq_handler,
 	.ioctls = amdgpu_ioctls_kms,
 	.gem_free_object_unlocked = amdgpu_gem_object_free,
@@ -869,9 +888,7 @@ static struct drm_driver kms_driver = {
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = amdgpu_gem_prime_export,
-	.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_pin = amdgpu_gem_prime_pin,
-	.gem_prime_unpin = amdgpu_gem_prime_unpin,
+	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
 	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
 	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,

+ 13 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c

@@ -38,6 +38,8 @@
 
 #include <linux/vga_switcheroo.h>
 
+#include "amdgpu_display.h"
+
 /* object hierarchy -
    this contains a helper + a amdgpu fb
    the helper contains a pointer to amdgpu framebuffer baseclass.
@@ -124,7 +126,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	struct drm_gem_object *gobj = NULL;
 	struct amdgpu_bo *abo = NULL;
 	bool fb_tiled = false; /* useful for testing */
-	u32 tiling_flags = 0;
+	u32 tiling_flags = 0, domain;
 	int ret;
 	int aligned_size, size;
 	int height = mode_cmd->height;
@@ -135,12 +137,12 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	/* need to align pitch with crtc limits */
 	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
 						  fb_tiled);
+	domain = amdgpu_display_framebuffer_domains(adev);
 
 	height = ALIGN(mode_cmd->height, 8);
 	size = mode_cmd->pitches[0] * height;
 	aligned_size = ALIGN(size, PAGE_SIZE);
-	ret = amdgpu_gem_object_create(adev, aligned_size, 0,
-				       AMDGPU_GEM_DOMAIN_VRAM,
+	ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain,
 				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
@@ -166,7 +168,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	}
 
 
-	ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL);
+	ret = amdgpu_bo_pin(abo, domain, NULL);
 	if (ret) {
 		amdgpu_bo_unreserve(abo);
 		goto out_unref;
@@ -225,7 +227,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 	info->par = rfbdev;
 	info->skip_vt_switch = true;
 
-	ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
+	ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb,
+					      &mode_cmd, gobj);
 	if (ret) {
 		DRM_ERROR("failed to initialize framebuffer %d\n", ret);
 		goto out;
@@ -242,8 +245,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 
 	info->fbops = &amdgpufb_ops;
 
-	tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start;
-	info->fix.smem_start = adev->mc.aper_base + tmp;
+	tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start;
+	info->fix.smem_start = adev->gmc.aper_base + tmp;
 	info->fix.smem_len = amdgpu_bo_size(abo);
 	info->screen_base = amdgpu_bo_kptr(abo);
 	info->screen_size = amdgpu_bo_size(abo);
@@ -252,7 +255,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 
 	/* setup aperture base/size for vesafb takeover */
 	info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base;
-	info->apertures->ranges[0].size = adev->mc.aper_size;
+	info->apertures->ranges[0].size = adev->gmc.aper_size;
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 
@@ -262,7 +265,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 	}
 
 	DRM_INFO("fb mappable at 0x%lX\n",  info->fix.smem_start);
-	DRM_INFO("vram apper at 0x%lX\n",  (unsigned long)adev->mc.aper_base);
+	DRM_INFO("vram apper at 0x%lX\n",  (unsigned long)adev->gmc.aper_base);
 	DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
 	DRM_INFO("fb depth is %d\n", fb->format->depth);
 	DRM_INFO("   pitch is %d\n", fb->pitches[0]);
@@ -319,7 +322,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
 		return 0;
 
 	/* select 8 bpp console on low vram cards */
-	if (adev->mc.real_vram_size <= (32*1024*1024))
+	if (adev->gmc.real_vram_size <= (32*1024*1024))
 		bpp_sel = 8;
 
 	rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL);

+ 3 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

@@ -435,7 +435,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
 				   num_hw_submission, amdgpu_job_hang_limit,
-				   msecs_to_jiffies(amdgpu_lockup_timeout), ring->name);
+				   (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ?
+				   MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(amdgpu_lockup_timeout),
+				   ring->name);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
 				  ring->name);

+ 27 - 27
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

@@ -68,17 +68,15 @@
  */
 static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
 {
-	if (adev->dummy_page.page)
+	struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page;
+
+	if (adev->dummy_page_addr)
 		return 0;
-	adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
-	if (adev->dummy_page.page == NULL)
-		return -ENOMEM;
-	adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page,
-					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) {
+	adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0,
+					     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) {
 		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
-		__free_page(adev->dummy_page.page);
-		adev->dummy_page.page = NULL;
+		adev->dummy_page_addr = 0;
 		return -ENOMEM;
 	}
 	return 0;
@@ -93,12 +91,11 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
  */
 static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
 {
-	if (adev->dummy_page.page == NULL)
+	if (!adev->dummy_page_addr)
 		return;
-	pci_unmap_page(adev->pdev, adev->dummy_page.addr,
-			PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	__free_page(adev->dummy_page.page);
-	adev->dummy_page.page = NULL;
+	pci_unmap_page(adev->pdev, adev->dummy_page_addr,
+		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	adev->dummy_page_addr = 0;
 }
 
 /**
@@ -116,11 +113,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->gart.robj == NULL) {
-		r = amdgpu_bo_create(adev, adev->gart.table_size,
-				     PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
+		r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
+				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     NULL, NULL, 0, &adev->gart.robj);
+				     ttm_bo_type_kernel, NULL,
+				     &adev->gart.robj);
 		if (r) {
 			return r;
 		}
@@ -236,18 +234,19 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 		adev->gart.pages[p] = NULL;
 #endif
-		page_base = adev->dummy_page.addr;
+		page_base = adev->dummy_page_addr;
 		if (!adev->gart.ptr)
 			continue;
 
 		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
-			amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
-						t, page_base, flags);
+			amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+					       t, page_base, flags);
 			page_base += AMDGPU_GPU_PAGE_SIZE;
 		}
 	}
 	mb();
-	amdgpu_gart_flush_gpu_tlb(adev, 0);
+	amdgpu_asic_flush_hdp(adev, NULL);
+	amdgpu_gmc_flush_gpu_tlb(adev, 0);
 	return 0;
 }
 
@@ -279,7 +278,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 	for (i = 0; i < pages; i++) {
 		page_base = dma_addr[i];
 		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
-			amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags);
+			amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
 			page_base += AMDGPU_GPU_PAGE_SIZE;
 		}
 	}
@@ -317,7 +316,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 	t = offset / AMDGPU_GPU_PAGE_SIZE;
 	p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
 	for (i = 0; i < pages; i++, p++)
-		adev->gart.pages[p] = pagelist[i];
+		adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
 #endif
 
 	if (!adev->gart.ptr)
@@ -329,7 +328,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		return r;
 
 	mb();
-	amdgpu_gart_flush_gpu_tlb(adev, 0);
+	amdgpu_asic_flush_hdp(adev, NULL);
+	amdgpu_gmc_flush_gpu_tlb(adev, 0);
 	return 0;
 }
 
@@ -345,7 +345,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->dummy_page.page)
+	if (adev->dummy_page_addr)
 		return 0;
 
 	/* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
@@ -357,8 +357,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
 	if (r)
 		return r;
 	/* Compute table size */
-	adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE;
-	adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE;
+	adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
+	adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
 	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
 		 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
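
Only the adev->mc to adev->gmc rename changes here; the sizing itself is untouched. As a worked example with assumed values, a 256 MiB GART on a system with 4 KiB CPU pages and the 4 KiB AMDGPU_GPU_PAGE_SIZE from amdgpu_gart.h gives identical CPU and GPU page counts, and the per-CPU-page inner loops elsewhere in this file degenerate to a single iteration:

    /* Worked example, sizes assumed: gart_size = 256 MiB, PAGE_SIZE = 4 KiB */
    u64 gart_size     = 256ULL << 20;
    u32 num_cpu_pages = gart_size / PAGE_SIZE;              /* 65536 */
    u32 num_gpu_pages = gart_size / AMDGPU_GPU_PAGE_SIZE;   /* 65536 */
    u32 per_cpu_page  = PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE;   /* 1 GPU PTE per CPU page */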
 

+ 0 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

@@ -31,7 +31,6 @@
  */
 struct amdgpu_device;
 struct amdgpu_bo;
-struct amdgpu_gart_funcs;
 
 #define AMDGPU_GPU_PAGE_SIZE 4096
 #define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
@@ -52,8 +51,6 @@ struct amdgpu_gart {
 
 	/* Asic default pte flags */
 	uint64_t			gart_pte_flags;
-
-	const struct amdgpu_gart_funcs *gart_funcs;
 };
 
 int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);

+ 11 - 22
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

@@ -43,7 +43,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
-			     u64 flags, bool kernel,
+			     u64 flags, enum ttm_bo_type type,
 			     struct reservation_object *resv,
 			     struct drm_gem_object **obj)
 {
@@ -56,23 +56,11 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 		alignment = PAGE_SIZE;
 	}
 
-retry:
-	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
-			     flags, NULL, resv, 0, &bo);
+	r = amdgpu_bo_create(adev, size, alignment, initial_domain,
+			     flags, type, resv, &bo);
 	if (r) {
-		if (r != -ERESTARTSYS) {
-			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
-				flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-				goto retry;
-			}
-
-			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-				initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
-				goto retry;
-			}
-			DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n",
-				  size, initial_domain, alignment, r);
-		}
+		DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n",
+			  size, initial_domain, alignment, r);
 		return r;
 	}
 	*obj = &bo->gem_base;
@@ -521,12 +509,13 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 		goto error;
 
 	if (operation == AMDGPU_VA_OP_MAP ||
-	    operation == AMDGPU_VA_OP_REPLACE)
+	    operation == AMDGPU_VA_OP_REPLACE) {
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			goto error;
+	}
 
 	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		goto error;
 
 error:
 	if (r && r != -ERESTARTSYS)
@@ -632,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		if (r)
 			goto error_backoff;
 
-		va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
+		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 				     args->offset_in_bo, args->map_size,
 				     va_flags);
@@ -652,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		if (r)
 			goto error_backoff;
 
-		va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
+		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
 					     args->offset_in_bo, args->map_size,
 					     va_flags);
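
amdgpu_gem_object_create() now takes an explicit enum ttm_bo_type instead of a bool kernel flag, and the retry-with-relaxed-placement logic is gone from this level (it reappears inside amdgpu_bo_do_create(), see amdgpu_object.c further down). A minimal call-site sketch under the new prototype; the size and flag choices are illustrative only:

    struct drm_gem_object *gobj;
    int r;

    /* Userspace-visible BO in VRAM; placement fallback now happens one level down. */
    r = amdgpu_gem_object_create(adev, PAGE_SIZE, 0, AMDGPU_GEM_DOMAIN_VRAM,
                                 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
                                 ttm_bo_type_device, NULL, &gobj);
    if (r)
            return r;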

+ 112 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

@@ -0,0 +1,112 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef __AMDGPU_GMC_H__
+#define __AMDGPU_GMC_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_irq.h"
+
+struct firmware;
+
+/*
+ * VMHUB structures, functions & helpers
+ */
+struct amdgpu_vmhub {
+	uint32_t	ctx0_ptb_addr_lo32;
+	uint32_t	ctx0_ptb_addr_hi32;
+	uint32_t	vm_inv_eng0_req;
+	uint32_t	vm_inv_eng0_ack;
+	uint32_t	vm_context0_cntl;
+	uint32_t	vm_l2_pro_fault_status;
+	uint32_t	vm_l2_pro_fault_cntl;
+};
+
+/*
+ * GPU MC structures, functions & helpers
+ */
+struct amdgpu_gmc_funcs {
+	/* flush the vm tlb via mmio */
+	void (*flush_gpu_tlb)(struct amdgpu_device *adev,
+			      uint32_t vmid);
+	/* flush the vm tlb via ring */
+	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
+				       uint64_t pd_addr);
+	/* Change the VMID -> PASID mapping */
+	void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid,
+				   unsigned pasid);
+	/* write pte/pde updates using the cpu */
+	int (*set_pte_pde)(struct amdgpu_device *adev,
+			   void *cpu_pt_addr, /* cpu addr of page table */
+			   uint32_t gpu_page_idx, /* pte/pde to update */
+			   uint64_t addr, /* addr to write into pte/pde */
+			   uint64_t flags); /* access flags */
+	/* enable/disable PRT support */
+	void (*set_prt)(struct amdgpu_device *adev, bool enable);
+	/* set pte flags based per asic */
+	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
+				     uint32_t flags);
+	/* get the pde for a given mc addr */
+	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+			   u64 *dst, u64 *flags);
+};
+
+struct amdgpu_gmc {
+	resource_size_t		aper_size;
+	resource_size_t		aper_base;
+	/* for some chips with <= 32MB we need to lie
+	 * about vram size near mc fb location */
+	u64			mc_vram_size;
+	u64			visible_vram_size;
+	u64			gart_size;
+	u64			gart_start;
+	u64			gart_end;
+	u64			vram_start;
+	u64			vram_end;
+	unsigned		vram_width;
+	u64			real_vram_size;
+	int			vram_mtrr;
+	u64                     mc_mask;
+	const struct firmware   *fw;	/* MC firmware */
+	uint32_t                fw_version;
+	struct amdgpu_irq_src	vm_fault;
+	uint32_t		vram_type;
+	uint32_t                srbm_soft_reset;
+	bool			prt_warning;
+	uint64_t		stolen_size;
+	/* apertures */
+	u64			shared_aperture_start;
+	u64			shared_aperture_end;
+	u64			private_aperture_start;
+	u64			private_aperture_end;
+	/* protects concurrent invalidation */
+	spinlock_t		invalidate_lock;
+	bool			translate_further;
+
+	const struct amdgpu_gmc_funcs	*gmc_funcs;
+};
+
+#endif
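
The callers added elsewhere in this series (amdgpu_gmc_flush_gpu_tlb(), amdgpu_gmc_set_pte_pde(), amdgpu_gmc_get_pte_flags()) presumably resolve to thin wrappers that dispatch through this per-ASIC function table; their exact form and location are not shown in this pull, so the sketch below is an assumption about that plumbing rather than a quote of it:

    /* Assumed wrapper form: dispatch through adev->gmc.gmc_funcs. */
    #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) \
            ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)))
    #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) \
            ((adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)))
    #define amdgpu_gmc_get_pte_flags(adev, flags) \
            ((adev)->gmc.gmc_funcs->get_vm_pte_flags((adev), (flags)))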

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c

@@ -56,7 +56,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
 		return -ENOMEM;
 
 	start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
-	size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
+	size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
 	drm_mm_init(&mgr->mm, start, size);
 	spin_lock_init(&mgr->lock);
 	atomic64_set(&mgr->available, p_size);

+ 42 - 16
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

@@ -181,15 +181,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
-	if (ring->funcs->init_cond_exec)
+	if (job && ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-	if (ring->funcs->emit_hdp_flush
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
-	   )
-		amdgpu_ring_emit_hdp_flush(ring);
+	{
+		if (ring->funcs->emit_hdp_flush)
+			amdgpu_ring_emit_hdp_flush(ring);
+		else
+			amdgpu_asic_flush_hdp(adev, ring);
+	}
 
 	skip_preamble = ring->current_ctx == fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -219,12 +222,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_tmz)
 		amdgpu_ring_emit_tmz(ring, false);
 
-	if (ring->funcs->emit_hdp_invalidate
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
-	   )
-		amdgpu_ring_emit_hdp_invalidate(ring);
+		amdgpu_asic_invalidate_hdp(adev, ring);
 
 	r = amdgpu_fence_emit(ring, f);
 	if (r) {
@@ -278,11 +279,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
-	if (r) {
-		return r;
-	}
-
 	adev->ib_pool_ready = true;
 	if (amdgpu_debugfs_sa_init(adev)) {
 		dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +297,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
 	if (adev->ib_pool_ready) {
-		amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
 		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
 		adev->ib_pool_ready = false;
 	}
@@ -321,14 +316,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
 	unsigned i;
 	int r, ret = 0;
+	long tmo_gfx, tmo_mm;
+
+	tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+	if (amdgpu_sriov_vf(adev)) {
+		/* MM engines on the hypervisor side are not scheduled together
+		 * with the CP and SDMA engines, so even in exclusive mode an MM
+		 * engine may still be running on another VF. The IB test timeout
+		 * for MM engines under SR-IOV therefore has to be long; 8 seconds
+		 * should be enough for the MM engine to come back to this VF.
+		 */
+		tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	}
+
+	if (amdgpu_sriov_runtime(adev)) {
+		/* CP and SDMA engines are scheduled together, so the timeout
+		 * has to be wide enough to cover the cost of waiting for them
+		 * to come back while in runtime mode.
+		 */
+		tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	}
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
+		long tmo;
 
 		if (!ring || !ring->ready)
 			continue;
 
-		r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+		/* MM engines need more time */
+		if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+			ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+			ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+			ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+			ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+			tmo = tmo_mm;
+		else
+			tmo = tmo_gfx;
+
+		r = amdgpu_ring_test_ib(ring, tmo);
 		if (r) {
 			ring->ready = false;
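
The IB tests now use two timeouts: tmo_mm for the multimedia rings (UVD, UVD_ENC, VCE, VCN_DEC, VCN_ENC) and tmo_gfx for everything else; tmo_mm is stretched to 8x AMDGPU_IB_TEST_TIMEOUT on any SR-IOV VF, tmo_gfx only when running in SR-IOV runtime mode. The per-ring selection reads as the following hypothetical helper, equivalent to the inline ring-type checks above but not itself part of the patch:

    /* Hypothetical helper, equivalent to the inline ring-type checks above. */
    static long amdgpu_ib_test_timeout(struct amdgpu_ring *ring, long tmo_mm, long tmo_gfx)
    {
            switch (ring->funcs->type) {
            case AMDGPU_RING_TYPE_UVD:
            case AMDGPU_RING_TYPE_UVD_ENC:
            case AMDGPU_RING_TYPE_VCE:
            case AMDGPU_RING_TYPE_VCN_DEC:
            case AMDGPU_RING_TYPE_VCN_ENC:
                    return tmo_mm;          /* MM engines need more time */
            default:
                    return tmo_gfx;
            }
    }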
 

+ 283 - 129
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

@@ -40,6 +40,12 @@
  */
 static DEFINE_IDA(amdgpu_pasid_ida);
 
+/* Helper to free pasid from a fence callback */
+struct amdgpu_pasid_cb {
+	struct dma_fence_cb cb;
+	unsigned int pasid;
+};
+
 /**
  * amdgpu_pasid_alloc - Allocate a PASID
  * @bits: Maximum width of the PASID in bits, must be at least 1
@@ -63,6 +69,9 @@ int amdgpu_pasid_alloc(unsigned int bits)
 			break;
 	}
 
+	if (pasid >= 0)
+		trace_amdgpu_pasid_allocated(pasid);
+
 	return pasid;
 }
 
@@ -72,9 +81,86 @@ int amdgpu_pasid_alloc(unsigned int bits)
  */
 void amdgpu_pasid_free(unsigned int pasid)
 {
+	trace_amdgpu_pasid_freed(pasid);
 	ida_simple_remove(&amdgpu_pasid_ida, pasid);
 }
 
+static void amdgpu_pasid_free_cb(struct dma_fence *fence,
+				 struct dma_fence_cb *_cb)
+{
+	struct amdgpu_pasid_cb *cb =
+		container_of(_cb, struct amdgpu_pasid_cb, cb);
+
+	amdgpu_pasid_free(cb->pasid);
+	dma_fence_put(fence);
+	kfree(cb);
+}
+
+/**
+ * amdgpu_pasid_free_delayed - free pasid when fences signal
+ *
+ * @resv: reservation object with the fences to wait for
+ * @pasid: pasid to free
+ *
+ * Free the pasid only after all the fences in resv are signaled.
+ */
+void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+			       unsigned int pasid)
+{
+	struct dma_fence *fence, **fences;
+	struct amdgpu_pasid_cb *cb;
+	unsigned count;
+	int r;
+
+	r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences);
+	if (r)
+		goto fallback;
+
+	if (count == 0) {
+		amdgpu_pasid_free(pasid);
+		return;
+	}
+
+	if (count == 1) {
+		fence = fences[0];
+		kfree(fences);
+	} else {
+		uint64_t context = dma_fence_context_alloc(1);
+		struct dma_fence_array *array;
+
+		array = dma_fence_array_create(count, fences, context,
+					       1, false);
+		if (!array) {
+			kfree(fences);
+			goto fallback;
+		}
+		fence = &array->base;
+	}
+
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb) {
+		/* Last resort when we are OOM */
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+		amdgpu_pasid_free(pasid);
+	} else {
+		cb->pasid = pasid;
+		if (dma_fence_add_callback(fence, &cb->cb,
+					   amdgpu_pasid_free_cb))
+			amdgpu_pasid_free_cb(fence, &cb->cb);
+	}
+
+	return;
+
+fallback:
+	/* Not enough memory for the delayed delete; as a last resort,
+	 * block for all the fences to complete.
+	 */
+	reservation_object_wait_timeout_rcu(resv, true, false,
+					    MAX_SCHEDULE_TIMEOUT);
+	amdgpu_pasid_free(pasid);
+}
+
 /*
  * VMID manager
  *
@@ -96,164 +182,185 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 		atomic_read(&adev->gpu_reset_counter);
 }
 
-/* idr_mgr->lock must be held */
-static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm,
-					    struct amdgpu_ring *ring,
-					    struct amdgpu_sync *sync,
-					    struct dma_fence *fence,
-					    struct amdgpu_job *job)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned vmhub = ring->funcs->vmhub;
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct amdgpu_vmid *id = vm->reserved_vmid[vmhub];
-	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct dma_fence *updates = sync->last_vm_update;
-	int r = 0;
-	struct dma_fence *flushed, *tmp;
-	bool needs_flush = vm->use_cpu_for_update;
-
-	flushed  = id->flushed_updates;
-	if ((amdgpu_vmid_had_gpu_reset(adev, id)) ||
-	    (atomic64_read(&id->owner) != vm->entity.fence_context) ||
-	    (job->vm_pd_addr != id->pd_gpu_addr) ||
-	    (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) ||
-	    (!id->last_flush || (id->last_flush->context != fence_context &&
-				 !dma_fence_is_signaled(id->last_flush)))) {
-		needs_flush = true;
-		/* to prevent one context starved by another context */
-		id->pd_gpu_addr = 0;
-		tmp = amdgpu_sync_peek_fence(&id->active, ring);
-		if (tmp) {
-			r = amdgpu_sync_fence(adev, sync, tmp, false);
-			return r;
-		}
-	}
-
-	/* Good we can use this VMID. Remember this submission as
-	* user of the VMID.
-	*/
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
-	if (r)
-		goto out;
-
-	if (updates && (!flushed || updates->context != flushed->context ||
-			dma_fence_is_later(updates, flushed))) {
-		dma_fence_put(id->flushed_updates);
-		id->flushed_updates = dma_fence_get(updates);
-	}
-	id->pd_gpu_addr = job->vm_pd_addr;
-	atomic64_set(&id->owner, vm->entity.fence_context);
-	job->vm_needs_flush = needs_flush;
-	if (needs_flush) {
-		dma_fence_put(id->last_flush);
-		id->last_flush = NULL;
-	}
-	job->vmid = id - id_mgr->ids;
-	trace_amdgpu_vm_grab_id(vm, ring, job);
-out:
-	return r;
-}
-
 /**
- * amdgpu_vm_grab_id - allocate the next free VMID
+ * amdgpu_vm_grab_idle - grab idle VMID
  *
  * @vm: vm to allocate id for
  * @ring: ring we want to submit job to
  * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
+ * @idle: resulting idle VMID
  *
- * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ * Try to find an idle VMID; if none is idle, add a fence to the sync object
+ * to wait on. Returns -ENOMEM when we are out of memory.
  */
-int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		     struct amdgpu_sync *sync, struct dma_fence *fence,
-		     struct amdgpu_job *job)
+static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
+				 struct amdgpu_ring *ring,
+				 struct amdgpu_sync *sync,
+				 struct amdgpu_vmid **idle)
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct dma_fence *updates = sync->last_vm_update;
-	struct amdgpu_vmid *id, *idle;
 	struct dma_fence **fences;
 	unsigned i;
-	int r = 0;
+	int r;
+
+	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
+		return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
 
-	mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub]) {
-		r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job);
-		mutex_unlock(&id_mgr->lock);
-		return r;
-	}
 	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-	if (!fences) {
-		mutex_unlock(&id_mgr->lock);
+	if (!fences)
 		return -ENOMEM;
-	}
+
 	/* Check if we have an idle VMID */
 	i = 0;
-	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
-		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+	list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
+		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
 		if (!fences[i])
 			break;
 		++i;
 	}
 
 	/* If we can't find an idle VMID to use, wait till one becomes available */
-	if (&idle->list == &id_mgr->ids_lru) {
+	if (&(*idle)->list == &id_mgr->ids_lru) {
 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
 		struct dma_fence_array *array;
 		unsigned j;
 
+		*idle = NULL;
 		for (j = 0; j < i; ++j)
 			dma_fence_get(fences[j]);
 
 		array = dma_fence_array_create(i, fences, fence_context,
-					   seqno, true);
+					       seqno, true);
 		if (!array) {
 			for (j = 0; j < i; ++j)
 				dma_fence_put(fences[j]);
 			kfree(fences);
-			r = -ENOMEM;
-			goto error;
+			return -ENOMEM;
 		}
 
+		r = amdgpu_sync_fence(adev, sync, &array->base, false);
+		dma_fence_put(ring->vmid_wait);
+		ring->vmid_wait = &array->base;
+		return r;
+	}
+	kfree(fences);
 
-		r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
-		dma_fence_put(&array->base);
-		if (r)
-			goto error;
+	return 0;
+}
 
-		mutex_unlock(&id_mgr->lock);
-		return 0;
+/**
+ * amdgpu_vm_grab_reserved - try to assign reserved VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ *
+ * Try to assign a reserved VMID.
+ */
+static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
+				     struct amdgpu_ring *ring,
+				     struct amdgpu_sync *sync,
+				     struct dma_fence *fence,
+				     struct amdgpu_job *job,
+				     struct amdgpu_vmid **id)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	uint64_t fence_context = adev->fence_context + ring->idx;
+	struct dma_fence *updates = sync->last_vm_update;
+	bool needs_flush = vm->use_cpu_for_update;
+	int r = 0;
+
+	*id = vm->reserved_vmid[vmhub];
+	if (updates && (*id)->flushed_updates &&
+	    updates->context == (*id)->flushed_updates->context &&
+	    !dma_fence_is_later(updates, (*id)->flushed_updates))
+	    updates = NULL;
+
+	if ((*id)->owner != vm->entity.fence_context ||
+	    job->vm_pd_addr != (*id)->pd_gpu_addr ||
+	    updates || !(*id)->last_flush ||
+	    ((*id)->last_flush->context != fence_context &&
+	     !dma_fence_is_signaled((*id)->last_flush))) {
+		struct dma_fence *tmp;
 
+		/* to prevent one context from being starved by another context */
+		(*id)->pd_gpu_addr = 0;
+		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+		if (tmp) {
+			*id = NULL;
+			r = amdgpu_sync_fence(adev, sync, tmp, false);
+			return r;
+		}
+		needs_flush = true;
 	}
-	kfree(fences);
+
+	/* Good, we can use this VMID. Remember this submission as
+	 * user of the VMID.
+	 */
+	r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+	if (r)
+		return r;
+
+	if (updates) {
+		dma_fence_put((*id)->flushed_updates);
+		(*id)->flushed_updates = dma_fence_get(updates);
+	}
+	job->vm_needs_flush = needs_flush;
+	return 0;
+}
+
+/**
+ * amdgpu_vm_grab_used - try to reuse a VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ *
+ * Try to reuse a VMID for this submission.
+ */
+static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
+				 struct amdgpu_ring *ring,
+				 struct amdgpu_sync *sync,
+				 struct dma_fence *fence,
+				 struct amdgpu_job *job,
+				 struct amdgpu_vmid **id)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	uint64_t fence_context = adev->fence_context + ring->idx;
+	struct dma_fence *updates = sync->last_vm_update;
+	int r;
 
 	job->vm_needs_flush = vm->use_cpu_for_update;
+
 	/* Check if we can use a VMID already assigned to this VM */
-	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
-		struct dma_fence *flushed;
+	list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
 		bool needs_flush = vm->use_cpu_for_update;
+		struct dma_fence *flushed;
 
 		/* Check all the prerequisites to using this VMID */
-		if (amdgpu_vmid_had_gpu_reset(adev, id))
-			continue;
-
-		if (atomic64_read(&id->owner) != vm->entity.fence_context)
+		if ((*id)->owner != vm->entity.fence_context)
 			continue;
 
-		if (job->vm_pd_addr != id->pd_gpu_addr)
+		if ((*id)->pd_gpu_addr != job->vm_pd_addr)
 			continue;
 
-		if (!id->last_flush ||
-		    (id->last_flush->context != fence_context &&
-		     !dma_fence_is_signaled(id->last_flush)))
+		if (!(*id)->last_flush ||
+		    ((*id)->last_flush->context != fence_context &&
+		     !dma_fence_is_signaled((*id)->last_flush)))
 			needs_flush = true;
 
-		flushed  = id->flushed_updates;
+		flushed  = (*id)->flushed_updates;
 		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
 			needs_flush = true;
 
@@ -261,47 +368,91 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
 			continue;
 
-		/* Good we can use this VMID. Remember this submission as
+		/* Good, we can use this VMID. Remember this submission as
 		 * user of the VMID.
 		 */
-		r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
+		r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
 		if (r)
-			goto error;
+			return r;
 
 		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
-			dma_fence_put(id->flushed_updates);
-			id->flushed_updates = dma_fence_get(updates);
+			dma_fence_put((*id)->flushed_updates);
+			(*id)->flushed_updates = dma_fence_get(updates);
 		}
 
-		if (needs_flush)
-			goto needs_flush;
-		else
-			goto no_flush_needed;
+		job->vm_needs_flush |= needs_flush;
+		return 0;
+	}
 
-	};
+	*id = NULL;
+	return 0;
+}
 
-	/* Still no ID to use? Then use the idle one found earlier */
-	id = idle;
+/**
+ * amdgpu_vm_grab_id - allocate the next free VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ *
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ */
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		     struct amdgpu_sync *sync, struct dma_fence *fence,
+		     struct amdgpu_job *job)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+	struct amdgpu_vmid *idle = NULL;
+	struct amdgpu_vmid *id = NULL;
+	int r = 0;
 
-	/* Remember this submission as user of the VMID */
-	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
-	if (r)
+	mutex_lock(&id_mgr->lock);
+	r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
+	if (r || !idle)
 		goto error;
 
-	id->pd_gpu_addr = job->vm_pd_addr;
-	dma_fence_put(id->flushed_updates);
-	id->flushed_updates = dma_fence_get(updates);
-	atomic64_set(&id->owner, vm->entity.fence_context);
+	if (vm->reserved_vmid[vmhub]) {
+		r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
+		if (r || !id)
+			goto error;
+	} else {
+		r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
+		if (r)
+			goto error;
 
-needs_flush:
-	job->vm_needs_flush = true;
-	dma_fence_put(id->last_flush);
-	id->last_flush = NULL;
+		if (!id) {
+			struct dma_fence *updates = sync->last_vm_update;
 
-no_flush_needed:
-	list_move_tail(&id->list, &id_mgr->ids_lru);
+			/* Still no ID to use? Then use the idle one found earlier */
+			id = idle;
 
+			/* Remember this submission as user of the VMID */
+			r = amdgpu_sync_fence(ring->adev, &id->active,
+					      fence, false);
+			if (r)
+				goto error;
+
+			dma_fence_put(id->flushed_updates);
+			id->flushed_updates = dma_fence_get(updates);
+			job->vm_needs_flush = true;
+		}
+
+		list_move_tail(&id->list, &id_mgr->ids_lru);
+	}
+
+	id->pd_gpu_addr = job->vm_pd_addr;
+	id->owner = vm->entity.fence_context;
+
+	if (job->vm_needs_flush) {
+		dma_fence_put(id->last_flush);
+		id->last_flush = NULL;
+	}
 	job->vmid = id - id_mgr->ids;
+	job->pasid = vm->pasid;
 	trace_amdgpu_vm_grab_id(vm, ring, job);
 
 error:
@@ -370,13 +521,15 @@ void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vmid *id = &id_mgr->ids[vmid];
 
-	atomic64_set(&id->owner, 0);
+	mutex_lock(&id_mgr->lock);
+	id->owner = 0;
 	id->gds_base = 0;
 	id->gds_size = 0;
 	id->gws_base = 0;
 	id->gws_size = 0;
 	id->oa_base = 0;
 	id->oa_size = 0;
+	mutex_unlock(&id_mgr->lock);
 }
 
 /**
@@ -454,6 +607,7 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
 			amdgpu_sync_free(&id->active);
 			dma_fence_put(id->flushed_updates);
 			dma_fence_put(id->last_flush);
+			dma_fence_put(id->pasid_mapping);
 		}
 	}
 }
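
amdgpu_pasid_free_delayed() defers the PASID release until every fence on the page directory's reservation object has signalled: the fences are collected with reservation_object_get_fences_rcu(), collapsed into a dma_fence_array when there is more than one (dma_fence_array_create() takes ownership of the fences array, which is why it is only freed on the failure paths), and a callback is attached to the result; if dma_fence_add_callback() reports that the fence has already signalled, the callback simply runs straight away. The typical caller pairing, mirroring the amdgpu_driver_postclose_kms() change later in this pull, looks roughly like this:

    /* Sketch of the caller side: keep the page directory BO alive across
     * vm_fini, then let the PASID go once its reservation object is idle. */
    pd = amdgpu_bo_ref(vm->root.base.bo);
    amdgpu_vm_fini(adev, vm);
    if (pasid)
            amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
    amdgpu_bo_unref(&pd);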

+ 6 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

@@ -43,7 +43,7 @@ struct amdgpu_vmid {
 	struct list_head	list;
 	struct amdgpu_sync	active;
 	struct dma_fence	*last_flush;
-	atomic64_t		owner;
+	uint64_t		owner;
 
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
@@ -57,6 +57,9 @@ struct amdgpu_vmid {
 	uint32_t		gws_size;
 	uint32_t		oa_base;
 	uint32_t		oa_size;
+
+	unsigned		pasid;
+	struct dma_fence	*pasid_mapping;
 };
 
 struct amdgpu_vmid_mgr {
@@ -69,6 +72,8 @@ struct amdgpu_vmid_mgr {
 
 int amdgpu_pasid_alloc(unsigned int bits);
 void amdgpu_pasid_free(unsigned int pasid);
+void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+			       unsigned int pasid);
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 			       struct amdgpu_vmid *id);

+ 3 - 42
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

@@ -25,51 +25,12 @@
 #define __AMDGPU_IH_H__
 
 #include <linux/chash.h>
+#include "soc15_ih_clientid.h"
 
 struct amdgpu_device;
- /*
-  * vega10+ IH clients
- */
-enum amdgpu_ih_clientid
-{
-    AMDGPU_IH_CLIENTID_IH	    = 0x00,
-    AMDGPU_IH_CLIENTID_ACP	    = 0x01,
-    AMDGPU_IH_CLIENTID_ATHUB	    = 0x02,
-    AMDGPU_IH_CLIENTID_BIF	    = 0x03,
-    AMDGPU_IH_CLIENTID_DCE	    = 0x04,
-    AMDGPU_IH_CLIENTID_ISP	    = 0x05,
-    AMDGPU_IH_CLIENTID_PCIE0	    = 0x06,
-    AMDGPU_IH_CLIENTID_RLC	    = 0x07,
-    AMDGPU_IH_CLIENTID_SDMA0	    = 0x08,
-    AMDGPU_IH_CLIENTID_SDMA1	    = 0x09,
-    AMDGPU_IH_CLIENTID_SE0SH	    = 0x0a,
-    AMDGPU_IH_CLIENTID_SE1SH	    = 0x0b,
-    AMDGPU_IH_CLIENTID_SE2SH	    = 0x0c,
-    AMDGPU_IH_CLIENTID_SE3SH	    = 0x0d,
-    AMDGPU_IH_CLIENTID_SYSHUB	    = 0x0e,
-    AMDGPU_IH_CLIENTID_THM	    = 0x0f,
-    AMDGPU_IH_CLIENTID_UVD	    = 0x10,
-    AMDGPU_IH_CLIENTID_VCE0	    = 0x11,
-    AMDGPU_IH_CLIENTID_VMC	    = 0x12,
-    AMDGPU_IH_CLIENTID_XDMA	    = 0x13,
-    AMDGPU_IH_CLIENTID_GRBM_CP	    = 0x14,
-    AMDGPU_IH_CLIENTID_ATS	    = 0x15,
-    AMDGPU_IH_CLIENTID_ROM_SMUIO    = 0x16,
-    AMDGPU_IH_CLIENTID_DF	    = 0x17,
-    AMDGPU_IH_CLIENTID_VCE1	    = 0x18,
-    AMDGPU_IH_CLIENTID_PWR	    = 0x19,
-    AMDGPU_IH_CLIENTID_UTCL2	    = 0x1b,
-    AMDGPU_IH_CLIENTID_EA	    = 0x1c,
-    AMDGPU_IH_CLIENTID_UTCL2LOG	    = 0x1d,
-    AMDGPU_IH_CLIENTID_MP0	    = 0x1e,
-    AMDGPU_IH_CLIENTID_MP1	    = 0x1f,
-
-    AMDGPU_IH_CLIENTID_MAX,
-
-    AMDGPU_IH_CLIENTID_VCN	    = AMDGPU_IH_CLIENTID_UVD
-};
 
 #define AMDGPU_IH_CLIENTID_LEGACY 0
+#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
 
 #define AMDGPU_PAGEFAULT_HASH_BITS 8
 struct amdgpu_retryfault_hashtable {
@@ -109,7 +70,7 @@ struct amdgpu_iv_entry {
 	unsigned vmid_src;
 	uint64_t timestamp;
 	unsigned timestamp_src;
-	unsigned pas_id;
+	unsigned pasid;
 	unsigned pasid_src;
 	unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
 	const uint32_t *iv_entry;

+ 3 - 50
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c

@@ -92,7 +92,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
 }
 
 /* Disable *all* interrupts */
-static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
+void amdgpu_irq_disable_all(struct amdgpu_device *adev)
 {
 	unsigned long irqflags;
 	unsigned i, j, k;
@@ -122,55 +122,6 @@ static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
 	spin_unlock_irqrestore(&adev->irq.lock, irqflags);
 }
 
-/**
- * amdgpu_irq_preinstall - drm irq preinstall callback
- *
- * @dev: drm dev pointer
- *
- * Gets the hw ready to enable irqs (all asics).
- * This function disables all interrupt sources on the GPU.
- */
-void amdgpu_irq_preinstall(struct drm_device *dev)
-{
-	struct amdgpu_device *adev = dev->dev_private;
-
-	/* Disable *all* interrupts */
-	amdgpu_irq_disable_all(adev);
-	/* Clear bits */
-	amdgpu_ih_process(adev);
-}
-
-/**
- * amdgpu_irq_postinstall - drm irq preinstall callback
- *
- * @dev: drm dev pointer
- *
- * Handles stuff to be done after enabling irqs (all asics).
- * Returns 0 on success.
- */
-int amdgpu_irq_postinstall(struct drm_device *dev)
-{
-	dev->max_vblank_count = 0x00ffffff;
-	return 0;
-}
-
-/**
- * amdgpu_irq_uninstall - drm irq uninstall callback
- *
- * @dev: drm dev pointer
- *
- * This function disables all interrupt sources on the GPU (all asics).
- */
-void amdgpu_irq_uninstall(struct drm_device *dev)
-{
-	struct amdgpu_device *adev = dev->dev_private;
-
-	if (adev == NULL) {
-		return;
-	}
-	amdgpu_irq_disable_all(adev);
-}
-
 /**
  * amdgpu_irq_handler - irq handler
  *
@@ -262,6 +213,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 		cancel_work_sync(&adev->reset_work);
 		return r;
 	}
+	adev->ddev->max_vblank_count = 0x00ffffff;
 
 	DRM_DEBUG("amdgpu: irq initialized.\n");
 	return 0;
@@ -307,6 +259,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
 			}
 		}
 		kfree(adev->irq.client[i].sources);
+		adev->irq.client[i].sources = NULL;
 	}
 }
 

+ 1 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h

@@ -78,9 +78,7 @@ struct amdgpu_irq {
 	uint32_t                        srbm_soft_reset;
 };
 
-void amdgpu_irq_preinstall(struct drm_device *dev);
-int amdgpu_irq_postinstall(struct drm_device *dev);
-void amdgpu_irq_uninstall(struct drm_device *dev);
+void amdgpu_irq_disable_all(struct amdgpu_device *adev);
 irqreturn_t amdgpu_irq_handler(int irq, void *arg);
 
 int amdgpu_irq_init(struct amdgpu_device *adev);

+ 80 - 30
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -190,8 +190,12 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->ver = adev->uvd.fw_version;
 		fw_info->feature = 0;
 		break;
+	case AMDGPU_INFO_FW_VCN:
+		fw_info->ver = adev->vcn.fw_version;
+		fw_info->feature = 0;
+		break;
 	case AMDGPU_INFO_FW_GMC:
-		fw_info->ver = adev->mc.fw_version;
+		fw_info->ver = adev->gmc.fw_version;
 		fw_info->feature = 0;
 		break;
 	case AMDGPU_INFO_FW_GFX_ME:
@@ -470,9 +474,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	case AMDGPU_INFO_VRAM_GTT: {
 		struct drm_amdgpu_info_vram_gtt vram_gtt;
 
-		vram_gtt.vram_size = adev->mc.real_vram_size;
+		vram_gtt.vram_size = adev->gmc.real_vram_size;
 		vram_gtt.vram_size -= adev->vram_pin_size;
-		vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size;
+		vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size;
 		vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
 		vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
 		vram_gtt.gtt_size *= PAGE_SIZE;
@@ -484,17 +488,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		struct drm_amdgpu_memory_info mem;
 
 		memset(&mem, 0, sizeof(mem));
-		mem.vram.total_heap_size = adev->mc.real_vram_size;
+		mem.vram.total_heap_size = adev->gmc.real_vram_size;
 		mem.vram.usable_heap_size =
-			adev->mc.real_vram_size - adev->vram_pin_size;
+			adev->gmc.real_vram_size - adev->vram_pin_size;
 		mem.vram.heap_usage =
 			amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 
 		mem.cpu_accessible_vram.total_heap_size =
-			adev->mc.visible_vram_size;
+			adev->gmc.visible_vram_size;
 		mem.cpu_accessible_vram.usable_heap_size =
-			adev->mc.visible_vram_size -
+			adev->gmc.visible_vram_size -
 			(adev->vram_pin_size - adev->invisible_pin_size);
 		mem.cpu_accessible_vram.heap_usage =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -580,11 +584,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
 
 		vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
+		vm_size -= AMDGPU_VA_RESERVED_SIZE;
+
+		/* Older VCE FW versions are buggy and can handle only 40 bits */
+		if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
+			vm_size = min(vm_size, 1ULL << 40);
+
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max =
 			min(vm_size, AMDGPU_VA_HOLE_START);
 
-		vm_size -= AMDGPU_VA_RESERVED_SIZE;
 		if (vm_size > AMDGPU_VA_HOLE_START) {
 			dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
 			dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
@@ -599,8 +608,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		       sizeof(adev->gfx.cu_info.ao_cu_bitmap));
 		memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
 		       sizeof(adev->gfx.cu_info.bitmap));
-		dev_info.vram_type = adev->mc.vram_type;
-		dev_info.vram_bit_width = adev->mc.vram_width;
+		dev_info.vram_type = adev->gmc.vram_type;
+		dev_info.vram_bit_width = adev->gmc.vram_width;
 		dev_info.vce_harvest_config = adev->vce.harvest_config;
 		dev_info.gc_double_offchip_lds_buf =
 			adev->gfx.config.double_offchip_lds_buf;
@@ -758,6 +767,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 				return -EINVAL;
 			}
 			break;
+		case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK:
+			/* get stable pstate sclk in MHz */
+			if (amdgpu_dpm_read_sensor(adev,
+						   AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
+						   (void *)&ui32, &ui32_size)) {
+				return -EINVAL;
+			}
+			ui32 /= 100;
+			break;
+		case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK:
+			/* get stable pstate mclk in MHz */
+			if (amdgpu_dpm_read_sensor(adev,
+						   AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK,
+						   (void *)&ui32, &ui32_size)) {
+				return -EINVAL;
+			}
+			ui32 /= 100;
+			break;
 		default:
 			DRM_DEBUG_KMS("Invalid request %d\n",
 				      info->sensor_info.type);
@@ -805,7 +832,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv;
-	int r;
+	int r, pasid;
 
 	file_priv->driver_priv = NULL;
 
@@ -819,28 +846,25 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		goto out_suspend;
 	}
 
-	r = amdgpu_vm_init(adev, &fpriv->vm,
-			   AMDGPU_VM_CONTEXT_GFX, 0);
-	if (r) {
-		kfree(fpriv);
-		goto out_suspend;
+	pasid = amdgpu_pasid_alloc(16);
+	if (pasid < 0) {
+		dev_warn(adev->dev, "No more PASIDs available!");
+		pasid = 0;
 	}
+	r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
+	if (r)
+		goto error_pasid;
 
 	fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
 	if (!fpriv->prt_va) {
 		r = -ENOMEM;
-		amdgpu_vm_fini(adev, &fpriv->vm);
-		kfree(fpriv);
-		goto out_suspend;
+		goto error_vm;
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
 		r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
-		if (r) {
-			amdgpu_vm_fini(adev, &fpriv->vm);
-			kfree(fpriv);
-			goto out_suspend;
-		}
+		if (r)
+			goto error_vm;
 	}
 
 	mutex_init(&fpriv->bo_list_lock);
@@ -849,6 +873,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
 	file_priv->driver_priv = fpriv;
+	goto out_suspend;
+
+error_vm:
+	amdgpu_vm_fini(adev, &fpriv->vm);
+
+error_pasid:
+	if (pasid)
+		amdgpu_pasid_free(pasid);
+
+	kfree(fpriv);
 
 out_suspend:
 	pm_runtime_mark_last_busy(dev->dev);
@@ -871,6 +905,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
+	struct amdgpu_bo *pd;
+	unsigned int pasid;
 	int handle;
 
 	if (!fpriv)
@@ -895,7 +931,13 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		amdgpu_bo_unreserve(adev->virt.csa_obj);
 	}
 
+	pasid = fpriv->vm.pasid;
+	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
+
 	amdgpu_vm_fini(adev, &fpriv->vm);
+	if (pasid)
+		amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
+	amdgpu_bo_unref(&pd);
 
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
 		amdgpu_bo_list_free(list);
@@ -947,11 +989,11 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
 		 */
 		do {
 			count = amdgpu_display_vblank_get_counter(adev, pipe);
-			/* Ask amdgpu_get_crtc_scanoutpos to return vpos as
-			 * distance to start of vblank, instead of regular
-			 * vertical scanout pos.
+			/* Ask amdgpu_display_get_crtc_scanoutpos to return
+			 * vpos as distance to start of vblank, instead of
+			 * regular vertical scanout pos.
 			 */
-			stat = amdgpu_get_crtc_scanoutpos(
+			stat = amdgpu_display_get_crtc_scanoutpos(
 				dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
 				&vpos, &hpos, NULL, NULL,
 				&adev->mode_info.crtcs[pipe]->base.hwmode);
@@ -992,7 +1034,7 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
+	int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
 
 	return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
 }
@@ -1008,7 +1050,7 @@ int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
+	int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
 
 	amdgpu_irq_put(adev, &adev->crtc_irq, idx);
 }
@@ -1160,6 +1202,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 			   i, fw_info.feature, fw_info.ver);
 	}
 
+	/* VCN */
+	query_fw.fw_type = AMDGPU_INFO_FW_VCN;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
 	return 0;
 }
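
The new AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK/MCLK queries pass the raw amdgpu_dpm_read_sensor() value through a divide-by-100 before returning it; together with the "in MHz" comments this suggests the DPM sensor interface reports clocks in 10 kHz units, although that unit is an inference here rather than something the patch states. For example:

    /* Assumed unit: sensor value in 10 kHz steps. */
    uint32_t raw = 80000;       /* hypothetical AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK reading */
    uint32_t mhz = raw / 100;   /* 800 MHz reported to userspace */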
 

+ 24 - 54
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h

@@ -267,8 +267,6 @@ struct amdgpu_display_funcs {
 	void (*bandwidth_update)(struct amdgpu_device *adev);
 	/* get frame count */
 	u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
-	/* wait for vblank */
-	void (*vblank_wait)(struct amdgpu_device *adev, int crtc);
 	/* set backlight level */
 	void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
 				    u8 level);
@@ -553,14 +551,6 @@ struct amdgpu_connector {
 	/* we need to mind the EDID between detect
 	   and get modes due to analog/digital/tvencoder */
 	struct edid *edid;
-	/* number of modes generated from EDID at 'dc_sink' */
-	int num_modes;
-	/* The 'old' sink - before an HPD.
-	 * The 'current' sink is in dc_link->sink. */
-	struct dc_sink *dc_sink;
-	struct dc_link *dc_link;
-	struct dc_sink *dc_em_sink;
-	const struct dc_stream *stream;
 	void *con_priv;
 	bool dac_load_detect;
 	bool detected_by_load; /* if the connection status was determined by load */
@@ -571,27 +561,6 @@ struct amdgpu_connector {
 	enum amdgpu_connector_audio audio;
 	enum amdgpu_connector_dither dither;
 	unsigned pixelclock_for_modeset;
-
-	struct drm_dp_mst_topology_mgr mst_mgr;
-	struct amdgpu_dm_dp_aux dm_dp_aux;
-	struct drm_dp_mst_port *port;
-	struct amdgpu_connector *mst_port;
-	struct amdgpu_encoder *mst_encoder;
-	struct semaphore mst_sem;
-
-	/* TODO see if we can merge with ddc_bus or make a dm_connector */
-	struct amdgpu_i2c_adapter *i2c;
-
-	/* Monitor range limits */
-	int min_vfreq ;
-	int max_vfreq ;
-	int pixel_clock_mhz;
-
-	/*freesync caps*/
-	struct mod_freesync_caps caps;
-
-	struct mutex hpd_lock;
-
 };
 
 /* TODO: start to use this struct and remove same field from base one */
@@ -609,7 +578,7 @@ struct amdgpu_mst_connector {
 #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 
-/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
+/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */
 #define DRM_SCANOUTPOS_VALID        (1 << 0)
 #define DRM_SCANOUTPOS_IN_VBLANK    (1 << 1)
 #define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
@@ -628,30 +597,31 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
 u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder);
 struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder);
 
-bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux);
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+			      bool use_aux);
 
 void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
 
-int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
-			       unsigned int flags, int *vpos, int *hpos,
-			       ktime_t *stime, ktime_t *etime,
-			       const struct drm_display_mode *mode);
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+			unsigned int pipe, unsigned int flags, int *vpos,
+			int *hpos, ktime_t *stime, ktime_t *etime,
+			const struct drm_display_mode *mode);
 
-int amdgpu_framebuffer_init(struct drm_device *dev,
-			     struct amdgpu_framebuffer *rfb,
-			     const struct drm_mode_fb_cmd2 *mode_cmd,
-			     struct drm_gem_object *obj);
+int amdgpu_display_framebuffer_init(struct drm_device *dev,
+				    struct amdgpu_framebuffer *rfb,
+				    const struct drm_mode_fb_cmd2 *mode_cmd,
+				    struct drm_gem_object *obj);
 
 int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
 
 void amdgpu_enc_destroy(struct drm_encoder *encoder);
 void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
-bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
-					const struct drm_display_mode *mode,
-					struct drm_display_mode *adjusted_mode);
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+				const struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode);
 void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
 			     struct drm_display_mode *adjusted_mode);
-int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
 
 /* fbdev layer */
 int amdgpu_fbdev_init(struct amdgpu_device *adev);
@@ -663,15 +633,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
 int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
 
 /* amdgpu_display.c */
-void amdgpu_print_display_setup(struct drm_device *dev);
-int amdgpu_modeset_create_props(struct amdgpu_device *adev);
-int amdgpu_crtc_set_config(struct drm_mode_set *set,
-			   struct drm_modeset_acquire_ctx *ctx);
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags, uint32_t target,
-				 struct drm_modeset_acquire_ctx *ctx);
+void amdgpu_display_print_display_setup(struct drm_device *dev);
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+				   struct drm_modeset_acquire_ctx *ctx);
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb,
+				struct drm_pending_vblank_event *event,
+				uint32_t page_flip_flags, uint32_t target,
+				struct drm_modeset_acquire_ctx *ctx);
 extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
 
 #endif

+ 65 - 63
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

@@ -36,6 +36,7 @@
 #include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 static bool amdgpu_need_backup(struct amdgpu_device *adev)
 {
@@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
 	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 
+	if (bo->kfd_bo)
+		amdgpu_amdkfd_unreserve_system_memory_limit(bo);
+
 	amdgpu_bo_kunmap(bo);
 
 	if (bo->gem_base.import_attach)
@@ -85,7 +89,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 	u32 c = 0;
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
-		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+		unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
@@ -105,7 +109,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		places[c].fpfn = 0;
 		if (flags & AMDGPU_GEM_CREATE_SHADOW)
-			places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+			places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
 		else
 			places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_TT;
@@ -171,13 +175,15 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
  * @size: size for the new BO
  * @align: alignment for the new BO
  * @domain: where to place it
- * @bo_ptr: resulting BO
+ * @bo_ptr: used to initialize BOs in structures
  * @gpu_addr: GPU addr of the pinned BO
  * @cpu_addr: optional CPU address mapping
  *
  * Allocates and pins a BO for kernel internal use, and returns it still
  * reserved.
  *
+ * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
+ *
  * Returns 0 on success, negative error code otherwise.
  */
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
@@ -189,10 +195,10 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 	int r;
 
 	if (!*bo_ptr) {
-		r = amdgpu_bo_create(adev, size, align, true, domain,
+		r = amdgpu_bo_create(adev, size, align, domain,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     NULL, NULL, 0, bo_ptr);
+				     ttm_bo_type_kernel, NULL, bo_ptr);
 		if (r) {
 			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
 				r);
@@ -240,12 +246,14 @@ error_free:
  * @size: size for the new BO
  * @align: alignment for the new BO
  * @domain: where to place it
- * @bo_ptr: resulting BO
+ * @bo_ptr:  used to initialize BOs in structures
  * @gpu_addr: GPU addr of the pinned BO
  * @cpu_addr: optional CPU address mapping
  *
  * Allocates and pins a BO for kernel internal use.
  *
+ * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
+ *
  * Returns 0 on success, negative error code otherwise.
  */
 int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
@@ -333,24 +341,22 @@ fail:
 	return false;
 }
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
-			       unsigned long size, int byte_align,
-			       bool kernel, u32 domain, u64 flags,
-			       struct sg_table *sg,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
+			       int byte_align, u32 domain,
+			       u64 flags, enum ttm_bo_type type,
 			       struct reservation_object *resv,
-			       uint64_t init_value,
 			       struct amdgpu_bo **bo_ptr)
 {
 	struct ttm_operation_ctx ctx = {
-		.interruptible = !kernel,
+		.interruptible = (type != ttm_bo_type_kernel),
 		.no_wait_gpu = false,
-		.allow_reserved_eviction = true,
-		.resv = resv
+		.resv = resv,
+		.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
 	};
 	struct amdgpu_bo *bo;
-	enum ttm_bo_type type;
 	unsigned long page_align;
 	size_t acc_size;
+	u32 domains;
 	int r;
 
 	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
@@ -359,13 +365,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	if (!amdgpu_bo_validate_size(adev, size, domain))
 		return -ENOMEM;
 
-	if (kernel) {
-		type = ttm_bo_type_kernel;
-	} else if (sg) {
-		type = ttm_bo_type_sg;
-	} else {
-		type = ttm_bo_type_device;
-	}
 	*bo_ptr = NULL;
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -374,11 +373,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
 	if (bo == NULL)
 		return -ENOMEM;
-	r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
-	if (unlikely(r)) {
-		kfree(bo);
-		return r;
-	}
+	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
 	INIT_LIST_HEAD(&bo->shadow_list);
 	INIT_LIST_HEAD(&bo->va);
 	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
@@ -388,7 +383,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 					 AMDGPU_GEM_DOMAIN_GWS |
 					 AMDGPU_GEM_DOMAIN_OA);
 	bo->allowed_domains = bo->preferred_domains;
-	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+	if (type != ttm_bo_type_kernel &&
+	    bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
 	bo->flags = flags;
@@ -422,30 +418,41 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 #endif
 
 	bo->tbo.bdev = &adev->mman.bdev;
-	amdgpu_ttm_placement_from_domain(bo, domain);
-
+	domains = bo->preferred_domains;
+retry:
+	amdgpu_ttm_placement_from_domain(bo, domains);
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
-				 &bo->placement, page_align, &ctx, NULL,
-				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-	if (unlikely(r != 0))
+				 &bo->placement, page_align, &ctx, acc_size,
+				 NULL, resv, &amdgpu_ttm_bo_destroy);
+
+	if (unlikely(r && r != -ERESTARTSYS)) {
+		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+			bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+			goto retry;
+		} else if (domains != bo->preferred_domains) {
+			domains = bo->allowed_domains;
+			goto retry;
+		}
+	}
+	if (unlikely(r))
 		return r;
 
-	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
 					     ctx.bytes_moved);
 	else
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
 
-	if (kernel)
+	if (type == ttm_bo_type_kernel)
 		bo->tbo.priority = 1;
 
 	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 		struct dma_fence *fence;
 
-		r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence);
+		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
 		if (unlikely(r))
 			goto fail_unreserve;
 
@@ -482,12 +489,11 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	if (bo->shadow)
 		return 0;
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, true,
-				AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
 				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 				AMDGPU_GEM_CREATE_SHADOW,
-				NULL, bo->tbo.resv, 0,
-				&bo->shadow);
+				ttm_bo_type_kernel,
+				bo->tbo.resv, &bo->shadow);
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
@@ -498,22 +504,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	return r;
 }
 
-/* init_value will only take effect when flags contains
- * AMDGPU_GEM_CREATE_VRAM_CLEARED.
- */
-int amdgpu_bo_create(struct amdgpu_device *adev,
-		     unsigned long size, int byte_align,
-		     bool kernel, u32 domain, u64 flags,
-		     struct sg_table *sg,
+int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
+		     int byte_align, u32 domain,
+		     u64 flags, enum ttm_bo_type type,
 		     struct reservation_object *resv,
-		     uint64_t init_value,
 		     struct amdgpu_bo **bo_ptr)
 {
 	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
 	int r;
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
-				parent_flags, sg, resv, init_value, bo_ptr);
+	r = amdgpu_bo_do_create(adev, size, byte_align, domain,
+				parent_flags, type, resv, bo_ptr);
 	if (r)
 		return r;
 
@@ -828,31 +829,32 @@ static const char *amdgpu_vram_names[] = {
 	"GDDR4",
 	"GDDR5",
 	"HBM",
-	"DDR3"
+	"DDR3",
+	"DDR4",
 };
 
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
 	/* reserve PAT memory space to WC for VRAM */
-	arch_io_reserve_memtype_wc(adev->mc.aper_base,
-				   adev->mc.aper_size);
+	arch_io_reserve_memtype_wc(adev->gmc.aper_base,
+				   adev->gmc.aper_size);
 
 	/* Add an MTRR for the VRAM */
-	adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
-					      adev->mc.aper_size);
+	adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
+					      adev->gmc.aper_size);
 	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
-		 adev->mc.mc_vram_size >> 20,
-		 (unsigned long long)adev->mc.aper_size >> 20);
+		 adev->gmc.mc_vram_size >> 20,
+		 (unsigned long long)adev->gmc.aper_size >> 20);
 	DRM_INFO("RAM width %dbits %s\n",
-		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
+		 adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
 	return amdgpu_ttm_init(adev);
 }
 
 void amdgpu_bo_fini(struct amdgpu_device *adev)
 {
 	amdgpu_ttm_fini(adev);
-	arch_phys_wc_del(adev->mc.vram_mtrr);
-	arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
+	arch_phys_wc_del(adev->gmc.vram_mtrr);
+	arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
 }
 
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
@@ -982,7 +984,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
 	size = bo->mem.num_pages << PAGE_SHIFT;
 	offset = bo->mem.start << PAGE_SHIFT;
-	if ((offset + size) <= adev->mc.visible_vram_size)
+	if ((offset + size) <= adev->gmc.visible_vram_size)
 		return 0;
 
 	/* Can't move a pinned BO to visible VRAM */
@@ -1005,7 +1007,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	offset = bo->mem.start << PAGE_SHIFT;
 	/* this should never happen */
 	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    (offset + size) > adev->mc.visible_vram_size)
+	    (offset + size) > adev->gmc.visible_vram_size)
 		return -EINVAL;
 
 	return 0;

+ 7 - 9
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

@@ -92,6 +92,8 @@ struct amdgpu_bo {
 		struct list_head	mn_list;
 		struct list_head	shadow_list;
 	};
+
+	struct kgd_mem                  *kfd_bo;
 };
 
 static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
@@ -201,13 +203,11 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
 	return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
 }
 
-int amdgpu_bo_create(struct amdgpu_device *adev,
-			    unsigned long size, int byte_align,
-			    bool kernel, u32 domain, u64 flags,
-			    struct sg_table *sg,
-			    struct reservation_object *resv,
-			    uint64_t init_value,
-			    struct amdgpu_bo **bo_ptr);
+int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
+		     int byte_align, u32 domain,
+		     u64 flags, enum ttm_bo_type type,
+		     struct reservation_object *resv,
+		     struct amdgpu_bo **bo_ptr);
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 			      unsigned long size, int align,
 			      u32 domain, struct amdgpu_bo **bo_ptr,
@@ -282,8 +282,6 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
 				      struct amdgpu_sa_manager *sa_manager);
 int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
 				      struct amdgpu_sa_manager *sa_manager);
-int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
-					struct amdgpu_sa_manager *sa_manager);
 int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align);
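
With the prototype change above, every amdgpu_bo_create() caller now passes an
explicit enum ttm_bo_type instead of the old "bool kernel" plus sg_table and
init_value arguments. A minimal caller sketch against the new signature is
shown below; the size, alignment, domain and flag values are illustrative only
and error handling is reduced to the bare minimum.

	struct amdgpu_bo *bo = NULL;
	int r;

	/* kernel-owned, zero-filled VRAM buffer under the new prototype; the
	 * former "bool kernel" argument becomes ttm_bo_type_kernel and the
	 * sg/init_value parameters are gone entirely
	 */
	r = amdgpu_bo_create(adev, 4096, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_VRAM_CLEARED, ttm_bo_type_kernel,
			     NULL, &bo);
	if (r)
		return r;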

+ 392 - 174
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

@@ -116,7 +116,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
 	}
 
 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state);
 	} else {
 		mutex_lock(&adev->pm.mutex);
 		adev->pm.dpm.user_state = state;
@@ -316,7 +316,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
 		    state != POWER_STATE_TYPE_DEFAULT) {
 			amdgpu_dpm_dispatch_task(adev,
-					AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
+					AMD_PP_TASK_ENABLE_USER_STATE, &state);
 			adev->pp_force_state_enabled = true;
 		}
 	}
@@ -360,6 +360,90 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 	return count;
 }
 
+static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	uint32_t parameter_size = 0;
+	long parameter[64];
+	char buf_cpy[128];
+	char *tmp_str;
+	char *sub_str;
+	const char delimiter[3] = {' ', '\n', '\0'};
+	uint32_t type;
+
+	if (count > 127)
+		return -EINVAL;
+
+	if (*buf == 's')
+		type = PP_OD_EDIT_SCLK_VDDC_TABLE;
+	else if (*buf == 'm')
+		type = PP_OD_EDIT_MCLK_VDDC_TABLE;
+	else if(*buf == 'r')
+		type = PP_OD_RESTORE_DEFAULT_TABLE;
+	else if (*buf == 'c')
+		type = PP_OD_COMMIT_DPM_TABLE;
+	else
+		return -EINVAL;
+
+	memcpy(buf_cpy, buf, count+1);
+
+	tmp_str = buf_cpy;
+
+	while (isspace(*++tmp_str));
+
+	while (tmp_str[0]) {
+		sub_str = strsep(&tmp_str, delimiter);
+		ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
+		if (ret)
+			return -EINVAL;
+		parameter_size++;
+
+		while (isspace(*tmp_str))
+			tmp_str++;
+	}
+
+	if (adev->powerplay.pp_funcs->odn_edit_dpm_table)
+		ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
+						parameter, parameter_size);
+
+	if (ret)
+		return -EINVAL;
+
+	if (type == PP_OD_COMMIT_DPM_TABLE) {
+		if (adev->powerplay.pp_funcs->dispatch_tasks) {
+			amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
+			return count;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t size = 0;
+
+	if (adev->powerplay.pp_funcs->print_clock_levels) {
+		size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
+		size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+		return size;
+	} else {
+		return snprintf(buf, PAGE_SIZE, "\n");
+	}
+
+}
+
 static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -530,7 +614,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
 
 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
 		amdgpu_pm_compute_clocks(adev);
@@ -574,7 +658,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
 
 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
 	} else {
 		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
 		amdgpu_pm_compute_clocks(adev);
@@ -584,159 +668,70 @@ fail:
 	return count;
 }
 
-static ssize_t amdgpu_get_pp_power_profile(struct device *dev,
-		char *buf, struct amd_pp_profile *query)
+static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	int ret = 0xff;
 
-	if (adev->powerplay.pp_funcs->get_power_profile_state)
-		ret = amdgpu_dpm_get_power_profile_state(
-				adev, query);
+	if (adev->powerplay.pp_funcs->get_power_profile_mode)
+		return amdgpu_dpm_get_power_profile_mode(adev, buf);
 
-	if (ret)
-		return ret;
-
-	return snprintf(buf, PAGE_SIZE,
-			"%d %d %d %d %d\n",
-			query->min_sclk / 100,
-			query->min_mclk / 100,
-			query->activity_threshold,
-			query->up_hyst,
-			query->down_hyst);
+	return snprintf(buf, PAGE_SIZE, "\n");
 }
 
-static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev,
-		struct device_attribute *attr,
-		char *buf)
-{
-	struct amd_pp_profile query = {0};
-
-	query.type = AMD_PP_GFX_PROFILE;
-
-	return amdgpu_get_pp_power_profile(dev, buf, &query);
-}
 
-static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev,
+static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 		struct device_attribute *attr,
-		char *buf)
-{
-	struct amd_pp_profile query = {0};
-
-	query.type = AMD_PP_COMPUTE_PROFILE;
-
-	return amdgpu_get_pp_power_profile(dev, buf, &query);
-}
-
-static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
 		const char *buf,
-		size_t count,
-		struct amd_pp_profile *request)
+		size_t count)
 {
+	int ret = 0xff;
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
-	uint32_t loop = 0;
-	char *sub_str, buf_cpy[128], *tmp_str;
+	uint32_t parameter_size = 0;
+	long parameter[64];
+	char *sub_str, buf_cpy[128];
+	char *tmp_str;
+	uint32_t i = 0;
+	char tmp[2];
+	long int profile_mode = 0;
 	const char delimiter[3] = {' ', '\n', '\0'};
-	long int value;
-	int ret = 0xff;
-
-	if (strncmp("reset", buf, strlen("reset")) == 0) {
-		if (adev->powerplay.pp_funcs->reset_power_profile_state)
-			ret = amdgpu_dpm_reset_power_profile_state(
-					adev, request);
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		return count;
-	}
-
-	if (strncmp("set", buf, strlen("set")) == 0) {
-		if (adev->powerplay.pp_funcs->set_power_profile_state)
-			ret = amdgpu_dpm_set_power_profile_state(
-					adev, request);
-
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		return count;
-	}
 
-	if (count + 1 >= 128) {
-		count = -EINVAL;
+	tmp[0] = *(buf);
+	tmp[1] = '\0';
+	ret = kstrtol(tmp, 0, &profile_mode);
+	if (ret)
 		goto fail;
-	}
-
-	memcpy(buf_cpy, buf, count + 1);
-	tmp_str = buf_cpy;
-
-	while (tmp_str[0]) {
-		sub_str = strsep(&tmp_str, delimiter);
-		ret = kstrtol(sub_str, 0, &value);
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
 
-		switch (loop) {
-		case 0:
-			/* input unit MHz convert to dpm table unit 10KHz*/
-			request->min_sclk = (uint32_t)value * 100;
-			break;
-		case 1:
-			/* input unit MHz convert to dpm table unit 10KHz*/
-			request->min_mclk = (uint32_t)value * 100;
-			break;
-		case 2:
-			request->activity_threshold = (uint16_t)value;
-			break;
-		case 3:
-			request->up_hyst = (uint8_t)value;
-			break;
-		case 4:
-			request->down_hyst = (uint8_t)value;
-			break;
-		default:
-			break;
+	if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+		if (count < 2 || count > 127)
+			return -EINVAL;
+		while (isspace(*++buf))
+			i++;
+		memcpy(buf_cpy, buf, count-i);
+		tmp_str = buf_cpy;
+		while (tmp_str[0]) {
+			sub_str = strsep(&tmp_str, delimiter);
+			ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			parameter_size++;
+			while (isspace(*tmp_str))
+				tmp_str++;
 		}
-
-		loop++;
 	}
-	if (adev->powerplay.pp_funcs->set_power_profile_state)
-		ret = amdgpu_dpm_set_power_profile_state(adev, request);
-
-	if (ret)
-		count = -EINVAL;
+	parameter[parameter_size] = profile_mode;
+	if (adev->powerplay.pp_funcs->set_power_profile_mode)
+		ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
 
+	if (!ret)
+		return count;
 fail:
-	return count;
-}
-
-static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev,
-		struct device_attribute *attr,
-		const char *buf,
-		size_t count)
-{
-	struct amd_pp_profile request = {0};
-
-	request.type = AMD_PP_GFX_PROFILE;
-
-	return amdgpu_set_pp_power_profile(dev, buf, count, &request);
-}
-
-static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev,
-		struct device_attribute *attr,
-		const char *buf,
-		size_t count)
-{
-	struct amd_pp_profile request = {0};
-
-	request.type = AMD_PP_COMPUTE_PROFILE;
-
-	return amdgpu_set_pp_power_profile(dev, buf, count, &request);
+	return -EINVAL;
 }
 
 static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
@@ -766,12 +761,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
 		amdgpu_get_pp_mclk_od,
 		amdgpu_set_pp_mclk_od);
-static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR,
-		amdgpu_get_pp_gfx_power_profile,
-		amdgpu_set_pp_gfx_power_profile);
-static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR,
-		amdgpu_get_pp_compute_power_profile,
-		amdgpu_set_pp_compute_power_profile);
+static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_power_profile_mode,
+		amdgpu_set_pp_power_profile_mode);
+static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_od_clk_voltage,
+		amdgpu_set_pp_od_clk_voltage);
 
 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@@ -779,17 +774,23 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	struct drm_device *ddev = adev->ddev;
-	int temp;
+	int r, temp, size = sizeof(temp);
 
 	/* Can't get temperature when the card is off */
 	if  ((adev->flags & AMD_IS_PX) &&
 	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
 		return -EINVAL;
 
-	if (!adev->powerplay.pp_funcs->get_temperature)
-		temp = 0;
-	else
-		temp = amdgpu_dpm_get_temperature(adev);
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+		return -EINVAL;
+
+	/* get the temperature */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+				   (void *)&temp, &size);
+	if (r)
+		return r;
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
@@ -834,6 +835,11 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 	int err;
 	int value;
 
+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (!adev->powerplay.pp_funcs->set_fan_control_mode)
 		return -EINVAL;
 
@@ -868,6 +874,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
 	int err;
 	u32 value;
 
+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	err = kstrtou32(buf, 10, &value);
 	if (err)
 		return err;
@@ -891,6 +902,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 	int err;
 	u32 speed = 0;
 
+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
 		err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
 		if (err)
@@ -910,6 +926,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	int err;
 	u32 speed = 0;
 
+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
 		err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
 		if (err)
@@ -919,6 +940,175 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	return sprintf(buf, "%i\n", speed);
 }
 
+static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
+	u32 vddgfx;
+	int r, size = sizeof(vddgfx);
+
+	/* Can't get voltage when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+	      return -EINVAL;
+
+	/* get the voltage */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
+				   (void *)&vddgfx, &size);
+	if (r)
+		return r;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vddgfx);
+}
+
+static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "vddgfx\n");
+}
+
+static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
+	u32 vddnb;
+	int r, size = sizeof(vddnb);
+
+	/* only APUs have vddnb */
+	if  (!(adev->flags & AMD_IS_APU))
+		return -EINVAL;
+
+	/* Can't get voltage when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+	      return -EINVAL;
+
+	/* get the voltage */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
+				   (void *)&vddnb, &size);
+	if (r)
+		return r;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vddnb);
+}
+
+static ssize_t amdgpu_hwmon_show_vddnb_label(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "vddnb\n");
+}
+
+static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
+	struct pp_gpu_power query = {0};
+	int r, size = sizeof(query);
+	unsigned uw;
+
+	/* Can't get power when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	/* sanity check PP is enabled */
+	if (!(adev->powerplay.pp_funcs &&
+	      adev->powerplay.pp_funcs->read_sensor))
+	      return -EINVAL;
+
+	/* get the power */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
+				   (void *)&query, &size);
+	if (r)
+		return r;
+
+	/* convert to microwatts */
+	uw = (query.average_gpu_power >> 8) * 1000000;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", uw);
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return sprintf(buf, "%i\n", 0);
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	uint32_t limit = 0;
+
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
+		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
+		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+	} else {
+		return snprintf(buf, PAGE_SIZE, "\n");
+	}
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	uint32_t limit = 0;
+
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
+		adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
+		return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+	} else {
+		return snprintf(buf, PAGE_SIZE, "\n");
+	}
+}
+
+
+static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int err;
+	u32 value;
+
+	err = kstrtou32(buf, 10, &value);
+	if (err)
+		return err;
+
+	value = value / 1000000; /* convert to Watt */
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
+		err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
+		if (err)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -927,6 +1117,14 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_
 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
 static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
 static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);
+static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0);
+static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0);
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0);
+static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0);
 
 static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
@@ -937,6 +1135,14 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
 	&sensor_dev_attr_pwm1_max.dev_attr.attr,
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
+	&sensor_dev_attr_in0_input.dev_attr.attr,
+	&sensor_dev_attr_in0_label.dev_attr.attr,
+	&sensor_dev_attr_in1_input.dev_attr.attr,
+	&sensor_dev_attr_in1_label.dev_attr.attr,
+	&sensor_dev_attr_power1_average.dev_attr.attr,
+	&sensor_dev_attr_power1_cap_max.dev_attr.attr,
+	&sensor_dev_attr_power1_cap_min.dev_attr.attr,
+	&sensor_dev_attr_power1_cap.dev_attr.attr,
 	NULL
 };
 
@@ -947,9 +1153,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	umode_t effective_mode = attr->mode;
 
-	/* no skipping for powerplay */
-	if (adev->powerplay.cgs_device)
-		return effective_mode;
+	/* handle non-powerplay limitations */
+	if (!adev->powerplay.pp_handle) {
+		/* Skip fan attributes if fan is not present */
+		if (adev->pm.no_fan &&
+		    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+		     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+		     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+		     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
+			return 0;
+		/* requires powerplay */
+		if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
+			return 0;
+	}
 
 	/* Skip limit attributes if DPM is not enabled */
 	if (!adev->pm.dpm_enabled &&
@@ -961,14 +1177,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;
 
-	/* Skip fan attributes if fan is not present */
-	if (adev->pm.no_fan &&
-	    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
-	     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
-	     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
-	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
-		return 0;
-
 	/* mask fan attributes if we have no bindings for this asic to expose */
 	if ((!adev->powerplay.pp_funcs->get_fan_speed_percent &&
 	     attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */
@@ -982,6 +1190,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */
 		effective_mode &= ~S_IWUSR;
 
+	if ((adev->flags & AMD_IS_APU) &&
+	    (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr||
+	     attr == &sensor_dev_attr_power1_cap.dev_attr.attr))
+		return 0;
+
 	/* hide max/min values if we can't both query and manage the fan */
 	if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
 	     !adev->powerplay.pp_funcs->get_fan_speed_percent) &&
@@ -989,8 +1203,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;
 
-	/* requires powerplay */
-	if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
+	/* only APUs have vddnb */
+	if (!(adev->flags & AMD_IS_APU) &&
+	    (attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_in1_label.dev_attr.attr))
 		return 0;
 
 	return effective_mode;
@@ -1013,13 +1229,15 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
 			     pm.dpm.thermal.work);
 	/* switch to the thermal state */
 	enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL;
+	int temp, size = sizeof(temp);
 
 	if (!adev->pm.dpm_enabled)
 		return;
 
-	if (adev->powerplay.pp_funcs->get_temperature) {
-		int temp = amdgpu_dpm_get_temperature(adev);
-
+	if (adev->powerplay.pp_funcs &&
+	    adev->powerplay.pp_funcs->read_sensor &&
+	    !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+				    (void *)&temp, &size)) {
 		if (temp < adev->pm.dpm.thermal.min_temp)
 			/* switch back the user state */
 			dpm_state = adev->pm.dpm.user_state;
@@ -1319,9 +1537,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 	if (adev->pm.dpm_enabled == 0)
 		return 0;
 
-	if (adev->powerplay.pp_funcs->get_temperature == NULL)
-		return 0;
-
 	adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
 								   DRIVER_NAME, adev,
 								   hwmon_groups);
@@ -1391,20 +1606,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 		return ret;
 	}
 	ret = device_create_file(adev->dev,
-			&dev_attr_pp_gfx_power_profile);
+			&dev_attr_pp_power_profile_mode);
 	if (ret) {
 		DRM_ERROR("failed to create device file	"
-				"pp_gfx_power_profile\n");
+				"pp_power_profile_mode\n");
 		return ret;
 	}
 	ret = device_create_file(adev->dev,
-			&dev_attr_pp_compute_power_profile);
+			&dev_attr_pp_od_clk_voltage);
 	if (ret) {
 		DRM_ERROR("failed to create device file	"
-				"pp_compute_power_profile\n");
+				"pp_od_clk_voltage\n");
 		return ret;
 	}
-
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1437,9 +1651,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 	device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
 	device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
 	device_remove_file(adev->dev,
-			&dev_attr_pp_gfx_power_profile);
+			&dev_attr_pp_power_profile_mode);
 	device_remove_file(adev->dev,
-			&dev_attr_pp_compute_power_profile);
+			&dev_attr_pp_od_clk_voltage);
 }
 
 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -1462,7 +1676,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 	}
 
 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
-		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
 	} else {
 		mutex_lock(&adev->pm.mutex);
 		adev->pm.dpm.new_active_crtcs = 0;
@@ -1512,6 +1726,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		seq_printf(m, "\t%u MHz (MCLK)\n", value/100);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size))
 		seq_printf(m, "\t%u MHz (SCLK)\n", value/100);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, (void *)&value, &size))
+		seq_printf(m, "\t%u MHz (PSTATE_SCLK)\n", value/100);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, (void *)&value, &size))
+		seq_printf(m, "\t%u MHz (PSTATE_MCLK)\n", value/100);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size))
 		seq_printf(m, "\t%u mV (VDDGFX)\n", value);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
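
The pp_od_clk_voltage, pp_power_profile_mode and hwmon power1_* files added in
this hunk are plain sysfs text interfaces. The sketch below shows one way
userspace might drive them; the card0/hwmon0 paths, the dpm level and the
clock/voltage numbers are illustrative assumptions rather than values taken
from this patch, and the numeric fields are interpreted against the ASIC's
powerplay tables.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* write one string to a sysfs file; returns 0 on success */
static int sysfs_write(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n == (ssize_t)strlen(val) ? 0 : -1;
}

int main(void)
{
	/* hypothetical paths; the real card/hwmon indices vary per system */
	const char *od  = "/sys/class/drm/card0/device/pp_od_clk_voltage";
	const char *cap = "/sys/class/hwmon/hwmon0/power1_cap";

	/* stage an sclk override ("s <level> <MHz> <mV>"), then commit it */
	if (sysfs_write(od, "s 7 1500 1000\n") || sysfs_write(od, "c\n"))
		return 1;

	/* power1_cap is in microwatts, matching the *1000000 scaling above */
	return sysfs_write(cap, "150000000\n");
}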

+ 0 - 290
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -1,290 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#include "atom.h"
-#include "amdgpu.h"
-#include "amd_shared.h"
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include "amdgpu_pm.h"
-#include <drm/amdgpu_drm.h>
-#include "amdgpu_powerplay.h"
-#include "si_dpm.h"
-#include "cik_dpm.h"
-#include "vi_dpm.h"
-
-static int amdgpu_pp_early_init(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amd_powerplay *amd_pp;
-	int ret = 0;
-
-	amd_pp = &(adev->powerplay);
-	amd_pp->pp_handle = (void *)adev;
-
-	switch (adev->asic_type) {
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS12:
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_TOPAZ:
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-	case CHIP_VEGA10:
-	case CHIP_RAVEN:
-		amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
-		amd_pp->ip_funcs = &pp_ip_funcs;
-		amd_pp->pp_funcs = &pp_dpm_funcs;
-		break;
-	/* These chips don't have powerplay implemenations */
-#ifdef CONFIG_DRM_AMDGPU_SI
-	case CHIP_TAHITI:
-	case CHIP_PITCAIRN:
-	case CHIP_VERDE:
-	case CHIP_OLAND:
-	case CHIP_HAINAN:
-		amd_pp->ip_funcs = &si_dpm_ip_funcs;
-		amd_pp->pp_funcs = &si_dpm_funcs;
-	break;
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
-	case CHIP_BONAIRE:
-	case CHIP_HAWAII:
-		if (amdgpu_dpm == -1) {
-			amd_pp->ip_funcs = &ci_dpm_ip_funcs;
-			amd_pp->pp_funcs = &ci_dpm_funcs;
-		} else {
-			amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
-			amd_pp->ip_funcs = &pp_ip_funcs;
-			amd_pp->pp_funcs = &pp_dpm_funcs;
-		}
-		break;
-	case CHIP_KABINI:
-	case CHIP_MULLINS:
-	case CHIP_KAVERI:
-		amd_pp->ip_funcs = &kv_dpm_ip_funcs;
-		amd_pp->pp_funcs = &kv_dpm_funcs;
-		break;
-#endif
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	if (adev->powerplay.ip_funcs->early_init)
-		ret = adev->powerplay.ip_funcs->early_init(
-					amd_pp->cgs_device ? amd_pp->cgs_device :
-					amd_pp->pp_handle);
-
-	return ret;
-}
-
-
-static int amdgpu_pp_late_init(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->late_init)
-		ret = adev->powerplay.ip_funcs->late_init(
-					adev->powerplay.pp_handle);
-
-	return ret;
-}
-
-static int amdgpu_pp_sw_init(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->sw_init)
-		ret = adev->powerplay.ip_funcs->sw_init(
-					adev->powerplay.pp_handle);
-
-	return ret;
-}
-
-static int amdgpu_pp_sw_fini(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->sw_fini)
-		ret = adev->powerplay.ip_funcs->sw_fini(
-					adev->powerplay.pp_handle);
-	if (ret)
-		return ret;
-
-	return ret;
-}
-
-static int amdgpu_pp_hw_init(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_init_bo(adev);
-
-	if (adev->powerplay.ip_funcs->hw_init)
-		ret = adev->powerplay.ip_funcs->hw_init(
-					adev->powerplay.pp_handle);
-
-	return ret;
-}
-
-static int amdgpu_pp_hw_fini(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->hw_fini)
-		ret = adev->powerplay.ip_funcs->hw_fini(
-					adev->powerplay.pp_handle);
-
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
-		amdgpu_ucode_fini_bo(adev);
-
-	return ret;
-}
-
-static void amdgpu_pp_late_fini(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->late_fini)
-		adev->powerplay.ip_funcs->late_fini(
-			  adev->powerplay.pp_handle);
-
-	if (adev->powerplay.cgs_device)
-		amdgpu_cgs_destroy_device(adev->powerplay.cgs_device);
-}
-
-static int amdgpu_pp_suspend(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->suspend)
-		ret = adev->powerplay.ip_funcs->suspend(
-					 adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_resume(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->resume)
-		ret = adev->powerplay.ip_funcs->resume(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_set_clockgating_state(void *handle,
-					enum amd_clockgating_state state)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->set_clockgating_state)
-		ret = adev->powerplay.ip_funcs->set_clockgating_state(
-				adev->powerplay.pp_handle, state);
-	return ret;
-}
-
-static int amdgpu_pp_set_powergating_state(void *handle,
-					enum amd_powergating_state state)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->set_powergating_state)
-		ret = adev->powerplay.ip_funcs->set_powergating_state(
-				 adev->powerplay.pp_handle, state);
-	return ret;
-}
-
-
-static bool amdgpu_pp_is_idle(void *handle)
-{
-	bool ret = true;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->is_idle)
-		ret = adev->powerplay.ip_funcs->is_idle(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_wait_for_idle(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->wait_for_idle)
-		ret = adev->powerplay.ip_funcs->wait_for_idle(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static int amdgpu_pp_soft_reset(void *handle)
-{
-	int ret = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->powerplay.ip_funcs->soft_reset)
-		ret = adev->powerplay.ip_funcs->soft_reset(
-					adev->powerplay.pp_handle);
-	return ret;
-}
-
-static const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
-	.name = "amdgpu_powerplay",
-	.early_init = amdgpu_pp_early_init,
-	.late_init = amdgpu_pp_late_init,
-	.sw_init = amdgpu_pp_sw_init,
-	.sw_fini = amdgpu_pp_sw_fini,
-	.hw_init = amdgpu_pp_hw_init,
-	.hw_fini = amdgpu_pp_hw_fini,
-	.late_fini = amdgpu_pp_late_fini,
-	.suspend = amdgpu_pp_suspend,
-	.resume = amdgpu_pp_resume,
-	.is_idle = amdgpu_pp_is_idle,
-	.wait_for_idle = amdgpu_pp_wait_for_idle,
-	.soft_reset = amdgpu_pp_soft_reset,
-	.set_clockgating_state = amdgpu_pp_set_clockgating_state,
-	.set_powergating_state = amdgpu_pp_set_powergating_state,
-};
-
-const struct amdgpu_ip_block_version amdgpu_pp_ip_block =
-{
-	.type = AMD_IP_BLOCK_TYPE_SMC,
-	.major = 1,
-	.minor = 0,
-	.rev = 0,
-	.funcs = &amdgpu_pp_ip_funcs,
-};

+ 135 - 29
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c

@@ -26,9 +26,12 @@
 #include <drm/drmP.h>
 
 #include "amdgpu.h"
+#include "amdgpu_display.h"
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 
+static const struct dma_buf_ops amdgpu_dmabuf_ops;
+
 struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -102,59 +105,95 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	int ret;
 
 	ww_mutex_lock(&resv->lock, NULL);
-	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
-			       AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo);
-	ww_mutex_unlock(&resv->lock);
+	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
+			       AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
+			       resv, &bo);
 	if (ret)
-		return ERR_PTR(ret);
+		goto error;
 
-	bo->prime_shared_count = 1;
+	bo->tbo.sg = sg;
+	bo->tbo.ttm->sg = sg;
+	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+		bo->prime_shared_count = 1;
+
+	ww_mutex_unlock(&resv->lock);
 	return &bo->gem_base;
+
+error:
+	ww_mutex_unlock(&resv->lock);
+	return ERR_PTR(ret);
 }
 
-int amdgpu_gem_prime_pin(struct drm_gem_object *obj)
+static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
+				 struct device *target_dev,
+				 struct dma_buf_attachment *attach)
 {
+	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	long ret = 0;
-
-	ret = amdgpu_bo_reserve(bo, false);
-	if (unlikely(ret != 0))
-		return ret;
-
-	/*
-	 * Wait for all shared fences to complete before we switch to future
-	 * use of exclusive fence on this prime shared bo.
-	 */
-	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
-						  MAX_SCHEDULE_TIMEOUT);
-	if (unlikely(ret < 0)) {
-		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
-		amdgpu_bo_unreserve(bo);
-		return ret;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	long r;
+
+	r = drm_gem_map_attach(dma_buf, target_dev, attach);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (unlikely(r != 0))
+		goto error_detach;
+
+
+	if (attach->dev->driver != adev->dev->driver) {
+		/*
+		 * Wait for all shared fences to complete before we switch to future
+		 * use of exclusive fence on this prime shared bo.
+		 */
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+							true, false,
+							MAX_SCHEDULE_TIMEOUT);
+		if (unlikely(r < 0)) {
+			DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
+			goto error_unreserve;
+		}
 	}
 
 	/* pin buffer into GTT */
-	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
-	if (likely(ret == 0))
+	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+	if (r)
+		goto error_unreserve;
+
+	if (attach->dev->driver != adev->dev->driver)
 		bo->prime_shared_count++;
 
+error_unreserve:
 	amdgpu_bo_unreserve(bo);
-	return ret;
+
+error_detach:
+	if (r)
+		drm_gem_map_detach(dma_buf, attach);
+	return r;
 }
 
-void amdgpu_gem_prime_unpin(struct drm_gem_object *obj)
+static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
+				  struct dma_buf_attachment *attach)
 {
+	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	int ret = 0;
 
 	ret = amdgpu_bo_reserve(bo, true);
 	if (unlikely(ret != 0))
-		return;
+		goto error;
 
 	amdgpu_bo_unpin(bo);
-	if (bo->prime_shared_count)
+	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
 		bo->prime_shared_count--;
 	amdgpu_bo_unreserve(bo);
+
+error:
+	drm_gem_map_detach(dma_buf, attach);
 }
 
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
@@ -164,6 +203,50 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
 	return bo->tbo.resv;
 }
 
+static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
+				       enum dma_data_direction direction)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct ttm_operation_ctx ctx = { true, false };
+	u32 domain = amdgpu_display_framebuffer_domains(adev);
+	int ret;
+	bool reads = (direction == DMA_BIDIRECTIONAL ||
+		      direction == DMA_FROM_DEVICE);
+
+	if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
+		return 0;
+
+	/* move to gtt */
+	ret = amdgpu_bo_reserve(bo, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	}
+
+	amdgpu_bo_unreserve(bo);
+	return ret;
+}
+
+static const struct dma_buf_ops amdgpu_dmabuf_ops = {
+	.attach = amdgpu_gem_map_attach,
+	.detach = amdgpu_gem_map_detach,
+	.map_dma_buf = drm_gem_map_dma_buf,
+	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.release = drm_gem_dmabuf_release,
+	.begin_cpu_access = amdgpu_gem_begin_cpu_access,
+	.map = drm_gem_dmabuf_kmap,
+	.map_atomic = drm_gem_dmabuf_kmap_atomic,
+	.unmap = drm_gem_dmabuf_kunmap,
+	.unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags)
@@ -176,7 +259,30 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 		return ERR_PTR(-EPERM);
 
 	buf = drm_gem_prime_export(dev, gobj, flags);
-	if (!IS_ERR(buf))
+	if (!IS_ERR(buf)) {
 		buf->file->f_mapping = dev->anon_inode->i_mapping;
+		buf->ops = &amdgpu_dmabuf_ops;
+	}
+
 	return buf;
 }
+
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf)
+{
+	struct drm_gem_object *obj;
+
+	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev) {
+			/*
+			 * Importing dmabuf exported from our own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	return drm_gem_prime_import(dev, dma_buf);
+}
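
When an importer needs CPU access to the shared buffer, the new
begin_cpu_access hook above is what migrates the BO into GTT first (when the
framebuffer domain allows it). A short importer-side sketch, assuming a prime
fd obtained elsewhere, might look like this:

	struct dma_buf *dmabuf = dma_buf_get(prime_fd);	/* prime_fd assumed */
	int ret;

	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	ret = dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
	if (!ret) {
		/* CPU access (e.g. dma_buf_kmap()/vmap()) would go here */
		dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);
	}
	dma_buf_put(dmabuf);
	return ret;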

+ 15 - 35
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -51,29 +51,11 @@ static int psp_sw_init(void *handle)
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		psp->init_microcode = psp_v3_1_init_microcode;
-		psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv;
-		psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
-		psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
-		psp->ring_init = psp_v3_1_ring_init;
-		psp->ring_create = psp_v3_1_ring_create;
-		psp->ring_stop = psp_v3_1_ring_stop;
-		psp->ring_destroy = psp_v3_1_ring_destroy;
-		psp->cmd_submit = psp_v3_1_cmd_submit;
-		psp->compare_sram_data = psp_v3_1_compare_sram_data;
-		psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
-		psp->mode1_reset = psp_v3_1_mode1_reset;
+	case CHIP_VEGA12:
+		psp_v3_1_set_psp_funcs(psp);
 		break;
 	case CHIP_RAVEN:
-		psp->init_microcode = psp_v10_0_init_microcode;
-		psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
-		psp->ring_init = psp_v10_0_ring_init;
-		psp->ring_create = psp_v10_0_ring_create;
-		psp->ring_stop = psp_v10_0_ring_stop;
-		psp->ring_destroy = psp_v10_0_ring_destroy;
-		psp->cmd_submit = psp_v10_0_cmd_submit;
-		psp->compare_sram_data = psp_v10_0_compare_sram_data;
-		psp->mode1_reset = psp_v10_0_mode1_reset;
+		psp_v10_0_set_psp_funcs(psp);
 		break;
 	default:
 		return -EINVAL;
@@ -81,6 +63,9 @@ static int psp_sw_init(void *handle)
 
 	psp->adev = adev;
 
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
 	ret = psp_init_microcode(psp);
 	if (ret) {
 		DRM_ERROR("Failed to load psp firmware!\n");
@@ -94,6 +79,9 @@ static int psp_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
 	release_firmware(adev->psp.sos_fw);
 	adev->psp.sos_fw = NULL;
 	release_firmware(adev->psp.asd_fw);
@@ -472,6 +460,9 @@ static int psp_suspend(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct psp_context *psp = &adev->psp;
 
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
 	ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
 	if (ret) {
 		DRM_ERROR("PSP ring stop failed\n");
@@ -512,19 +503,8 @@ failed:
 	return ret;
 }
 
-static bool psp_check_reset(void* handle)
+int psp_gpu_reset(struct amdgpu_device *adev)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	if (adev->flags & AMD_IS_APU)
-		return true;
-
-	return false;
-}
-
-static int psp_reset(void* handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	return psp_mode1_reset(&adev->psp);
 }
 
@@ -571,9 +551,9 @@ const struct amd_ip_funcs psp_ip_funcs = {
 	.suspend = psp_suspend,
 	.resume = psp_resume,
 	.is_idle = NULL,
-	.check_soft_reset = psp_check_reset,
+	.check_soft_reset = NULL,
 	.wait_for_idle = NULL,
-	.soft_reset = psp_reset,
+	.soft_reset = NULL,
 	.set_clockgating_state = psp_set_clockgating_state,
 	.set_powergating_state = psp_set_powergating_state,
 };

+ 26 - 17
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h

@@ -33,6 +33,8 @@
 #define PSP_ASD_SHARED_MEM_SIZE	0x4000
 #define PSP_1_MEG		0x100000
 
+struct psp_context;
+
 enum psp_ring_type
 {
 	PSP_RING_TYPE__INVALID = 0,
@@ -53,12 +55,8 @@ struct psp_ring
 	uint32_t			ring_size;
 };
 
-struct psp_context
+struct psp_funcs
 {
-	struct amdgpu_device            *adev;
-	struct psp_ring                 km_ring;
-	struct psp_gfx_cmd_resp		*cmd;
-
 	int (*init_microcode)(struct psp_context *psp);
 	int (*bootloader_load_sysdrv)(struct psp_context *psp);
 	int (*bootloader_load_sos)(struct psp_context *psp);
@@ -77,6 +75,15 @@ struct psp_context
 				  enum AMDGPU_UCODE_ID ucode_type);
 	bool (*smu_reload_quirk)(struct psp_context *psp);
 	int (*mode1_reset)(struct psp_context *psp);
+};
+
+struct psp_context
+{
+	struct amdgpu_device            *adev;
+	struct psp_ring                 km_ring;
+	struct psp_gfx_cmd_resp		*cmd;
+
+	const struct psp_funcs 		*funcs;
 
 	/* fence buffer */
 	struct amdgpu_bo 		*fw_pri_bo;
@@ -123,25 +130,25 @@ struct amdgpu_psp_funcs {
 					enum AMDGPU_UCODE_ID);
 };
 
-#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
-#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
-#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type))
-#define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type))
-#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type)))
+#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
+#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
+#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
+#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
+#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
 #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
-		(psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
+		(psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
 #define psp_compare_sram_data(psp, ucode, type) \
-		(psp)->compare_sram_data((psp), (ucode), (type))
+		(psp)->funcs->compare_sram_data((psp), (ucode), (type))
 #define psp_init_microcode(psp) \
-		((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0)
+		((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
 #define psp_bootloader_load_sysdrv(psp) \
-		((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0)
+		((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
 #define psp_bootloader_load_sos(psp) \
-		((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0)
+		((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
 #define psp_smu_reload_quirk(psp) \
-		((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false)
+		((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
 #define psp_mode1_reset(psp) \
-		((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false)
+		((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
 
 extern const struct amd_ip_funcs psp_ip_funcs;
 
@@ -151,4 +158,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
 
 extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
 
+int psp_gpu_reset(struct amdgpu_device *adev);
+
 #endif
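
The per-ASIC setters called from amdgpu_psp.c (psp_v3_1_set_psp_funcs,
psp_v10_0_set_psp_funcs) presumably just install a const callback table now
that the hooks live in struct psp_funcs. A plausible sketch, not taken from
this patch, of what psp_v3_1.c would provide:

static const struct psp_funcs psp_v3_1_funcs = {
	.init_microcode		= psp_v3_1_init_microcode,
	.bootloader_load_sysdrv	= psp_v3_1_bootloader_load_sysdrv,
	.bootloader_load_sos	= psp_v3_1_bootloader_load_sos,
	.ring_init		= psp_v3_1_ring_init,
	.mode1_reset		= psp_v3_1_mode1_reset,
	/* remaining callbacks omitted for brevity */
};

void psp_v3_1_set_psp_funcs(struct psp_context *psp)
{
	psp->funcs = &psp_v3_1_funcs;
}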

+ 3 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -360,6 +360,9 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 
 	amdgpu_debugfs_ring_fini(ring);
 
+	dma_fence_put(ring->vmid_wait);
+	ring->vmid_wait = NULL;
+
 	ring->adev->rings[ring->idx] = NULL;
 }
 

+ 7 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -26,6 +26,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/drm_print.h>
 
 /* max number of rings */
 #define AMDGPU_MAX_RINGS		18
@@ -35,8 +36,9 @@
 #define AMDGPU_MAX_UVD_ENC_RINGS	2
 
 /* some special values for the owner field */
-#define AMDGPU_FENCE_OWNER_UNDEFINED	((void*)0ul)
-#define AMDGPU_FENCE_OWNER_VM		((void*)1ul)
+#define AMDGPU_FENCE_OWNER_UNDEFINED	((void *)0ul)
+#define AMDGPU_FENCE_OWNER_VM		((void *)1ul)
+#define AMDGPU_FENCE_OWNER_KFD		((void *)2ul)
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
@@ -128,7 +130,6 @@ struct amdgpu_ring_funcs {
 	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
 			      uint64_t pd_addr);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
-	void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
 	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
 				uint32_t gds_base, uint32_t gds_size,
 				uint32_t gws_base, uint32_t gws_size,
@@ -151,6 +152,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
+			      uint32_t val, uint32_t mask);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
@@ -195,6 +198,7 @@ struct amdgpu_ring {
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
+	struct dma_fence	*vmid_wait;
 	bool			has_compute_vm_bug;
 
 	atomic_t		num_jobs[DRM_SCHED_PRIORITY_MAX];

+ 11 - 51
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c

@@ -63,21 +63,27 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
 	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 		INIT_LIST_HEAD(&sa_manager->flist[i]);
 
-	r = amdgpu_bo_create(adev, size, align, true, domain,
-			     0, NULL, NULL, 0, &sa_manager->bo);
+	r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
+				&sa_manager->gpu_addr, &sa_manager->cpu_ptr);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
 		return r;
 	}
 
+	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
 	return r;
 }
 
 void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
-			       struct amdgpu_sa_manager *sa_manager)
+                              struct amdgpu_sa_manager *sa_manager)
 {
 	struct amdgpu_sa_bo *sa_bo, *tmp;
 
+	if (sa_manager->bo == NULL) {
+		dev_err(adev->dev, "no bo for sa manager\n");
+		return;
+	}
+
 	if (!list_empty(&sa_manager->olist)) {
 		sa_manager->hole = &sa_manager->olist,
 		amdgpu_sa_bo_try_free(sa_manager);
@@ -88,55 +94,9 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
 	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
 		amdgpu_sa_bo_remove_locked(sa_bo);
 	}
-	amdgpu_bo_unref(&sa_manager->bo);
-	sa_manager->size = 0;
-}
-
-int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
-			       struct amdgpu_sa_manager *sa_manager)
-{
-	int r;
-
-	if (sa_manager->bo == NULL) {
-		dev_err(adev->dev, "no bo for sa manager\n");
-		return -EINVAL;
-	}
 
-	/* map the buffer */
-	r = amdgpu_bo_reserve(sa_manager->bo, false);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r);
-		return r;
-	}
-	r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
-	if (r) {
-		amdgpu_bo_unreserve(sa_manager->bo);
-		dev_err(adev->dev, "(%d) failed to pin manager bo\n", r);
-		return r;
-	}
-	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
-	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
-	amdgpu_bo_unreserve(sa_manager->bo);
-	return r;
-}
-
-int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
-				 struct amdgpu_sa_manager *sa_manager)
-{
-	int r;
-
-	if (sa_manager->bo == NULL) {
-		dev_err(adev->dev, "no bo for sa manager\n");
-		return -EINVAL;
-	}
-
-	r = amdgpu_bo_reserve(sa_manager->bo, true);
-	if (!r) {
-		amdgpu_bo_kunmap(sa_manager->bo);
-		amdgpu_bo_unpin(sa_manager->bo);
-		amdgpu_bo_unreserve(sa_manager->bo);
-	}
-	return r;
+	amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+	sa_manager->size = 0;
 }
 
 static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)

+ 54 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

@@ -31,6 +31,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 struct amdgpu_sync_entry {
 	struct hlist_node	node;
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
  */
 static void *amdgpu_sync_get_owner(struct dma_fence *f)
 {
-	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+	struct drm_sched_fence *s_fence;
+	struct amdgpu_amdkfd_fence *kfd_fence;
+
+	if (!f)
+		return AMDGPU_FENCE_OWNER_UNDEFINED;
 
+	s_fence = to_drm_sched_fence(f);
 	if (s_fence)
 		return s_fence->owner;
 
+	kfd_fence = to_amdgpu_amdkfd_fence(f);
+	if (kfd_fence)
+		return AMDGPU_FENCE_OWNER_KFD;
+
 	return AMDGPU_FENCE_OWNER_UNDEFINED;
 }
 
@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 	for (i = 0; i < flist->shared_count; ++i) {
 		f = rcu_dereference_protected(flist->shared[i],
 					      reservation_object_held(resv));
+		/* We only want to trigger KFD eviction fences on
+		 * evict or move jobs. Skip KFD fences otherwise.
+		 */
+		fence_owner = amdgpu_sync_get_owner(f);
+		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+			continue;
+
 		if (amdgpu_sync_same_dev(adev, f)) {
 			/* VM updates are only interesting
 			 * for other VM updates and moves.
 			 */
-			fence_owner = amdgpu_sync_get_owner(f);
 			if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
 			    (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
 			    ((owner == AMDGPU_FENCE_OWNER_VM) !=
@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
 	return NULL;
 }
 
+/**
+ * amdgpu_sync_clone - clone a sync object
+ *
+ * @source: sync object to clone
+ * @clone: pointer to destination sync object
+ *
+ * Adds references to all unsignaled fences in @source to @clone. Also
+ * removes signaled fences from @source while at it.
+ */
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
+{
+	struct amdgpu_sync_entry *e;
+	struct hlist_node *tmp;
+	struct dma_fence *f;
+	int i, r;
+
+	hash_for_each_safe(source->fences, i, tmp, e, node) {
+		f = e->fence;
+		if (!dma_fence_is_signaled(f)) {
+			r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+			if (r)
+				return r;
+		} else {
+			hash_del(&e->node);
+			dma_fence_put(f);
+			kmem_cache_free(amdgpu_sync_slab, e);
+		}
+	}
+
+	dma_fence_put(clone->last_vm_update);
+	clone->last_vm_update = dma_fence_get(source->last_vm_update);
+
+	return 0;
+}
+
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
 {
 	struct amdgpu_sync_entry *e;

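Taken together, the amdgpu_sync.c hunks above teach the sync code about amdkfd eviction fences: amdgpu_sync_get_owner() now reports AMDGPU_FENCE_OWNER_KFD for them, and amdgpu_sync_resv() skips such fences for ordinary submissions so that only evict/move jobs (which pass the undefined owner) trigger KFD process eviction. A minimal userspace sketch of that filtering decision, using stand-in owner constants rather than the kernel's real markers:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel's fence owner markers (illustrative only). */
enum fence_owner { OWNER_UNDEFINED, OWNER_VM, OWNER_KFD, OWNER_USER };

/*
 * Mirrors the new early "continue" in amdgpu_sync_resv(): a KFD eviction
 * fence is only waited on by evict/move jobs, which use the undefined
 * owner; every other submission skips it so it does not evict the KFD
 * process by accident.
 */
static bool must_sync(enum fence_owner job_owner, enum fence_owner fence_owner)
{
	if (fence_owner == OWNER_KFD && job_owner != OWNER_UNDEFINED)
		return false;
	return true;
}

int main(void)
{
	printf("user CS vs KFD fence:   %d\n", must_sync(OWNER_USER, OWNER_KFD));      /* 0 */
	printf("evict job vs KFD fence: %d\n", must_sync(OWNER_UNDEFINED, OWNER_KFD)); /* 1 */
	return 0;
}
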
+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h

@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);

+ 11 - 12
drivers/gpu/drm/amd/amdgpu/amdgpu_test.c

@@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 	/* Number of tests =
 	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
 	 */
-	n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
+	n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		if (adev->rings[i])
 			n -= adev->rings[i]->ring_size;
@@ -59,9 +59,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		goto out_cleanup;
 	}
 
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM, 0,
-			     NULL, NULL, 0, &vram_obj);
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
+			     ttm_bo_type_kernel, NULL, &vram_obj);
 	if (r) {
 		DRM_ERROR("Failed to create VRAM object\n");
 		goto out_cleanup;
@@ -80,9 +79,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		void **vram_start, **vram_end;
 		struct dma_fence *fence = NULL;
 
-		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
-				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-				     NULL, 0, gtt_obj + i);
+		r = amdgpu_bo_create(adev, size, PAGE_SIZE,
+				     AMDGPU_GEM_DOMAIN_GTT, 0,
+				     ttm_bo_type_kernel, NULL, gtt_obj + i);
 		if (r) {
 			DRM_ERROR("Failed to create GTT object %d\n", i);
 			goto out_lclean;
@@ -142,10 +141,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 					  "0x%16llx/0x%16llx)\n",
 					  i, *vram_start, gart_start,
 					  (unsigned long long)
-					  (gart_addr - adev->mc.gart_start +
+					  (gart_addr - adev->gmc.gart_start +
 					   (void*)gart_start - gtt_map),
 					  (unsigned long long)
-					  (vram_addr - adev->mc.vram_start +
+					  (vram_addr - adev->gmc.vram_start +
 					   (void*)gart_start - gtt_map));
 				amdgpu_bo_kunmap(vram_obj);
 				goto out_lclean_unpin;
@@ -187,10 +186,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 					  "0x%16llx/0x%16llx)\n",
 					  i, *gart_start, vram_start,
 					  (unsigned long long)
-					  (vram_addr - adev->mc.vram_start +
+					  (vram_addr - adev->gmc.vram_start +
 					   (void*)vram_start - vram_map),
 					  (unsigned long long)
-					  (gart_addr - adev->mc.gart_start +
+					  (gart_addr - adev->gmc.gart_start +
 					   (void*)vram_start - vram_map));
 				amdgpu_bo_kunmap(gtt_obj[i]);
 				goto out_lclean_unpin;
@@ -200,7 +199,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		amdgpu_bo_kunmap(gtt_obj[i]);
 
 		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
-			 gart_addr - adev->mc.gart_start);
+			 gart_addr - adev->gmc.gart_start);
 		continue;
 
 out_lclean_unpin:

+ 30 - 8
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

@@ -86,7 +86,7 @@ TRACE_EVENT(amdgpu_iv,
 			     __field(unsigned, vmid_src)
 			     __field(uint64_t, timestamp)
 			     __field(unsigned, timestamp_src)
-			     __field(unsigned, pas_id)
+			     __field(unsigned, pasid)
 			     __array(unsigned, src_data, 4)
 			    ),
 	    TP_fast_assign(
@@ -97,16 +97,16 @@ TRACE_EVENT(amdgpu_iv,
 			   __entry->vmid_src = iv->vmid_src;
 			   __entry->timestamp = iv->timestamp;
 			   __entry->timestamp_src = iv->timestamp_src;
-			   __entry->pas_id = iv->pas_id;
+			   __entry->pasid = iv->pasid;
 			   __entry->src_data[0] = iv->src_data[0];
 			   __entry->src_data[1] = iv->src_data[1];
 			   __entry->src_data[2] = iv->src_data[2];
 			   __entry->src_data[3] = iv->src_data[3];
 			   ),
-	    TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
+	    TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
 		      __entry->client_id, __entry->src_id,
 		      __entry->ring_id, __entry->vmid,
-		      __entry->timestamp, __entry->pas_id,
+		      __entry->timestamp, __entry->pasid,
 		      __entry->src_data[0], __entry->src_data[1],
 		      __entry->src_data[2], __entry->src_data[3])
 );
@@ -217,7 +217,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 		     struct amdgpu_job *job),
 	    TP_ARGS(vm, ring, job),
 	    TP_STRUCT__entry(
-			     __field(struct amdgpu_vm *, vm)
+			     __field(u32, pasid)
 			     __field(u32, ring)
 			     __field(u32, vmid)
 			     __field(u32, vm_hub)
@@ -226,15 +226,15 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 			     ),
 
 	    TP_fast_assign(
-			   __entry->vm = vm;
+			   __entry->pasid = vm->pasid;
 			   __entry->ring = ring->idx;
 			   __entry->vmid = job->vmid;
 			   __entry->vm_hub = ring->funcs->vmhub,
 			   __entry->pd_addr = job->vm_pd_addr;
 			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
-	    TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
-		      __entry->vm, __entry->ring, __entry->vmid,
+	    TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
+		      __entry->pasid, __entry->ring, __entry->vmid,
 		      __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
 );
 
@@ -378,6 +378,28 @@ TRACE_EVENT(amdgpu_vm_flush,
 		      __entry->vm_hub,__entry->pd_addr)
 );
 
+DECLARE_EVENT_CLASS(amdgpu_pasid,
+	    TP_PROTO(unsigned pasid),
+	    TP_ARGS(pasid),
+	    TP_STRUCT__entry(
+			     __field(unsigned, pasid)
+			     ),
+	    TP_fast_assign(
+			   __entry->pasid = pasid;
+			   ),
+	    TP_printk("pasid=%u", __entry->pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated,
+	    TP_PROTO(unsigned pasid),
+	    TP_ARGS(pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
+	    TP_PROTO(unsigned pasid),
+	    TP_ARGS(pasid)
+);
+
 TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
 	    TP_ARGS(list, bo),

+ 209 - 94
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

@@ -46,6 +46,7 @@
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -161,7 +162,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		break;
 	case TTM_PL_TT:
 		man->func = &amdgpu_gtt_mgr_func;
-		man->gpu_offset = adev->mc.gart_start;
+		man->gpu_offset = adev->gmc.gart_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
@@ -169,7 +170,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 	case TTM_PL_VRAM:
 		/* "On-card" video ram */
 		man->func = &amdgpu_vram_mgr_func;
-		man->gpu_offset = adev->mc.vram_start;
+		man->gpu_offset = adev->gmc.vram_start;
 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
 			     TTM_MEMTYPE_FLAG_MAPPABLE;
 		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
@@ -203,6 +204,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 	};
 
+	if (bo->type == ttm_bo_type_sg) {
+		placement->num_placement = 0;
+		placement->num_busy_placement = 0;
+		return;
+	}
+
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 		placement->placement = &placements;
 		placement->busy_placement = &placements;
@@ -213,13 +220,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	abo = ttm_to_amdgpu_bo(bo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (adev->mman.buffer_funcs &&
-		    adev->mman.buffer_funcs_ring &&
-		    adev->mman.buffer_funcs_ring->ready == false) {
+		if (!adev->mman.buffer_funcs_enabled) {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
-		} else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
 			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-			unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+			unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 			struct drm_mm_node *node = bo->mem.mm_node;
 			unsigned long pages_left;
 
@@ -260,6 +265,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
+	/*
+	 * Don't verify access for KFD BOs. They don't have a GEM
+	 * object associated with them.
+	 */
+	if (abo->kfd_bo)
+		return 0;
+
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 		return -EPERM;
 	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
@@ -331,7 +343,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
 					AMDGPU_GPU_PAGE_SIZE);
 
-	if (!ring->ready) {
+	if (!adev->mman.buffer_funcs_enabled) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}
@@ -577,12 +589,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 		amdgpu_move_null(bo, new_mem);
 		return 0;
 	}
-	if (adev->mman.buffer_funcs == NULL ||
-	    adev->mman.buffer_funcs_ring == NULL ||
-	    !adev->mman.buffer_funcs_ring->ready) {
-		/* use memcpy */
+
+	if (!adev->mman.buffer_funcs_enabled)
 		goto memcpy;
-	}
 
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -621,6 +630,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+	struct drm_mm_node *mm_node = mem->mm_node;
 
 	mem->bus.addr = NULL;
 	mem->bus.offset = 0;
@@ -638,9 +648,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 	case TTM_PL_VRAM:
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 		/* check if it's visible */
-		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
+		if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
 			return -EINVAL;
-		mem->bus.base = adev->mc.aper_base;
+		/* Only physically contiguous buffers apply. In a contiguous
+		 * buffer, size of the first mm_node would match the number of
+		 * pages in ttm_mem_reg.
+		 */
+		if (adev->mman.aper_base_kaddr &&
+		    (mm_node->size == mem->num_pages))
+			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+					mem->bus.offset;
+
+		mem->bus.base = adev->gmc.aper_base;
 		mem->bus.is_iomem = true;
 		break;
 	default:
@@ -674,7 +693,6 @@ struct amdgpu_ttm_gup_task_list {
 
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
-	struct amdgpu_device	*adev;
 	u64			offset;
 	uint64_t		userptr;
 	struct mm_struct	*usermm;
@@ -832,6 +850,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
 	uint64_t flags;
 	int r = 0;
@@ -858,9 +877,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 		return 0;
 	}
 
-	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
-	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
 
 	if (r)
@@ -891,7 +910,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
-	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
 	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
 		TTM_PL_FLAG_TT;
 
@@ -937,6 +956,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
 
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	int r;
 
@@ -947,7 +967,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 		return 0;
 
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
-	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
+	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
 	if (r)
 		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
 			  gtt->ttm.ttm.num_pages, gtt->offset);
@@ -968,22 +988,20 @@ static struct ttm_backend_func amdgpu_backend_func = {
 	.destroy = &amdgpu_ttm_backend_destroy,
 };
 
-static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
-				    unsigned long size, uint32_t page_flags,
-				    struct page *dummy_read_page)
+static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
+					   uint32_t page_flags)
 {
 	struct amdgpu_device *adev;
 	struct amdgpu_ttm_tt *gtt;
 
-	adev = amdgpu_ttm_adev(bdev);
+	adev = amdgpu_ttm_adev(bo->bdev);
 
 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
 	if (gtt == NULL) {
 		return NULL;
 	}
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
-	gtt->adev = adev;
-	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
+	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
 		kfree(gtt);
 		return NULL;
 	}
@@ -997,9 +1015,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
-	if (ttm->state != tt_unpopulated)
-		return 0;
-
 	if (gtt && gtt->userptr) {
 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
 		if (!ttm->sg)
@@ -1012,13 +1027,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 
 	if (slave && ttm->sg) {
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
-						 gtt->ttm.dma_address, ttm->num_pages);
+						 gtt->ttm.dma_address,
+						 ttm->num_pages);
 		ttm->state = tt_unbound;
 		return 0;
 	}
 
 #ifdef CONFIG_SWIOTLB
-	if (swiotlb_nr_tbl()) {
+	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
 		return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
 	}
 #endif
@@ -1045,7 +1061,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	adev = amdgpu_ttm_adev(ttm->bdev);
 
 #ifdef CONFIG_SWIOTLB
-	if (swiotlb_nr_tbl()) {
+	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
 		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
 		return;
 	}
@@ -1170,6 +1186,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 {
 	unsigned long num_pages = bo->mem.num_pages;
 	struct drm_mm_node *node = bo->mem.mm_node;
+	struct reservation_object_list *flist;
+	struct dma_fence *f;
+	int i;
+
+	/* If bo is a KFD BO, check if the bo belongs to the current process.
+	 * If true, then return false as any KFD process needs all its BOs to
+	 * be resident to run successfully
+	 */
+	flist = reservation_object_get_list(bo->resv);
+	if (flist) {
+		for (i = 0; i < flist->shared_count; ++i) {
+			f = rcu_dereference_protected(flist->shared[i],
+				reservation_object_held(bo->resv));
+			if (amdkfd_fence_check_mm(f, current->mm))
+				return false;
+		}
+	}
 
 	switch (bo->mem.mem_type) {
 	case TTM_PL_TT:
@@ -1212,7 +1245,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
 	pos = (nodes->start << PAGE_SHIFT) + offset;
 
-	while (len && pos < adev->mc.mc_vram_size) {
+	while (len && pos < adev->gmc.mc_vram_size) {
 		uint64_t aligned_pos = pos & ~(uint64_t)3;
 		uint32_t bytes = 4 - (pos & 3);
 		uint32_t shift = (pos & 3) * 8;
@@ -1298,7 +1331,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 	struct ttm_operation_ctx ctx = { false, false };
 	int r = 0;
 	int i;
-	u64 vram_size = adev->mc.visible_vram_size;
+	u64 vram_size = adev->gmc.visible_vram_size;
 	u64 offset = adev->fw_vram_usage.start_offset;
 	u64 size = adev->fw_vram_usage.size;
 	struct amdgpu_bo *bo;
@@ -1309,11 +1342,12 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 	if (adev->fw_vram_usage.size > 0 &&
 		adev->fw_vram_usage.size <= vram_size) {
 
-		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
-			PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
-			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
-			&adev->fw_vram_usage.reserved_bo);
+		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
+				     AMDGPU_GEM_DOMAIN_VRAM,
+				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+				     ttm_bo_type_kernel, NULL,
+				     &adev->fw_vram_usage.reserved_bo);
 		if (r)
 			goto error_create;
 
@@ -1387,8 +1421,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}
 	adev->mman.initialized = true;
+
+	/* We opt to avoid OOM on system page allocations */
+	adev->mman.bdev.no_retry = true;
+
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
-				adev->mc.real_vram_size >> PAGE_SHIFT);
+				adev->gmc.real_vram_size >> PAGE_SHIFT);
 	if (r) {
 		DRM_ERROR("Failed initializing VRAM heap.\n");
 		return r;
@@ -1397,11 +1435,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* Reduce size of CPU-visible VRAM if requested */
 	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
 	if (amdgpu_vis_vram_limit > 0 &&
-	    vis_vram_limit <= adev->mc.visible_vram_size)
-		adev->mc.visible_vram_size = vis_vram_limit;
+	    vis_vram_limit <= adev->gmc.visible_vram_size)
+		adev->gmc.visible_vram_size = vis_vram_limit;
 
 	/* Change the size here instead of the init above so only lpfn is affected */
-	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+	amdgpu_ttm_set_buffer_funcs_status(adev, false);
+#ifdef CONFIG_64BIT
+	adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+						adev->gmc.visible_vram_size);
+#endif
 
 	/*
 	 *The reserved vram for firmware must be pinned to the specified
@@ -1412,21 +1454,21 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
+	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_VRAM,
 				    &adev->stolen_vga_memory,
 				    NULL, NULL);
 	if (r)
 		return r;
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
-		 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
+		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
 	if (amdgpu_gtt_size == -1) {
 		struct sysinfo si;
 
 		si_meminfo(&si);
 		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-			       adev->mc.mc_vram_size),
+			       adev->gmc.mc_vram_size),
 			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
 	}
 	else
@@ -1494,6 +1536,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	amdgpu_ttm_debugfs_fini(adev);
 	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
+	if (adev->mman.aper_base_kaddr)
+		iounmap(adev->mman.aper_base_kaddr);
+	adev->mman.aper_base_kaddr = NULL;
 
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
@@ -1509,18 +1554,30 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	DRM_INFO("amdgpu: ttm finalized\n");
 }
 
-/* this should only be called at bootup or when userspace
- * isn't running */
-void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
+/**
+ * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true when we can use buffer functions.
+ *
+ * Enable/disable use of buffer functions during suspend/resume. This should
+ * only be called at bootup or when userspace isn't running.
+ */
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
-	struct ttm_mem_type_manager *man;
+	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
+	uint64_t size;
 
-	if (!adev->mman.initialized)
+	if (!adev->mman.initialized || adev->in_gpu_reset)
 		return;
 
-	man = &adev->mman.bdev.man[TTM_PL_VRAM];
 	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
+	if (enable)
+		size = adev->gmc.real_vram_size;
+	else
+		size = adev->gmc.visible_vram_size;
 	man->size = size >> PAGE_SHIFT;
+	adev->mman.buffer_funcs_enabled = enable;
 }
 
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
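
The hunk above folds the old amdgpu_ttm_set_active_vram_size() into amdgpu_ttm_set_buffer_funcs_status(): a single flag records whether SDMA buffer moves are usable, and the TTM VRAM manager is sized to full VRAM when they are, or to CPU-visible VRAM only when they are not. A toy userspace model of that selection, with invented sizes rather than real device values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SHIFT 12

struct toy_mman {
	uint64_t real_vram_size;	/* pretend 8 GiB of VRAM */
	uint64_t visible_vram_size;	/* pretend a 256 MiB CPU-visible BAR */
	uint64_t man_size_pages;	/* what the VRAM manager may hand out */
	bool buffer_funcs_enabled;
};

/* Mirrors the enable/disable logic of amdgpu_ttm_set_buffer_funcs_status(). */
static void set_buffer_funcs_status(struct toy_mman *m, bool enable)
{
	uint64_t size = enable ? m->real_vram_size : m->visible_vram_size;

	m->man_size_pages = size >> TOY_PAGE_SHIFT;
	m->buffer_funcs_enabled = enable;
}

int main(void)
{
	struct toy_mman m = { 8ULL << 30, 256ULL << 20, 0, false };

	set_buffer_funcs_status(&m, false);	/* bootup/suspend: visible VRAM only */
	printf("moves disabled: %llu pages\n", (unsigned long long)m.man_size_pages);

	set_buffer_funcs_status(&m, true);	/* SDMA ready: expose all of VRAM */
	printf("moves enabled:  %llu pages\n", (unsigned long long)m.man_size_pages);
	return 0;
}
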
@@ -1559,7 +1616,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
 
-	*addr = adev->mc.gart_start;
+	*addr = adev->gmc.gart_start;
 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
 		AMDGPU_GPU_PAGE_SIZE;
 
@@ -1619,6 +1676,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	unsigned i;
 	int r;
 
+	if (direct_submit && !ring->ready) {
+		DRM_ERROR("Trying to move memory with ring turned off.\n");
+		return -EINVAL;
+	}
+
 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
 	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
@@ -1677,13 +1739,12 @@ error_free:
 }
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-		       uint64_t src_data,
+		       uint32_t src_data,
 		       struct reservation_object *resv,
 		       struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	uint32_t max_bytes = 8 *
-			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
+	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
 	struct drm_mm_node *mm_node;
@@ -1693,7 +1754,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 	struct amdgpu_job *job;
 	int r;
 
-	if (!ring->ready) {
+	if (!adev->mman.buffer_funcs_enabled) {
 		DRM_ERROR("Trying to clear memory with ring turned off.\n");
 		return -EINVAL;
 	}
@@ -1714,9 +1775,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		num_pages -= mm_node->size;
 		++mm_node;
 	}
-
-	/* num of dwords for each SDMA_OP_PTEPDE cmd */
-	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
 
 	/* for IB padding */
 	num_dw += 64;
@@ -1741,16 +1800,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
 		uint64_t dst_addr;
 
-		WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
-
 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
 		while (byte_count) {
 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
-			amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
-					dst_addr, 0,
-					cur_size_in_bytes >> 3, 0,
-					src_data);
+			amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
+						dst_addr, cur_size_in_bytes);
 
 			dst_addr += cur_size_in_bytes;
 			byte_count -= cur_size_in_bytes;
@@ -1811,14 +1866,14 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
-	if (*pos >= adev->mc.mc_vram_size)
+	if (*pos >= adev->gmc.mc_vram_size)
 		return -ENXIO;
 
 	while (size) {
 		unsigned long flags;
 		uint32_t value;
 
-		if (*pos >= adev->mc.mc_vram_size)
+		if (*pos >= adev->gmc.mc_vram_size)
 			return result;
 
 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
@@ -1850,14 +1905,14 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
-	if (*pos >= adev->mc.mc_vram_size)
+	if (*pos >= adev->gmc.mc_vram_size)
 		return -ENXIO;
 
 	while (size) {
 		unsigned long flags;
 		uint32_t value;
 
-		if (*pos >= adev->mc.mc_vram_size)
+		if (*pos >= adev->gmc.mc_vram_size)
 			return result;
 
 		r = get_user(value, (uint32_t *)buf);
@@ -1935,38 +1990,98 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
 
 #endif
 
-static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
-				   size_t size, loff_t *pos)
+static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
+				 size_t size, loff_t *pos)
 {
 	struct amdgpu_device *adev = file_inode(f)->i_private;
-	int r;
-	uint64_t phys;
 	struct iommu_domain *dom;
+	ssize_t result = 0;
+	int r;
 
-	// always return 8 bytes
-	if (size != 8)
-		return -EINVAL;
+	dom = iommu_get_domain_for_dev(adev->dev);
 
-	// only accept page addresses
-	if (*pos & 0xFFF)
-		return -EINVAL;
+	while (size) {
+		phys_addr_t addr = *pos & PAGE_MASK;
+		loff_t off = *pos & ~PAGE_MASK;
+		size_t bytes = PAGE_SIZE - off;
+		unsigned long pfn;
+		struct page *p;
+		void *ptr;
+
+		bytes = bytes < size ? bytes : size;
+
+		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
+
+		pfn = addr >> PAGE_SHIFT;
+		if (!pfn_valid(pfn))
+			return -EPERM;
+
+		p = pfn_to_page(pfn);
+		if (p->mapping != adev->mman.bdev.dev_mapping)
+			return -EPERM;
+
+		ptr = kmap(p);
+		r = copy_to_user(buf, ptr + off, bytes);
+		kunmap(p);
+		if (r)
+			return -EFAULT;
+
+		size -= bytes;
+		*pos += bytes;
+		result += bytes;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
+				 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	struct iommu_domain *dom;
+	ssize_t result = 0;
+	int r;
 
 	dom = iommu_get_domain_for_dev(adev->dev);
-	if (dom)
-		phys = iommu_iova_to_phys(dom, *pos);
-	else
-		phys = *pos;
 
-	r = copy_to_user(buf, &phys, 8);
-	if (r)
-		return -EFAULT;
+	while (size) {
+		phys_addr_t addr = *pos & PAGE_MASK;
+		loff_t off = *pos & ~PAGE_MASK;
+		size_t bytes = PAGE_SIZE - off;
+		unsigned long pfn;
+		struct page *p;
+		void *ptr;
+
+		bytes = bytes < size ? bytes : size;
+
+		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
 
-	return 8;
+		pfn = addr >> PAGE_SHIFT;
+		if (!pfn_valid(pfn))
+			return -EPERM;
+
+		p = pfn_to_page(pfn);
+		if (p->mapping != adev->mman.bdev.dev_mapping)
+			return -EPERM;
+
+		ptr = kmap(p);
+		r = copy_from_user(ptr + off, buf, bytes);
+		kunmap(p);
+		if (r)
+			return -EFAULT;
+
+		size -= bytes;
+		*pos += bytes;
+		result += bytes;
+	}
+
+	return result;
 }
 
-static const struct file_operations amdgpu_ttm_iova_fops = {
+static const struct file_operations amdgpu_ttm_iomem_fops = {
 	.owner = THIS_MODULE,
-	.read = amdgpu_iova_to_phys_read,
+	.read = amdgpu_iomem_read,
+	.write = amdgpu_iomem_write,
 	.llseek = default_llseek
 };
 
@@ -1979,7 +2094,7 @@ static const struct {
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
 #endif
-	{ "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
+	{ "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
 };
 
 #endif
@@ -2001,16 +2116,16 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
 		if (IS_ERR(ent))
 			return PTR_ERR(ent);
 		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
-			i_size_write(ent->d_inode, adev->mc.mc_vram_size);
+			i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
 		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
-			i_size_write(ent->d_inode, adev->mc.gart_size);
+			i_size_write(ent->d_inode, adev->gmc.gart_size);
 		adev->mman.debugfs_entries[count] = ent;
 	}
 
 	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
 
 #ifdef CONFIG_SWIOTLB
-	if (!swiotlb_nr_tbl())
+	if (!(adev->need_swiotlb && swiotlb_nr_tbl()))
 		--count;
 #endif
 

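The last amdgpu_ttm.c hunks replace the old amdgpu_iova debugfs file, which only translated an IOVA into a physical address, with amdgpu_iomem, which reads and writes the backing pages themselves, translating each page offset through the IOMMU when one is present. A hedged usage sketch from userspace; the debugfs path and the example offset are assumptions for illustration, not values taken from the patch:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path for the first DRM device; adjust the dri minor. */
	const char *path = "/sys/kernel/debug/dri/0/amdgpu_iomem";
	uint32_t word;
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Read 4 bytes at an arbitrary example IOVA (0x100000 here); the
	 * kernel side resolves the offset page by page. */
	if (pread(fd, &word, sizeof(word), 0x100000) != sizeof(word)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("0x%08x\n", word);
	close(fd);
	return 0;
}
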
+ 8 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

@@ -44,6 +44,7 @@ struct amdgpu_mman {
 	struct ttm_bo_device		bdev;
 	bool				mem_global_referenced;
 	bool				initialized;
+	void __iomem			*aper_base_kaddr;
 
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry			*debugfs_entries[8];
@@ -52,6 +53,7 @@ struct amdgpu_mman {
 	/* buffer handling */
 	const struct amdgpu_buffer_funcs	*buffer_funcs;
 	struct amdgpu_ring			*buffer_funcs_ring;
+	bool					buffer_funcs_enabled;
 
 	struct mutex				gtt_window_lock;
 	/* Scheduler entity for buffer moves */
@@ -74,6 +76,11 @@ int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
+int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_fini(struct amdgpu_device *adev);
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+					bool enable);
+
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       uint64_t dst_offset, uint32_t byte_count,
 		       struct reservation_object *resv,
@@ -86,7 +93,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			       struct reservation_object *resv,
 			       struct dma_fence **f);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-			uint64_t src_data,
+			uint32_t src_data,
 			struct reservation_object *resv,
 			struct dma_fence **fence);
 

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c

@@ -271,6 +271,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 			return AMDGPU_FW_LOAD_SMU;
 	case CHIP_VEGA10:
 	case CHIP_RAVEN:
+	case CHIP_VEGA12:
 		if (!load_type)
 			return AMDGPU_FW_LOAD_DIRECT;
 		else

+ 49 - 75
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c

@@ -68,6 +68,7 @@
 #define FIRMWARE_POLARIS12	"amdgpu/polaris12_uvd.bin"
 
 #define FIRMWARE_VEGA10		"amdgpu/vega10_uvd.bin"
+#define FIRMWARE_VEGA12		"amdgpu/vega12_uvd.bin"
 
 #define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
 #define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
@@ -110,6 +111,7 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
 
 MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
@@ -161,11 +163,14 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	case CHIP_POLARIS11:
 		fw_name = FIRMWARE_POLARIS11;
 		break;
+	case CHIP_POLARIS12:
+		fw_name = FIRMWARE_POLARIS12;
+		break;
 	case CHIP_VEGA10:
 		fw_name = FIRMWARE_VEGA10;
 		break;
-	case CHIP_POLARIS12:
-		fw_name = FIRMWARE_POLARIS12;
+	case CHIP_VEGA12:
+		fw_name = FIRMWARE_VEGA12;
 		break;
 	default:
 		return -EINVAL;
@@ -955,37 +960,28 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 			       bool direct, struct dma_fence **fence)
 {
-	struct ttm_operation_ctx ctx = { true, false };
-	struct ttm_validate_buffer tv;
-	struct ww_acquire_ctx ticket;
-	struct list_head head;
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *f = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct dma_fence *f = NULL;
-	struct amdgpu_device *adev = ring->adev;
-	uint64_t addr;
 	uint32_t data[4];
-	int i, r;
-
-	memset(&tv, 0, sizeof(tv));
-	tv.bo = &bo->tbo;
-
-	INIT_LIST_HEAD(&head);
-	list_add(&tv.head, &head);
+	uint64_t addr;
+	long r;
+	int i;
 
-	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
-	if (r)
-		return r;
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unpin(bo);
 
 	if (!ring->adev->uvd.address_64_bit) {
+		struct ttm_operation_ctx ctx = { true, false };
+
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
 		amdgpu_uvd_force_into_uvd_segment(bo);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		if (r)
+			goto err;
 	}
 
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-	if (r)
-		goto err;
-
 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	if (r)
 		goto err;
@@ -1017,6 +1013,14 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	ib->length_dw = 16;
 
 	if (direct) {
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+							true, false,
+							msecs_to_jiffies(10));
+		if (r == 0)
+			r = -ETIMEDOUT;
+		if (r < 0)
+			goto err_free;
+
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
 		job->fence = dma_fence_get(f);
 		if (r)
@@ -1024,17 +1028,23 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 		amdgpu_job_free(job);
 	} else {
+		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
+		if (r)
+			goto err_free;
+
 		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 		if (r)
 			goto err_free;
 	}
 
-	ttm_eu_fence_buffer_objects(&ticket, &head, f);
+	amdgpu_bo_fence(bo, f, false);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 
 	if (fence)
 		*fence = dma_fence_get(f);
-	amdgpu_bo_unref(&bo);
 	dma_fence_put(f);
 
 	return 0;
@@ -1043,7 +1053,8 @@ err_free:
 	amdgpu_job_free(job);
 
 err:
-	ttm_eu_backoff_reservation(&ticket, &head);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 	return r;
 }
 
@@ -1054,31 +1065,16 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	/* stitch together an UVD create msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000000);
@@ -1094,9 +1090,6 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 11; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_uvd_send_msg(ring, bo, true, fence);
 }
 
@@ -1104,31 +1097,16 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	/* stitch together an UVD destroy msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000002);
@@ -1137,9 +1115,6 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 4; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
 }
 
@@ -1149,9 +1124,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 		container_of(work, struct amdgpu_device, uvd.idle_work.work);
 	unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
 
-	if (amdgpu_sriov_vf(adev))
-		return;
-
 	if (fences == 0) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, false);
@@ -1171,11 +1143,12 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+	bool set_clocks;
 
 	if (amdgpu_sriov_vf(adev))
 		return;
 
+	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, true);
@@ -1191,7 +1164,8 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 {
-	schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+	if (!amdgpu_sriov_vf(ring->adev))
+		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 }
 
 /**

+ 9 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c

@@ -55,6 +55,7 @@
 #define FIRMWARE_POLARIS12         "amdgpu/polaris12_vce.bin"
 
 #define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
+#define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
 
 #ifdef CONFIG_DRM_AMDGPU_CIK
 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -72,6 +73,7 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
 
 MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 
@@ -127,11 +129,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	case CHIP_POLARIS11:
 		fw_name = FIRMWARE_POLARIS11;
 		break;
+	case CHIP_POLARIS12:
+		fw_name = FIRMWARE_POLARIS12;
+		break;
 	case CHIP_VEGA10:
 		fw_name = FIRMWARE_VEGA10;
 		break;
-	case CHIP_POLARIS12:
-		fw_name = FIRMWARE_POLARIS12;
+	case CHIP_VEGA12:
+		fw_name = FIRMWARE_VEGA12;
 		break;
 
 	default:
@@ -300,9 +305,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
 		container_of(work, struct amdgpu_device, vce.idle_work.work);
 	unsigned i, count = 0;
 
-	if (amdgpu_sriov_vf(adev))
-		return;
-
 	for (i = 0; i < adev->vce.num_rings; i++)
 		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
 
@@ -362,7 +364,8 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
  */
 void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
 {
-	schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
+	if (!amdgpu_sriov_vf(ring->adev))
+		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 }
 
 /**

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h

@@ -30,6 +30,8 @@
 #define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
 #define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
 
+#define AMDGPU_VCE_FW_53_45	((53 << 24) | (45 << 16))
+
 struct amdgpu_vce {
 	struct amdgpu_bo	*vcpu_bo;
 	uint64_t		gpu_addr;

+ 18 - 69
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

@@ -270,34 +270,17 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
-static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
-			       bool direct, struct dma_fence **fence)
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
+				   struct amdgpu_bo *bo, bool direct,
+				   struct dma_fence **fence)
 {
-	struct ttm_operation_ctx ctx = { true, false };
-	struct ttm_validate_buffer tv;
-	struct ww_acquire_ctx ticket;
-	struct list_head head;
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *f = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct dma_fence *f = NULL;
-	struct amdgpu_device *adev = ring->adev;
 	uint64_t addr;
 	int i, r;
 
-	memset(&tv, 0, sizeof(tv));
-	tv.bo = &bo->tbo;
-
-	INIT_LIST_HEAD(&head);
-	list_add(&tv.head, &head);
-
-	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
-	if (r)
-		return r;
-
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-	if (r)
-		goto err;
-
 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	if (r)
 		goto err;
@@ -330,11 +313,12 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *b
 			goto err_free;
 	}
 
-	ttm_eu_fence_buffer_objects(&ticket, &head, f);
+	amdgpu_bo_fence(bo, f, false);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 
 	if (fence)
 		*fence = dma_fence_get(f);
-	amdgpu_bo_unref(&bo);
 	dma_fence_put(f);
 
 	return 0;
@@ -343,7 +327,8 @@ err_free:
 	amdgpu_job_free(job);
 
 err:
-	ttm_eu_backoff_reservation(&ticket, &head);
+	amdgpu_bo_unreserve(bo);
+	amdgpu_bo_unref(&bo);
 	return r;
 }
 
@@ -351,31 +336,16 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 			      struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	msg[0] = cpu_to_le32(0x00000028);
 	msg[1] = cpu_to_le32(0x00000038);
 	msg[2] = cpu_to_le32(0x00000001);
@@ -393,9 +363,6 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 	for (i = 14; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
 }
 
@@ -403,31 +370,16 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo;
+	struct amdgpu_bo *bo = NULL;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, 0, &bo);
+	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r) {
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
-	r = amdgpu_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		amdgpu_bo_unreserve(bo);
-		amdgpu_bo_unref(&bo);
-		return r;
-	}
-
 	msg[0] = cpu_to_le32(0x00000028);
 	msg[1] = cpu_to_le32(0x00000018);
 	msg[2] = cpu_to_le32(0x00000000);
@@ -437,9 +389,6 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 	for (i = 6; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unreserve(bo);
-
 	return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
 }
 

+ 78 - 17
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

@@ -22,7 +22,21 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
+#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
+{
+	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+
+	addr -= AMDGPU_VA_RESERVED_SIZE;
+
+	if (addr >= AMDGPU_VA_HOLE_START)
+		addr |= AMDGPU_VA_HOLE_END;
+
+	return addr;
+}
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
 {
@@ -55,14 +69,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
 
 /*
  * amdgpu_map_static_csa should be called during amdgpu_vm_init
- * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE"
- * to this VM, and each command submission of GFX should use this virtual
- * address within META_DATA init package to support SRIOV gfx preemption.
+ * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
+ * submission of GFX should use this virtual address within META_DATA init
+ * package to support SRIOV gfx preemption.
  */
-
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			  struct amdgpu_bo_va **bo_va)
 {
+	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
 	struct ww_acquire_ctx ticket;
 	struct list_head list;
 	struct amdgpu_bo_list_entry pd;
@@ -90,7 +104,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return -ENOMEM;
 	}
 
-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
+	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
 				AMDGPU_CSA_SIZE);
 	if (r) {
 		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
@@ -99,7 +113,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return r;
 	}
 
-	r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
+	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 			     AMDGPU_PTE_EXECUTABLE);
 
@@ -125,9 +139,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
-	signed long r;
+	signed long r, cnt = 0;
 	unsigned long flags;
-	uint32_t val, seq;
+	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
 
@@ -141,18 +155,39 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld\n", r);
-		return ~0;
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which causes
+	 * gpu_recover() to hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq_read;
+
+	if (in_interrupt())
+		might_sleep();
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 	}
-	val = adev->wb.wb[adev->virt.reg_val_offs];
 
-	return val;
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_read;
+
+	return adev->wb.wb[adev->virt.reg_val_offs];
+
+failed_kiq_read:
+	pr_err("failed to read reg:%x\n", reg);
+	return ~0;
 }
 
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 {
-	signed long r;
+	signed long r, cnt = 0;
 	unsigned long flags;
 	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -168,8 +203,34 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-	if (r < 1)
-		DRM_ERROR("wait for kiq fence error: %ld\n", r);
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which causes
+	 * gpu_recover() to hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq_write;
+
+	if (in_interrupt())
+		might_sleep();
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_write;
+
+	return;
+
+failed_kiq_write:
+	pr_err("failed to write reg:%x\n", reg);
 }
 
 /**

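The long comment in the hunks above explains the new KIQ access policy for SR-IOV: give up immediately if a GPU reset is in flight or the caller is in interrupt context, otherwise retry the short 5 ms poll up to 20 times with 5 ms sleeps in between instead of blocking on one enormous timeout. A small userspace model of that bounded-retry loop; poll_once() is a stub standing in for amdgpu_fence_wait_polling():

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_KIQ_REG_WAIT		5000	/* usecs per poll, as in the patch */
#define MAX_KIQ_REG_BAILOUT_INTERVAL	5	/* msecs between retries */
#define MAX_KIQ_REG_TRY			20

/* Stub for amdgpu_fence_wait_polling(): pretends the fence signals on
 * the third poll so the example terminates quickly. */
static long poll_once(void)
{
	static int calls;
	return ++calls >= 3 ? 1 : 0;
}

/* Bounded-retry policy from the patch: bail out at once during a GPU
 * reset or in interrupt context, otherwise retry with short sleeps. */
static bool kiq_wait(bool in_gpu_reset, bool in_irq)
{
	long r = poll_once();
	int cnt = 0;

	if (r < 1 && (in_gpu_reset || in_irq))
		return false;

	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		usleep(MAX_KIQ_REG_BAILOUT_INTERVAL * 1000);
		r = poll_once();
	}
	return r >= 1;
}

int main(void)
{
	printf("normal context: %s\n", kiq_wait(false, false) ? "ok" : "failed");
	return 0;
}
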
+ 3 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

@@ -251,8 +251,7 @@ struct amdgpu_virt {
 	uint32_t gim_feature;
 };
 
-#define AMDGPU_CSA_SIZE    (8 * 1024)
-#define AMDGPU_CSA_VADDR   (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE)
+#define AMDGPU_CSA_SIZE		(8 * 1024)
 
 #define amdgpu_sriov_enabled(adev) \
 ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void)
 }
 
 struct amdgpu_vm;
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

+ 283 - 116
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 /*
  * GPUVM
@@ -75,7 +76,8 @@ struct amdgpu_pte_update_params {
 	/* indirect buffer to fill with commands */
 	struct amdgpu_ib *ib;
 	/* Function which actually does the update */
-	void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
+	void (*func)(struct amdgpu_pte_update_params *params,
+		     struct amdgpu_bo *bo, uint64_t pe,
 		     uint64_t addr, unsigned count, uint32_t incr,
 		     uint64_t flags);
 	/* The next two are used during VM update by CPU
@@ -256,6 +258,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 	return ready;
 }
 
+/**
+ * amdgpu_vm_clear_bo - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: BO to clear
+ * @level: level this BO is at
+ *
+ * Root PD needs to be reserved when calling this.
+ */
+static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
+			      unsigned level, bool pte_support_ats)
+{
+	struct ttm_operation_ctx ctx = { true, false };
+	struct dma_fence *fence = NULL;
+	unsigned entries, ats_entries;
+	struct amdgpu_ring *ring;
+	struct amdgpu_job *job;
+	uint64_t addr;
+	int r;
+
+	addr = amdgpu_bo_gpu_offset(bo);
+	entries = amdgpu_bo_size(bo) / 8;
+
+	if (pte_support_ats) {
+		if (level == adev->vm_manager.root_level) {
+			ats_entries = amdgpu_vm_level_shift(adev, level);
+			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+			ats_entries = min(ats_entries, entries);
+			entries -= ats_entries;
+		} else {
+			ats_entries = entries;
+			entries = 0;
+		}
+	} else {
+		ats_entries = 0;
+	}
+
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+	r = reservation_object_reserve_shared(bo->tbo.resv);
+	if (r)
+		return r;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r)
+		goto error;
+
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	if (r)
+		goto error;
+
+	if (ats_entries) {
+		uint64_t ats_value;
+
+		ats_value = AMDGPU_PTE_DEFAULT_ATC;
+		if (level != AMDGPU_VM_PTB)
+			ats_value |= AMDGPU_PDE_PTE;
+
+		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+				      ats_entries, 0, ats_value);
+		addr += ats_entries * 8;
+	}
+
+	if (entries)
+		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+				      entries, 0, 0);
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+	WARN_ON(job->ibs[0].length_dw > 64);
+	r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
+	if (r)
+		goto error_free;
+
+	r = amdgpu_job_submit(job, ring, &vm->entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r)
+		goto error_free;
+
+	amdgpu_bo_fence(bo, fence, true);
+	dma_fence_put(fence);
+
+	if (bo->shadow)
+		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
+					  level, pte_support_ats);
+
+	return 0;
+
+error_free:
+	amdgpu_job_free(job);
+
+error:
+	return r;
+}
+
 /**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
@@ -270,13 +370,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
 				  struct amdgpu_vm_pt *parent,
 				  uint64_t saddr, uint64_t eaddr,
-				  unsigned level)
+				  unsigned level, bool ats)
 {
 	unsigned shift = amdgpu_vm_level_shift(adev, level);
 	unsigned pt_idx, from, to;
-	int r;
 	u64 flags;
-	uint64_t init_value = 0;
+	int r;
 
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -299,21 +398,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				AMDGPU_GEM_CREATE_SHADOW);
 
-	if (vm->pte_support_ats) {
-		init_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != AMDGPU_VM_PTB)
-			init_value |= AMDGPU_PDE_PTE;
-
-	}
-
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -323,16 +414,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level),
-					     AMDGPU_GPU_PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_VRAM,
-					     flags,
-					     NULL, resv, init_value, &pt);
+					     AMDGPU_GPU_PAGE_SIZE,
+					     AMDGPU_GEM_DOMAIN_VRAM, flags,
+					     ttm_bo_type_kernel, resv, &pt);
 			if (r)
 				return r;
 
+			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
+			if (r) {
+				amdgpu_bo_unref(&pt->shadow);
+				amdgpu_bo_unref(&pt);
+				return r;
+			}
+
 			if (vm->use_cpu_for_update) {
 				r = amdgpu_bo_kmap(pt, NULL);
 				if (r) {
+					amdgpu_bo_unref(&pt->shadow);
 					amdgpu_bo_unref(&pt);
 					return r;
 				}
@@ -356,7 +454,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 				((1 << shift) - 1);
 			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
-						   sub_eaddr, level);
+						   sub_eaddr, level, ats);
 			if (r)
 				return r;
 		}
@@ -379,26 +477,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			struct amdgpu_vm *vm,
 			uint64_t saddr, uint64_t size)
 {
-	uint64_t last_pfn;
 	uint64_t eaddr;
+	bool ats = false;
 
 	/* validate the parameters */
 	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
 		return -EINVAL;
 
 	eaddr = saddr + size - 1;
-	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
-	if (last_pfn >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-			last_pfn, adev->vm_manager.max_pfn);
-		return -EINVAL;
-	}
+
+	if (vm->pte_support_ats)
+		ats = saddr < AMDGPU_VA_HOLE_START;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
+	if (eaddr >= adev->vm_manager.max_pfn) {
+		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+			eaddr, adev->vm_manager.max_pfn);
+		return -EINVAL;
+	}
+
 	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
-				      adev->vm_manager.root_level);
+				      adev->vm_manager.root_level, ats);
 }
 
 /**
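
With page tables now cleared explicitly by amdgpu_vm_clear_bo(), and amdgpu_vm_alloc_pts() deciding up front whether a range needs ATS defaults, the subtle part is how many root-PD entries receive the ATS default value: exactly the part of the directory that covers addresses below AMDGPU_VA_HOLE_START. A rough userspace sketch of that arithmetic; the shift, hole start, and entry count are placeholder numbers, not taken from a specific ASIC:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Placeholder numbers, only roughly shaped like a real VM config. */
	const unsigned level_shift = 27;		/* bits resolved below the root PD */
	const unsigned gpu_page_shift = 12;		/* AMDGPU_GPU_PAGE_SHIFT */
	const uint64_t hole_start = 1ULL << 47;		/* stand-in for AMDGPU_VA_HOLE_START */
	uint64_t entries = 512;				/* entries in the root PD */
	uint64_t ats_entries;

	/* Same shape as the root-level branch in amdgpu_vm_clear_bo(). */
	ats_entries = hole_start >> (level_shift + gpu_page_shift);
	if (ats_entries > entries)
		ats_entries = entries;
	entries -= ats_entries;

	printf("%llu entries get the ATS default, %llu are cleared to zero\n",
	       (unsigned long long)ats_entries, (unsigned long long)entries);
	return 0;
}
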
@@ -465,7 +566,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 
 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 {
-	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+	return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 }
 
 /**
@@ -491,14 +592,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 		id->oa_base != job->oa_base ||
 		id->oa_size != job->oa_size);
 	bool vm_flush_needed = job->vm_needs_flush;
+	bool pasid_mapping_needed = id->pasid != job->pasid ||
+		!id->pasid_mapping ||
+		!dma_fence_is_signaled(id->pasid_mapping);
+	struct dma_fence *fence = NULL;
 	unsigned patch_offset = 0;
 	int r;
 
 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 		gds_switch_needed = true;
 		vm_flush_needed = true;
+		pasid_mapping_needed = true;
 	}
 
+	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+	vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+		ring->funcs->emit_wreg;
+
 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 		return 0;
 
@@ -508,23 +619,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	if (need_pipe_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
-		struct dma_fence *fence;
-
+	if (vm_flush_needed) {
 		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+	}
+
+	if (pasid_mapping_needed)
+		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
+	if (vm_flush_needed || pasid_mapping_needed) {
 		r = amdgpu_fence_emit(ring, &fence);
 		if (r)
 			return r;
+	}
 
+	if (vm_flush_needed) {
 		mutex_lock(&id_mgr->lock);
 		dma_fence_put(id->last_flush);
-		id->last_flush = fence;
-		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+		id->last_flush = dma_fence_get(fence);
+		id->current_gpu_reset_count =
+			atomic_read(&adev->gpu_reset_counter);
 		mutex_unlock(&id_mgr->lock);
 	}
 
+	if (pasid_mapping_needed) {
+		id->pasid = job->pasid;
+		dma_fence_put(id->pasid_mapping);
+		id->pasid_mapping = dma_fence_get(fence);
+	}
+	dma_fence_put(fence);
+
 	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 		id->gds_base = job->gds_base;
 		id->gds_size = job->gds_size;
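
In the reworked flush path a single fence emitted by amdgpu_fence_emit() now backs both the VMID's last_flush and its pasid_mapping: each consumer takes its own reference with dma_fence_get(), and the local reference is dropped by the final dma_fence_put(fence). A toy, standalone sketch of that reference-counting pattern (toy_fence and its helpers are illustrative only, not the kernel's dma_fence API):

#include <stdio.h>
#include <stdlib.h>

struct toy_fence { int refcount; };

static struct toy_fence *toy_fence_get(struct toy_fence *f)
{
	if (f)
		f->refcount++;
	return f;
}

static void toy_fence_put(struct toy_fence *f)
{
	if (f && --f->refcount == 0) {
		printf("fence freed\n");
		free(f);
	}
}

int main(void)
{
	/* "emit": the caller starts out owning one reference */
	struct toy_fence *fence = calloc(1, sizeof(*fence));
	struct toy_fence *last_flush, *pasid_mapping;

	fence->refcount = 1;
	last_flush = toy_fence_get(fence);	/* VMID flush tracking */
	pasid_mapping = toy_fence_get(fence);	/* PASID mapping tracking */
	toy_fence_put(fence);			/* drop the local reference */

	/* both consumers still hold valid references */
	toy_fence_put(last_flush);
	toy_fence_put(pasid_mapping);		/* prints "fence freed" */
	return 0;
}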
@@ -578,6 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_do_set_ptes - helper to call the right asic function
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -588,10 +713,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+				  struct amdgpu_bo *bo,
 				  uint64_t pe, uint64_t addr,
 				  unsigned count, uint32_t incr,
 				  uint64_t flags)
 {
+	pe += amdgpu_bo_gpu_offset(bo);
 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
 	if (count < 3) {
@@ -608,6 +735,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -617,13 +745,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * Traces the parameters and calls the DMA function to copy the PTEs.
  */
 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+				   struct amdgpu_bo *bo,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint64_t flags)
 {
 	uint64_t src = (params->src + (addr >> 12) * 8);
 
-
+	pe += amdgpu_bo_gpu_offset(bo);
 	trace_amdgpu_vm_copy_ptes(pe, src, count);
 
 	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
@@ -657,6 +786,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: kmap addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -666,6 +796,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * Write count number of PT/PD entries directly.
  */
 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+				   struct amdgpu_bo *bo,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint64_t flags)
@@ -673,14 +804,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 	unsigned int i;
 	uint64_t value;
 
+	pe += (unsigned long)amdgpu_bo_kptr(bo);
+
 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
 	for (i = 0; i < count; i++) {
 		value = params->pages_addr ?
 			amdgpu_vm_map_gart(params->pages_addr, addr) :
 			addr;
-		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
-					i, value, flags);
+		amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
+				       i, value, flags);
 		addr += incr;
 	}
 }
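
With the extra bo parameter, pe is now a byte offset into the page-table BO and each update callback resolves the final destination itself (amdgpu_bo_gpu_offset() on the SDMA paths, amdgpu_bo_kptr() on the CPU path), so writing the shadow copy becomes just a second call with the shadow BO. A simplified standalone sketch of that dispatch pattern; fake_bo, pt_update_fn and gpu_set_ptes are illustrative names, not driver types:

#include <stdint.h>
#include <stdio.h>

struct fake_bo { uint64_t gpu_offset; };

typedef void (*pt_update_fn)(struct fake_bo *bo, uint64_t pe,
			     uint64_t addr, unsigned count);

static void gpu_set_ptes(struct fake_bo *bo, uint64_t pe,
			 uint64_t addr, unsigned count)
{
	pe += bo->gpu_offset;	/* resolve the offset against this BO */
	printf("write %u PTEs at GPU address 0x%llx, first entry 0x%llx\n",
	       count, (unsigned long long)pe, (unsigned long long)addr);
}

int main(void)
{
	struct fake_bo pt = { .gpu_offset = 0x100000 };
	struct fake_bo shadow = { .gpu_offset = 0x200000 };
	pt_update_fn func = gpu_set_ptes;
	uint64_t pe = 16 * 8;	/* entry 16, 8 bytes per entry */

	func(&shadow, pe, 0xdeadbeef000ULL, 4);	/* shadow page table */
	func(&pt, pe, 0xdeadbeef000ULL, 4);	/* real page table */
	return 0;
}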
@@ -714,8 +847,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 				 struct amdgpu_vm_pt *parent,
 				 struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
-	uint64_t pd_addr, shadow_addr = 0;
+	struct amdgpu_bo *bo = parent->base.bo, *pbo;
 	uint64_t pde, pt, flags;
 	unsigned level;
 
@@ -723,29 +855,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 	if (entry->huge)
 		return;
 
-	if (vm->use_cpu_for_update) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-	} else {
-		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-		shadow = parent->base.bo->shadow;
-		if (shadow)
-			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-	}
-
-	for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+	for (level = 0, pbo = bo->parent; pbo; ++level)
 		pbo = pbo->parent;
 
 	level += params->adev->vm_manager.root_level;
-	pt = amdgpu_bo_gpu_offset(bo);
+	pt = amdgpu_bo_gpu_offset(entry->base.bo);
 	flags = AMDGPU_PTE_VALID;
-	amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
-	if (shadow) {
-		pde = shadow_addr + (entry - parent->entries) * 8;
-		params->func(params, pde, pt, 1, 0, flags);
-	}
-
-	pde = pd_addr + (entry - parent->entries) * 8;
-	params->func(params, pde, pt, 1, 0, flags);
+	amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+	pde = (entry - parent->entries) * 8;
+	if (bo->shadow)
+		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
+	params->func(params, bo, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -856,7 +976,7 @@ restart:
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+		amdgpu_asic_flush_hdp(adev, NULL);
 	} else if (params.ib->length_dw == 0) {
 		amdgpu_job_free(job);
 	} else {
@@ -870,11 +990,6 @@ restart:
 		amdgpu_ring_pad_ib(ring, params.ib);
 		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
 				 AMDGPU_FENCE_OWNER_VM, false);
-		if (root->shadow)
-			amdgpu_sync_resv(adev, &job->sync,
-					 root->shadow->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-
 		WARN_ON(params.ib->length_dw > ndw);
 		r = amdgpu_job_submit(job, ring, &vm->entity,
 				      AMDGPU_FENCE_OWNER_VM, &fence);
@@ -946,7 +1061,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 					unsigned nptes, uint64_t dst,
 					uint64_t flags)
 {
-	uint64_t pd_addr, pde;
+	uint64_t pde;
 
 	/* In the case of a mixed PT the PDE must point to it*/
 	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
@@ -967,21 +1082,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	}
 
 	entry->huge = true;
-	amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
-			       &dst, &flags);
+	amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
 
-	if (p->func == amdgpu_vm_cpu_set_ptes) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-	} else {
-		if (parent->base.bo->shadow) {
-			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
-			pde = pd_addr + (entry - parent->entries) * 8;
-			p->func(p, pde, dst, 1, 0, flags);
-		}
-		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-	}
-	pde = pd_addr + (entry - parent->entries) * 8;
-	p->func(p, pde, dst, 1, 0, flags);
+	pde = (entry - parent->entries) * 8;
+	if (parent->base.bo->shadow)
+		p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
+	p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
 }
 
 /**
@@ -1007,7 +1113,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	unsigned nptes;
-	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; addr += nptes,
@@ -1030,20 +1135,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			continue;
 
 		pt = entry->base.bo;
-		if (use_cpu_update) {
-			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
-		} else {
-			if (pt->shadow) {
-				pe_start = amdgpu_bo_gpu_offset(pt->shadow);
-				pe_start += (addr & mask) * 8;
-				params->func(params, pe_start, dst, nptes,
-					     AMDGPU_GPU_PAGE_SIZE, flags);
-			}
-			pe_start = amdgpu_bo_gpu_offset(pt);
-		}
-
-		pe_start += (addr & mask) * 8;
-		params->func(params, pe_start, dst, nptes,
+		pe_start = (addr & mask) * 8;
+		if (pt->shadow)
+			params->func(params, pt->shadow, pe_start, dst, nptes,
+				     AMDGPU_GPU_PAGE_SIZE, flags);
+		params->func(params, pt, pe_start, dst, nptes,
 			     AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 
@@ -1204,11 +1300,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	} else {
 		/* set page commands needed */
-		ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+		ndw += ncmds * 10;
 
 		/* extra commands for begin/end fragments */
-		ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
-				* adev->vm_manager.fragment_size;
+		ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
@@ -1457,7 +1552,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+		amdgpu_asic_flush_hdp(adev, NULL);
 	}
 
 	spin_lock(&vm->status_lock);
@@ -1485,7 +1580,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
 
 	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
 	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
-	adev->gart.gart_funcs->set_prt(adev, enable);
+	adev->gmc.gmc_funcs->set_prt(adev, enable);
 	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
 }
 
@@ -1494,7 +1589,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
  */
 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
 {
-	if (!adev->gart.gart_funcs->set_prt)
+	if (!adev->gmc.gmc_funcs->set_prt)
 		return;
 
 	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
@@ -1529,7 +1624,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
 {
 	struct amdgpu_prt_cb *cb;
 
-	if (!adev->gart.gart_funcs->set_prt)
+	if (!adev->gmc.gmc_funcs->set_prt)
 		return;
 
 	cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
@@ -1623,16 +1718,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct dma_fence **fence)
 {
 	struct amdgpu_bo_va_mapping *mapping;
+	uint64_t init_pte_value = 0;
 	struct dma_fence *f = NULL;
 	int r;
-	uint64_t init_pte_value = 0;
 
 	while (!list_empty(&vm->freed)) {
 		mapping = list_first_entry(&vm->freed,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		if (vm->pte_support_ats)
+		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -2262,11 +2357,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
-	uint64_t init_pde_value = 0, flags;
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct drm_sched_rq *rq;
 	unsigned long size;
+	uint64_t flags;
 	int r, i;
 
 	vm->va = RB_ROOT_CACHED;
@@ -2295,33 +2390,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
 
-		if (adev->asic_type == CHIP_RAVEN) {
+		if (adev->asic_type == CHIP_RAVEN)
 			vm->pte_support_ats = true;
-			init_pde_value = AMDGPU_PTE_DEFAULT_ATC
-					| AMDGPU_PDE_PTE;
-
-		}
-	} else
+	} else {
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 						AMDGPU_VM_USE_CPU_FOR_GFX);
+	}
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
 		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_update = NULL;
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
-		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-				AMDGPU_GEM_CREATE_SHADOW);
+		flags |= AMDGPU_GEM_CREATE_SHADOW;
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-	r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
-			     flags, NULL, NULL, init_pde_value,
-			     &vm->root.base.bo);
+	r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
+			     ttm_bo_type_kernel, NULL, &vm->root.base.bo);
 	if (r)
 		goto error_free_sched_entity;
 
@@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_free_root;
 
+	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level,
+			       vm->pte_support_ats);
+	if (r)
+		goto error_unreserve;
+
 	vm->root.base.vm = vm;
 	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
 	list_add_tail(&vm->root.base.vm_status, &vm->evicted);
@@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return 0;
 
+error_unreserve:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+
 error_free_root:
 	amdgpu_bo_unref(&vm->root.base.bo->shadow);
 	amdgpu_bo_unref(&vm->root.base.bo);
@@ -2363,6 +2461,73 @@ error_free_sched_entity:
 	return r;
 }
 
+/**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_support_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+	int r;
+
+	r = amdgpu_bo_reserve(vm->root.base.bo, true);
+	if (r)
+		return r;
+
+	/* Sanity checks */
+	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+		r = -EINVAL;
+		goto error;
+	}
+
+	/* Check if PD needs to be reinitialized and do it before
+	 * changing any other state, in case it fails.
+	 */
+	if (pte_support_ats != vm->pte_support_ats) {
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level,
+			       pte_support_ats);
+		if (r)
+			goto error;
+	}
+
+	/* Update VM state */
+	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+	vm->pte_support_ats = pte_support_ats;
+	DRM_DEBUG_DRIVER("VM update mode is %s\n",
+			 vm->use_cpu_for_update ? "CPU" : "SDMA");
+	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+		  "CPU update of VM recommended only for large BAR system\n");
+
+	if (vm->pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+		vm->pasid = 0;
+	}
+
+error:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+	return r;
+}
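
A hypothetical caller sketch for the new helper, based only on the prototypes added to amdgpu_vm.h below; the AMDGPU_VM_CONTEXT_GFX constant and the error handling shown here are assumptions about how a KFD-style user would create and then convert a VM before mapping anything into it:

static int example_acquire_compute_vm(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm)
{
	int r;

	/* Create a normal GFX VM first (no PASID assigned here). */
	r = amdgpu_vm_init(adev, vm, AMDGPU_VM_CONTEXT_GFX, 0);
	if (r)
		return r;

	/* Must happen before any BOs are mapped or page tables exist. */
	r = amdgpu_vm_make_compute(adev, vm);
	if (r)
		amdgpu_vm_fini(adev, vm);

	return r;
}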
+
 /**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
@@ -2405,11 +2570,13 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
-	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
 	struct amdgpu_bo *root;
 	u64 fault;
 	int i, r;
 
+	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
 		amdgpu_ih_clear_fault(adev, fault);

+ 12 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -28,6 +28,7 @@
 #include <linux/kfifo.h>
 #include <linux/rbtree.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -99,7 +100,7 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_MMHUB				1
 
 /* hardcode that limit for now */
-#define AMDGPU_VA_RESERVED_SIZE			(8ULL << 20)
+#define AMDGPU_VA_RESERVED_SIZE			(1ULL << 20)
 
 /* VA hole for 48bit addresses on Vega10 */
 #define AMDGPU_VA_HOLE_START			0x0000800000000000ULL
@@ -206,6 +207,15 @@ struct amdgpu_vm {
 
 	/* Limit non-retry fault storms */
 	unsigned int		fault_credit;
+
+	/* Points to the KFD process VM info */
+	struct amdkfd_process_info *process_info;
+
+	/* List node in amdkfd_process_info.vm_list_head */
+	struct list_head	vm_list_node;
+
+	/* Valid while the PD is reserved or fenced */
+	uint64_t		pd_phys_addr;
 };
 
 struct amdgpu_vm_manager {
@@ -250,6 +260,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 				  unsigned int pasid);

+ 3 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

@@ -89,11 +89,11 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
 	uint64_t start = node->start << PAGE_SHIFT;
 	uint64_t end = (node->size + node->start) << PAGE_SHIFT;
 
-	if (start >= adev->mc.visible_vram_size)
+	if (start >= adev->gmc.visible_vram_size)
 		return 0;
 
-	return (end > adev->mc.visible_vram_size ?
-		adev->mc.visible_vram_size : end) - start;
+	return (end > adev->gmc.visible_vram_size ?
+		adev->gmc.visible_vram_size : end) - start;
 }
 
 /**

+ 32 - 275
drivers/gpu/drm/amd/amdgpu/ci_dpm.c

@@ -65,6 +65,8 @@ MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 #define VOLTAGE_VID_OFFSET_SCALE1    625
 #define VOLTAGE_VID_OFFSET_SCALE2    100
 
+static const struct amd_pm_funcs ci_dpm_funcs;
+
 static const struct ci_pt_defaults defaults_hawaii_xt =
 {
 	1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000,
@@ -905,7 +907,7 @@ static bool ci_dpm_vblank_too_short(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
-	u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
+	u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
 
 	/* disable mclk switching if the refresh is >120Hz, even if the
 	 * blanking period would allow it
@@ -2954,7 +2956,7 @@ static int ci_calculate_mclk_params(struct amdgpu_device *adev,
 	mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK;
 	mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT);
 
-	if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
 		mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK |
 				MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK);
 		mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) |
@@ -3077,7 +3079,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 	    (memory_clock <= pi->mclk_strobe_mode_threshold))
 		memory_level->StrobeEnable = 1;
 
-	if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
 		memory_level->StrobeRatio =
 			ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable);
 		if (pi->mclk_edc_enable_threshold &&
@@ -3695,40 +3697,6 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table,
 	return ret;
 }
 
-static void ci_save_default_power_profile(struct amdgpu_device *adev)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct SMU7_Discrete_GraphicsLevel *levels =
-				pi->smc_state_table.GraphicsLevel;
-	uint32_t min_level = 0;
-
-	pi->default_gfx_power_profile.activity_threshold =
-			be16_to_cpu(levels[0].ActivityLevel);
-	pi->default_gfx_power_profile.up_hyst = levels[0].UpH;
-	pi->default_gfx_power_profile.down_hyst = levels[0].DownH;
-	pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE;
-
-	pi->default_compute_power_profile = pi->default_gfx_power_profile;
-	pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE;
-
-	/* Optimize compute power profile: Use only highest
-	 * 2 power levels (if more than 2 are available), Hysteresis:
-	 * 0ms up, 5ms down
-	 */
-	if (pi->smc_state_table.GraphicsDpmLevelCount > 2)
-		min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2;
-	else if (pi->smc_state_table.GraphicsDpmLevelCount == 2)
-		min_level = 1;
-	pi->default_compute_power_profile.min_sclk =
-			be32_to_cpu(levels[min_level].SclkFrequency);
-
-	pi->default_compute_power_profile.up_hyst = 0;
-	pi->default_compute_power_profile.down_hyst = 5;
-
-	pi->gfx_power_profile = pi->default_gfx_power_profile;
-	pi->compute_power_profile = pi->default_compute_power_profile;
-}
-
 static int ci_init_smc_table(struct amdgpu_device *adev)
 {
 	struct ci_power_info *pi = ci_get_pi(adev);
@@ -3752,7 +3720,7 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
 	if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC)
 		table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
 
-	if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
+	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
 		table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
 
 	if (ulv->supported) {
@@ -3874,8 +3842,6 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
 	if (ret)
 		return ret;
 
-	ci_save_default_power_profile(adev);
-
 	return 0;
 }
 
@@ -4549,12 +4515,12 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev,
 			for (k = 0; k < table->num_entries; k++) {
 				table->mc_reg_table_entry[k].mc_data[j] =
 					(temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
-				if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
+				if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
 					table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
 			}
 			j++;
 
-			if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
+			if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
 				if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
 					return -EINVAL;
 				table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
@@ -6277,6 +6243,8 @@ static int ci_dpm_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->powerplay.pp_funcs = &ci_dpm_funcs;
+	adev->powerplay.pp_handle = adev;
 	ci_dpm_set_irq_funcs(adev);
 
 	return 0;
@@ -6639,9 +6607,10 @@ static int ci_dpm_force_clock_level(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct ci_power_info *pi = ci_get_pi(adev);
 
-	if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO |
-				AMD_DPM_FORCED_LEVEL_LOW |
-				AMD_DPM_FORCED_LEVEL_HIGH))
+	if (adev->pm.dpm.forced_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	if (mask == 0)
 		return -EINVAL;
 
 	switch (type) {
@@ -6662,15 +6631,15 @@ static int ci_dpm_force_clock_level(void *handle,
 	case PP_PCIE:
 	{
 		uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
-		uint32_t level = 0;
 
-		while (tmp >>= 1)
-			level++;
-
-		if (!pi->pcie_dpm_key_disabled)
-			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+		if (!pi->pcie_dpm_key_disabled) {
+			if (fls(tmp) != ffs(tmp))
+				amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_UnForceLevel);
+			else
+				amdgpu_ci_send_msg_to_smc_with_parameter(adev,
 					PPSMC_MSG_PCIeDPM_ForceLevel,
-					level);
+					fls(tmp) - 1);
+		}
 		break;
 	}
 	default:
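
The forced-PCIe-level handling above leans on two bit helpers: fls() returns the (1-based) index of the highest set bit and ffs() the lowest, so fls(tmp) != ffs(tmp) means more than one level is still enabled in the mask and the SMC is asked to unforce instead, while fls(tmp) - 1 is the index of the highest requested level. A standalone worked example with simplified re-implementations of the two helpers (my_fls/my_ffs are stand-ins for the kernel's bitops):

#include <stdio.h>

static int my_fls(unsigned int x)
{
	int i = 0;

	while (x) {
		i++;
		x >>= 1;
	}
	return i;
}

static int my_ffs(unsigned int x)
{
	return x ? my_fls(x & -x) : 0;
}

int main(void)
{
	unsigned int single = 0x4;	/* only DPM level 2 enabled in the mask */
	unsigned int multi  = 0x6;	/* levels 1 and 2 enabled */

	printf("single: force level %d\n", my_fls(single) - 1);	/* 2 */
	printf("multi:  %s\n", my_fls(multi) != my_ffs(multi) ?
	       "unforce (more than one level)" : "force");
	return 0;
}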
@@ -6752,222 +6721,6 @@ static int ci_dpm_set_mclk_od(void *handle, uint32_t value)
 	return 0;
 }
 
-static int ci_dpm_get_power_profile_state(void *handle,
-		struct amd_pp_profile *query)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-
-	if (!pi || !query)
-		return -EINVAL;
-
-	if (query->type == AMD_PP_GFX_PROFILE)
-		memcpy(query, &pi->gfx_power_profile,
-				sizeof(struct amd_pp_profile));
-	else if (query->type == AMD_PP_COMPUTE_PROFILE)
-		memcpy(query, &pi->compute_power_profile,
-				sizeof(struct amd_pp_profile));
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev,
-		struct amd_pp_profile *request)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct ci_dpm_table *dpm_table = &(pi->dpm_table);
-	struct SMU7_Discrete_GraphicsLevel *levels =
-			pi->smc_state_table.GraphicsLevel;
-	uint32_t array = pi->dpm_table_start +
-			offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
-	uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) *
-			SMU7_MAX_LEVELS_GRAPHICS;
-	uint32_t i;
-
-	for (i = 0; i < dpm_table->sclk_table.count; i++) {
-		levels[i].ActivityLevel =
-				cpu_to_be16(request->activity_threshold);
-		levels[i].EnabledForActivity = 1;
-		levels[i].UpH = request->up_hyst;
-		levels[i].DownH = request->down_hyst;
-	}
-
-	return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels,
-				array_size, pi->sram_end);
-}
-
-static void ci_find_min_clock_masks(struct amdgpu_device *adev,
-		uint32_t *sclk_mask, uint32_t *mclk_mask,
-		uint32_t min_sclk, uint32_t min_mclk)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct ci_dpm_table *dpm_table = &(pi->dpm_table);
-	uint32_t i;
-
-	for (i = 0; i < dpm_table->sclk_table.count; i++) {
-		if (dpm_table->sclk_table.dpm_levels[i].enabled &&
-			dpm_table->sclk_table.dpm_levels[i].value >= min_sclk)
-			*sclk_mask |= 1 << i;
-	}
-
-	for (i = 0; i < dpm_table->mclk_table.count; i++) {
-		if (dpm_table->mclk_table.dpm_levels[i].enabled &&
-			dpm_table->mclk_table.dpm_levels[i].value >= min_mclk)
-			*mclk_mask |= 1 << i;
-	}
-}
-
-static int ci_set_power_profile_state(struct amdgpu_device *adev,
-		struct amd_pp_profile *request)
-{
-	struct ci_power_info *pi = ci_get_pi(adev);
-	int tmp_result, result = 0;
-	uint32_t sclk_mask = 0, mclk_mask = 0;
-
-	tmp_result = ci_freeze_sclk_mclk_dpm(adev);
-	if (tmp_result) {
-		DRM_ERROR("Failed to freeze SCLK MCLK DPM!");
-		result = tmp_result;
-	}
-
-	tmp_result = ci_populate_requested_graphic_levels(adev,
-			request);
-	if (tmp_result) {
-		DRM_ERROR("Failed to populate requested graphic levels!");
-		result = tmp_result;
-	}
-
-	tmp_result = ci_unfreeze_sclk_mclk_dpm(adev);
-	if (tmp_result) {
-		DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!");
-		result = tmp_result;
-	}
-
-	ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask,
-			request->min_sclk, request->min_mclk);
-
-	if (sclk_mask) {
-		if (!pi->sclk_dpm_key_disabled)
-			amdgpu_ci_send_msg_to_smc_with_parameter(
-				adev,
-				PPSMC_MSG_SCLKDPM_SetEnabledMask,
-				pi->dpm_level_enable_mask.
-				sclk_dpm_enable_mask &
-				sclk_mask);
-	}
-
-	if (mclk_mask) {
-		if (!pi->mclk_dpm_key_disabled)
-			amdgpu_ci_send_msg_to_smc_with_parameter(
-				adev,
-				PPSMC_MSG_MCLKDPM_SetEnabledMask,
-				pi->dpm_level_enable_mask.
-				mclk_dpm_enable_mask &
-				mclk_mask);
-	}
-
-
-	return result;
-}
-
-static int ci_dpm_set_power_profile_state(void *handle,
-		struct amd_pp_profile *request)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-	int ret = -1;
-
-	if (!pi || !request)
-		return -EINVAL;
-
-	if (adev->pm.dpm.forced_level !=
-			AMD_DPM_FORCED_LEVEL_AUTO)
-		return -EINVAL;
-
-	if (request->min_sclk ||
-		request->min_mclk ||
-		request->activity_threshold ||
-		request->up_hyst ||
-		request->down_hyst) {
-		if (request->type == AMD_PP_GFX_PROFILE)
-			memcpy(&pi->gfx_power_profile, request,
-					sizeof(struct amd_pp_profile));
-		else if (request->type == AMD_PP_COMPUTE_PROFILE)
-			memcpy(&pi->compute_power_profile, request,
-					sizeof(struct amd_pp_profile));
-		else
-			return -EINVAL;
-
-		if (request->type == pi->current_power_profile)
-			ret = ci_set_power_profile_state(
-					adev,
-					request);
-	} else {
-		/* set power profile if it exists */
-		switch (request->type) {
-		case AMD_PP_GFX_PROFILE:
-			ret = ci_set_power_profile_state(
-				adev,
-				&pi->gfx_power_profile);
-			break;
-		case AMD_PP_COMPUTE_PROFILE:
-			ret = ci_set_power_profile_state(
-				adev,
-				&pi->compute_power_profile);
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-
-	if (!ret)
-		pi->current_power_profile = request->type;
-
-	return 0;
-}
-
-static int ci_dpm_reset_power_profile_state(void *handle,
-		struct amd_pp_profile *request)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-
-	if (!pi || !request)
-		return -EINVAL;
-
-	if (request->type == AMD_PP_GFX_PROFILE) {
-		pi->gfx_power_profile = pi->default_gfx_power_profile;
-		return ci_dpm_set_power_profile_state(adev,
-					  &pi->gfx_power_profile);
-	} else if (request->type == AMD_PP_COMPUTE_PROFILE) {
-		pi->compute_power_profile =
-			pi->default_compute_power_profile;
-		return ci_dpm_set_power_profile_state(adev,
-					  &pi->compute_power_profile);
-	} else
-		return -EINVAL;
-}
-
-static int ci_dpm_switch_power_profile(void *handle,
-		enum amd_pp_profile_type type)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct ci_power_info *pi = ci_get_pi(adev);
-	struct amd_pp_profile request = {0};
-
-	if (!pi)
-		return -EINVAL;
-
-	if (pi->current_power_profile != type) {
-		request.type = type;
-		return ci_dpm_set_power_profile_state(adev, &request);
-	}
-
-	return 0;
-}
-
 static int ci_dpm_read_sensor(void *handle, int idx,
 			      void *value, int *size)
 {
@@ -7011,7 +6764,7 @@ static int ci_dpm_read_sensor(void *handle, int idx,
 	}
 }
 
-const struct amd_ip_funcs ci_dpm_ip_funcs = {
+static const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.name = "ci_dpm",
 	.early_init = ci_dpm_early_init,
 	.late_init = ci_dpm_late_init,
@@ -7028,8 +6781,16 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.set_powergating_state = ci_dpm_set_powergating_state,
 };
 
-const struct amd_pm_funcs ci_dpm_funcs = {
-	.get_temperature = &ci_dpm_get_temp,
+const struct amdgpu_ip_block_version ci_smu_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 7,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &ci_dpm_ip_funcs,
+};
+
+static const struct amd_pm_funcs ci_dpm_funcs = {
 	.pre_set_power_state = &ci_dpm_pre_set_power_state,
 	.set_power_state = &ci_dpm_set_power_state,
 	.post_set_power_state = &ci_dpm_post_set_power_state,
@@ -7053,10 +6814,6 @@ const struct amd_pm_funcs ci_dpm_funcs = {
 	.set_mclk_od = ci_dpm_set_mclk_od,
 	.check_state_equal = ci_check_state_equal,
 	.get_vce_clock_state = amdgpu_get_vce_clock_state,
-	.get_power_profile_state = ci_dpm_get_power_profile_state,
-	.set_power_profile_state = ci_dpm_set_power_profile_state,
-	.reset_power_profile_state = ci_dpm_reset_power_profile_state,
-	.switch_power_profile = ci_dpm_switch_power_profile,
 	.read_sensor = ci_dpm_read_sensor,
 };
 

+ 0 - 7
drivers/gpu/drm/amd/amdgpu/ci_dpm.h

@@ -295,13 +295,6 @@ struct ci_power_info {
 	bool fan_is_controlled_by_smc;
 	u32 t_min;
 	u32 fan_ctrl_default_mode;
-
-	/* power profile */
-	struct amd_pp_profile gfx_power_profile;
-	struct amd_pp_profile compute_power_profile;
-	struct amd_pp_profile default_gfx_power_profile;
-	struct amd_pp_profile default_compute_power_profile;
-	enum amd_pp_profile_type current_power_profile;
 };
 
 #define CISLANDS_VOLTAGE_CONTROL_NONE                   0x0

+ 33 - 9
drivers/gpu/drm/amd/amdgpu/cik.c

@@ -67,7 +67,6 @@
 
 #include "amdgpu_dm.h"
 #include "amdgpu_amdkfd.h"
-#include "amdgpu_powerplay.h"
 #include "dce_virtual.h"
 
 /*
@@ -1715,6 +1714,27 @@ static void cik_detect_hw_virtualization(struct amdgpu_device *adev)
 		adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
 }
 
+static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	if (!ring || !ring->funcs->emit_wreg) {
+		WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+		RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+	} else {
+		amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+	}
+}
+
+static void cik_invalidate_hdp(struct amdgpu_device *adev,
+			       struct amdgpu_ring *ring)
+{
+	if (!ring || !ring->funcs->emit_wreg) {
+		WREG32(mmHDP_DEBUG0, 1);
+		RREG32(mmHDP_DEBUG0);
+	} else {
+		amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+	}
+}
+
 static const struct amdgpu_asic_funcs cik_asic_funcs =
 {
 	.read_disabled_bios = &cik_read_disabled_bios,
@@ -1726,6 +1746,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.set_uvd_clocks = &cik_set_uvd_clocks,
 	.set_vce_clocks = &cik_set_vce_clocks,
 	.get_config_memsize = &cik_get_config_memsize,
+	.flush_hdp = &cik_flush_hdp,
+	.invalidate_hdp = &cik_invalidate_hdp,
 };
 
 static int cik_common_early_init(void *handle)
@@ -1864,10 +1886,6 @@ static int cik_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
-	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
-	amdgpu_device_get_pcie_info(adev);
-
 	return 0;
 }
 
@@ -1977,7 +1995,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (amdgpu_dpm == -1)
+			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
+		else
+			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1995,7 +2016,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (amdgpu_dpm == -1)
+			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
+		else
+			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -2013,7 +2037,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -2032,7 +2056,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/cik.h

@@ -24,6 +24,8 @@
 #ifndef __CIK_H__
 #define __CIK_H__
 
+#define CIK_FLUSH_GPU_TLB_NUM_WREG	3
+
 void cik_srbm_select(struct amdgpu_device *adev,
 		     u32 me, u32 pipe, u32 queue, u32 vmid);
 int cik_set_ip_blocks(struct amdgpu_device *adev);

+ 3 - 4
drivers/gpu/drm/amd/amdgpu/cik_dpm.h

@@ -24,8 +24,7 @@
 #ifndef __CIK_DPM_H__
 #define __CIK_DPM_H__
 
-extern const struct amd_ip_funcs ci_dpm_ip_funcs;
-extern const struct amd_ip_funcs kv_dpm_ip_funcs;
-extern const struct amd_pm_funcs ci_dpm_funcs;
-extern const struct amd_pm_funcs kv_dpm_funcs;
+extern const struct amdgpu_ip_block_version ci_smu_ip_block;
+extern const struct amdgpu_ip_block_version kv_smu_ip_block;
+
 #endif

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/cik_ih.c

@@ -111,7 +111,7 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
 	cik_ih_disable_interrupts(adev);
 
 	/* setup interrupt control */
-	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
 	interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
 	/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
 	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -281,7 +281,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
 	entry->vmid = (dw[2] >> 8) & 0xff;
-	entry->pas_id = (dw[2] >> 16) & 0xffff;
+	entry->pasid = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
 	adev->irq.ih.rptr += 16;

+ 14 - 27
drivers/gpu/drm/amd/amdgpu/cik_sdma.c

@@ -261,13 +261,6 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }
 
-static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	amdgpu_ring_write(ring, mmHDP_DEBUG0);
-	amdgpu_ring_write(ring, 1);
-}
-
 /**
  * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -317,7 +310,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
 
 	if ((adev->mman.buffer_funcs_ring == sdma0) ||
 	    (adev->mman.buffer_funcs_ring == sdma1))
-		amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+			amdgpu_ttm_set_buffer_funcs_status(adev, false);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -517,7 +510,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		}
 
 		if (adev->mman.buffer_funcs_ring == ring)
-			amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+			amdgpu_ttm_set_buffer_funcs_status(adev, true);
 	}
 
 	return 0;
@@ -885,18 +878,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
 
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	if (vmid < 8) {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
-	} else {
-		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
-	}
-	amdgpu_ring_write(ring, pd_addr >> 12);
-
-	/* flush TLB */
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
@@ -906,6 +888,14 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }
 
+static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring,
+				    uint32_t reg, uint32_t val)
+{
+	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, val);
+}
+
 static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
 				 bool enable)
 {
@@ -1279,9 +1269,9 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.set_wptr = cik_sdma_ring_set_wptr,
 	.emit_frame_size =
 		6 + /* cik_sdma_ring_emit_hdp_flush */
-		3 + /* cik_sdma_ring_emit_hdp_invalidate */
+		3 + /* hdp invalidate */
 		6 + /* cik_sdma_ring_emit_pipeline_sync */
-		12 + /* cik_sdma_ring_emit_vm_flush */
+		CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */
 		9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */
 	.emit_ib = cik_sdma_ring_emit_ib,
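
On the CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 term above: each register write emitted through cik_sdma_ring_emit_wreg() costs three ring dwords (SRBM_WRITE packet, register, value), the generic GMC TLB flush issues CIK_FLUSH_GPU_TLB_NUM_WREG of them, and the trailing POLL_REG_MEM that waits for the invalidate to complete takes six more. A small accounting sketch (cik_sdma_vm_flush_dw is an illustrative helper, not part of the patch):

#define CIK_FLUSH_GPU_TLB_NUM_WREG	3	/* from cik.h in this series */

static inline unsigned int cik_sdma_vm_flush_dw(void)
{
	return CIK_FLUSH_GPU_TLB_NUM_WREG * 3	/* SRBM register writes */
	       + 6;				/* POLL_REG_MEM wait */
}

/* cik_sdma_vm_flush_dw() == 15 dwords reserved per VM flush */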
@@ -1289,11 +1279,11 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
 	.emit_vm_flush = cik_sdma_ring_emit_vm_flush,
 	.emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
 	.test_ring = cik_sdma_ring_test_ring,
 	.test_ib = cik_sdma_ring_test_ib,
 	.insert_nop = cik_sdma_ring_insert_nop,
 	.pad_ib = cik_sdma_ring_pad_ib,
+	.emit_wreg = cik_sdma_ring_emit_wreg,
 };
 
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1391,9 +1381,6 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 	.copy_pte = cik_sdma_vm_copy_pte,
 
 	.write_pte = cik_sdma_vm_write_pte,
-
-	.set_max_nums_pte_pde = 0x1fffff >> 3,
-	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = cik_sdma_vm_set_pte_pde,
 };
 

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/cz_ih.c

@@ -111,7 +111,7 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
 	cz_ih_disable_interrupts(adev);
 
 	/* setup interrupt control */
-	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
 	interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
 	/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
 	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -260,7 +260,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
 	entry->vmid = (dw[2] >> 8) & 0xff;
-	entry->pas_id = (dw[2] >> 16) & 0xffff;
+	entry->pasid = (dw[2] >> 16) & 0xffff;
 
 	/* wptr/rptr are in bytes! */
 	adev->irq.ih.rptr += 16;

Some files were not shown because too many files changed in this diff