Merge tag 'drm-next-2018-10-24' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "This is going to rebuild more than drm as it adds a new helper to
  list.h for doing bulk updates. Seemed like a reasonable addition to
  me.

   Otherwise it's the usual merge window stuff: lots of i915 and amdgpu,
   not so much nouveau, and piles of everything else.

  Core:
   - Adds a new list.h helper for doing bulk list updates for TTM (a
     short sketch follows this list).
   - Don't leak the fb physical address in smem_start to userspace (comes
     with a Kconfig workaround, DRM_FBDEV_LEAK_PHYS_SMEM, for people using
     out-of-tree Mali hacks)
   - udmabuf device to turn memfd regions into dma-buf
   - Per-plane blend mode property
   - ref/unref replacements with get/put
   - fbdev conflicting framebuffers code cleaned up
   - host-endian format variants
   - panel orientation quirk for Acer One 10
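
   A minimal sketch of the idea behind the new list.h helper (hedged:
   modeled on the list_bulk_move_tail() this merge adds to
   include/linux/list.h; see that file for the authoritative version):

	/* Move the sub-chain [first, last] to the tail of @head in O(1),
	 * instead of one list_move_tail() call per element.  TTM uses
	 * this to bulk-move ranges of its LRU lists. */
	static inline void list_bulk_move_tail(struct list_head *head,
					       struct list_head *first,
					       struct list_head *last)
	{
		/* unlink the [first, last] chain from its current list */
		first->prev->next = last->next;
		last->next->prev = first->prev;

		/* splice the chain in at the tail of @head */
		head->prev->next = first;
		first->prev = head->prev;
		last->next = head;
		head->prev = last;
	}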

  bridge:
   - TI SN65DSI86 chip support

  vkms:
   - GEM support
   - Cursor support

  amdgpu:
   - Merge amdkfd and amdgpu into one module
   - CEC over DP AUX support
   - Picasso APU support + VCN dynamic powergating
   - Raven2 APU support
   - Vega20 enablement + kfd support
   - ACP powergating improvements
   - ABGR/XBGR display support
   - VCN jpeg support
   - xGMI support
   - DC i2c/aux cleanup
   - YCbCr 4:2:0 support
   - GPUVM improvements
   - Powerplay fixes, including endianness fixes
   - Display underflow fixes

  vmwgfx:
   - Move vmwgfx specific TTM code to vmwgfx
   - Split out vmwgfx buffer/resource validation code
   - Atomic operation rework

  bochs:
   - use more helpers
   - format/byteorder improvements

  qxl:
   - use more helpers

  i915:
   - GGTT coherency getparam
   - Turn off resource streamer API
   - More Icelake enablement + DMC firmware
   - Full PPGTT for Ivybridge, Haswell and Valleyview
   - DDB distribution based on resolution
   - Limited range DP display support

  nouveau:
   - CEC over DP AUX support
   - Initial HDMI 2.0 support

  virtio-gpu:
   - vmap support for PRIME objects

  tegra:
   - Initial Tegra194 support
   - DMA/IOMMU integration fixes

  msm:
   - a6xx perf improvements + clock prefix
   - GPU preemption optimisations
   - a6xx devfreq support
   - cursor support

  rockchip:
   - PX30 support
   - RGB output interface support

  mediatek:
   - HDMI output support on mt2701 and mt7623

  rcar-du:
   - Interlaced modes on Gen3
   - LVDS on R8A77980
   - D3 and E3 SoC support

  hisilicon:
   - misc fixes

  mxsfb:
   - runtime PM support

  sun4i:
   - R40 TCON support
   - Allwinner A64 support
   - R40 HDMI support

  omapdrm:
   - Driver rework changing display pipeline ordering to use common code
   - DMM memory barrier and irq fixes
   - Errata workarounds

  exynos:
   - out-bridge support for LVDS bridge driver
   - Samsung 16x16 tiled format support
   - Plane alpha and pixel blend mode support

  tilcdc:
   - suspend/resume update

  mali-dp:
   - misc updates"

* tag 'drm-next-2018-10-24' of git://anongit.freedesktop.org/drm/drm: (1382 commits)
  firmware/dmc/icl: Add missing MODULE_FIRMWARE() for Icelake.
  drm/i915/icl: Fix signal_levels
  drm/i915/icl: Fix DDI/TC port clk_off bits
  drm/i915/icl: create function to identify combophy port
  drm/i915/gen9+: Fix initial readout for Y tiled framebuffers
  drm/i915: Large page offsets for pread/pwrite
  drm/i915/selftests: Disable shrinker across mmap-exhaustion
  drm/i915/dp: Link train Fallback on eDP only if fallback link BW can fit panel's native mode
  drm/i915: Fix intel_dp_mst_best_encoder()
  drm/i915: Skip vcpi allocation for MSTB ports that are gone
  drm/i915: Don't unset intel_connector->mst_port
  drm/i915: Only reset seqno if actually idle
  drm/i915: Use the correct crtc when sanitizing plane mapping
  drm/i915: Restore vblank interrupts earlier
  drm/i915: Check fb stride against plane max stride
  drm/amdgpu/vcn:Fix uninitialized symbol error
  drm: panel-orientation-quirks: Add quirk for Acer One 10 (S1003)
  drm/amd/amdgpu: Fix debugfs error handling
  drm/amdgpu: Update gc_9_0 golden settings.
  drm/amd/powerplay: update PPtable with DC BTC and Tvr SocLimit fields
  ...
Linus Torvalds, 6 years ago
Commit 53b3b6bbfd
100 files changed, 5329 insertions and 3346 deletions
   1. +23 -0    Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt
   2. +7 -1     Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt
   3. +13 -1    Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
   4. +87 -0    Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt
   5. +35 -0    Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
   6. +6 -19    Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
   7. +145 -8   Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
   8. +4 -0     Documentation/devicetree/bindings/display/renesas,du.txt
   9. +3 -0     Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt
  10. +14 -2    Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt
  11. +1 -0     Documentation/gpu/drivers.rst
  12. +15 -3    Documentation/gpu/drm-kms.rst
  13. +3 -3     Documentation/gpu/drm-mm.rst
  14. +15 -56   Documentation/gpu/todo.rst
  15. +24 -0    Documentation/gpu/vkms.rst
  16. +1 -0     Documentation/ioctl/ioctl-number.txt
  17. +18 -2    MAINTAINERS
  18. +9 -0     drivers/dma-buf/Kconfig
  19. +1 -0     drivers/dma-buf/Makefile
  20. +0 -1     drivers/dma-buf/dma-buf.c
  21. +293 -0   drivers/dma-buf/udmabuf.c
  22. +20 -2    drivers/gpu/drm/Kconfig
  23. +2 -1     drivers/gpu/drm/Makefile
  24. +1 -0     drivers/gpu/drm/amd/amdgpu/Kconfig
  25. +12 -6    drivers/gpu/drm/amd/amdgpu/Makefile
  26. +55 -649  drivers/gpu/drm/amd/amdgpu/amdgpu.h
  27. +67 -140  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
  28. +13 -5    drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
  29. +49 -44   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
  30. +6 -4     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
  31. +5 -3     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
  32. +7 -6     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
  33. +6 -5     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
  34. +31 -13   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
  35. +1 -0     drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
  36. +10 -6    drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
  37. +26 -19   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
  38. +0 -1     drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
  39. +1 -0     drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
  40. +155 -164 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
  41. +176 -86  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
  42. +88 -0    drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
  43. +2 -10    drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
  44. +287 -271 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
  45. +15 -0    drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
  46. +7 -0     drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
  47. +120 -49  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
  48. +1 -0     drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
  49. +1 -0     drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
  50. +11 -5    drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
  51. +15 -16   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
  52. +1 -2     drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
  53. +0 -7     drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
  54. +8 -14    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
  55. +92 -0    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
  56. +73 -1    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
  57. +310 -41  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
  58. +215 -0   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
  59. +71 -0    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
  60. +13 -0    drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
  61. +1 -1     drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
  62. +66 -173  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
  63. +20 -34   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
  64. +37 -8    drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
  65. +22 -3    drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
  66. +9 -3     drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
  67. +205 -108 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
  68. +35 -61   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
  69. +4 -21    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
  70. +235 -38  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
  71. +27 -26   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
  72. +24 -5    drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
  73. +53 -12   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
  74. +0 -316   drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
  75. +25 -98   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
  76. +29 -6    drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
  77. +3 -3     drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
  78. +44 -0    drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
  79. +97 -0    drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
  80. +27 -3    drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
  81. +1 -1     drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
  82. +133 -91  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
  83. +1 -0     drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
  84. +51 -58   drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
  85. +38 -1    drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
  86. +183 -13  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
  87. +15 -3    drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
  88. +2 -7     drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
  89. +970 -506 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
  90. +59 -25   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
  91. +40 -12   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
  92. +119 -0   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
  93. +1 -0     drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
  94. +2 -2     drivers/gpu/drm/amd/amdgpu/ci_dpm.c
  95. +9 -8     drivers/gpu/drm/amd/amdgpu/cik.c
  96. +4 -5     drivers/gpu/drm/amd/amdgpu/cik_ih.c
  97. +12 -14   drivers/gpu/drm/amd/amdgpu/cik_sdma.c
  98. +4 -5     drivers/gpu/drm/amd/amdgpu/cz_ih.c
  99. +16 -5    drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
 100. +16 -5    drivers/gpu/drm/amd/amdgpu/dce_v11_0.c

+ 23 - 0
Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt

@@ -15,6 +15,13 @@ Required children nodes:
  to external devices using the OF graph reprensentation (see ../graph.txt).
  At least one port node is required.
 
+Optional properties in grandchild nodes:
+ Any endpoint grandchild node may specify a desired video interface
+ according to ../../media/video-interfaces.txt, specifically
+ - bus-width: recognized values are <12>, <16>, <18> and <24>, and
+   override any output mode selection heuristic, forcing "rgb444",
+   "rgb565", "rgb666" and "rgb888" respectively.
+
 Example:
 
 	hlcdc: hlcdc@f0030000 {
@@ -50,3 +57,19 @@ Example:
 			#pwm-cells = <3>;
 		};
 	};
+
+Example 2: With a video interface override to force rgb565; as above
+but with these changes/additions:
+
+	&hlcdc {
+		hlcdc-display-controller {
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb565>;
+
+			port@0 {
+				hlcdc_panel_output: endpoint@0 {
+					bus-width = <16>;
+				};
+			};
+		};
+	};

+ 7 - 1
Documentation/devicetree/bindings/display/bridge/lvds-transmitter.txt

@@ -22,7 +22,13 @@ among others.
 
 Required properties:
 
-- compatible: Must be "lvds-encoder"
+- compatible: Must be one or more of the following
+  - "ti,ds90c185" for the TI DS90C185 FPD-Link Serializer
+  - "lvds-encoder" for a generic LVDS encoder device
+
+  When compatible with the generic version, nodes must list the
+  device-specific version corresponding to the device first
+  followed by the generic version.
 
 Required nodes:
 

+ 13 - 1
Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt

@@ -14,10 +14,22 @@ Required properties:
   - "renesas,r8a7795-lvds" for R8A7795 (R-Car H3) compatible LVDS encoders
   - "renesas,r8a7796-lvds" for R8A7796 (R-Car M3-W) compatible LVDS encoders
   - "renesas,r8a77970-lvds" for R8A77970 (R-Car V3M) compatible LVDS encoders
+  - "renesas,r8a77980-lvds" for R8A77980 (R-Car V3H) compatible LVDS encoders
+  - "renesas,r8a77990-lvds" for R8A77990 (R-Car E3) compatible LVDS encoders
   - "renesas,r8a77995-lvds" for R8A77995 (R-Car D3) compatible LVDS encoders
 
 - reg: Base address and length for the memory-mapped registers
-- clocks: A phandle + clock-specifier pair for the functional clock
+- clocks: A list of phandles + clock-specifier pairs, one for each entry in
+  the clock-names property.
+- clock-names: Name of the clocks. This property is model-dependent.
+  - The functional clock, which mandatory for all models, shall be listed
+    first, and shall be named "fck".
+  - On R8A77990 and R8A77995, the LVDS encoder can use the EXTAL or
+    DU_DOTCLKINx clocks. Those clocks are optional. When supplied they must be
+    named "extal" and "dclkin.x" respectively, with "x" being the DU_DOTCLKIN
+    numerical index.
+  - When the clocks property only contains the functional clock, the
+    clock-names property may be omitted.
 - resets: A phandle + reset specifier for the module reset
 
 Required nodes:

+ 87 - 0
Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.txt

@@ -0,0 +1,87 @@
+SN65DSI86 DSI to eDP bridge chip
+--------------------------------
+
+This is the binding for Texas Instruments SN65DSI86 bridge.
+http://www.ti.com/general/docs/lit/getliterature.tsp?genericPartNumber=sn65dsi86&fileType=pdf
+
+Required properties:
+- compatible: Must be "ti,sn65dsi86"
+- reg: i2c address of the chip, 0x2d as per datasheet
+- enable-gpios: gpio specification for bridge_en pin (active high)
+
+- vccio-supply: A 1.8V supply that powers up the digital IOs.
+- vpll-supply: A 1.8V supply that powers up the displayport PLL.
+- vcca-supply: A 1.2V supply that powers up the analog circuits.
+- vcc-supply: A 1.2V supply that powers up the digital core.
+
+Optional properties:
+- interrupts-extended: Specifier for the SN65DSI86 interrupt line.
+
+- gpio-controller: Marks the device has a GPIO controller.
+- #gpio-cells    : Should be two. The first cell is the pin number and
+                   the second cell is used to specify flags.
+                   See ../../gpio/gpio.txt for more information.
+- #pwm-cells : Should be one. See ../../pwm/pwm.txt for description of
+               the cell formats.
+
+- clock-names: should be "refclk"
+- clocks: Specification for input reference clock. The reference
+	  clock rate must be 12 MHz, 19.2 MHz, 26 MHz, 27 MHz or 38.4 MHz.
+
+- data-lanes: See ../../media/video-interface.txt
+- lane-polarities: See ../../media/video-interface.txt
+
+- suspend-gpios: specification for GPIO1 pin on bridge (active low)
+
+Required nodes:
+This device has two video ports. Their connections are modelled using the
+OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+- Video port 0 for DSI input
+- Video port 1 for eDP output
+
+Example
+-------
+
+edp-bridge@2d {
+	compatible = "ti,sn65dsi86";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x2d>;
+
+	enable-gpios = <&msmgpio 33 GPIO_ACTIVE_HIGH>;
+	suspend-gpios = <&msmgpio 34 GPIO_ACTIVE_LOW>;
+
+	interrupts-extended = <&gpio3 4 IRQ_TYPE_EDGE_FALLING>;
+
+	vccio-supply = <&pm8916_l17>;
+	vcca-supply = <&pm8916_l6>;
+	vpll-supply = <&pm8916_l17>;
+	vcc-supply = <&pm8916_l6>;
+
+	clock-names = "refclk";
+	clocks = <&input_refclk>;
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			reg = <0>;
+
+			edp_bridge_in: endpoint {
+				remote-endpoint = <&dsi_out>;
+			};
+		};
+
+		port@1 {
+			reg = <1>;
+
+			edp_bridge_out: endpoint {
+				data-lanes = <2 1 3 0>;
+				lane-polarities = <0 1 0 1>;
+				remote-endpoint = <&edp_panel_in>;
+			};
+		};
+	};
+}

+ 35 - 0
Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt

@@ -0,0 +1,35 @@
+TC358764 MIPI-DSI to LVDS panel bridge
+
+Required properties:
+  - compatible: "toshiba,tc358764"
+  - reg: the virtual channel number of a DSI peripheral
+  - vddc-supply: core voltage supply, 1.2V
+  - vddio-supply: I/O voltage supply, 1.8V or 3.3V
+  - vddlvds-supply: LVDS1/2 voltage supply, 3.3V
+  - reset-gpios: a GPIO spec for the reset pin
+
+The device node can contain following 'port' child nodes,
+according to the OF graph bindings defined in [1]:
+  0: DSI Input, not required, if the bridge is DSI controlled
+  1: LVDS Output, mandatory
+
+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+
+	bridge@0 {
+		reg = <0>;
+		compatible = "toshiba,tc358764";
+		vddc-supply = <&vcc_1v2_reg>;
+		vddio-supply = <&vcc_1v8_reg>;
+		vddlvds-supply = <&vcc_3v3_reg>;
+		reset-gpios = <&gpd1 6 GPIO_ACTIVE_LOW>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		port@1 {
+			reg = <1>;
+			lvds_ep: endpoint {
+				remote-endpoint = <&panel_ep>;
+			};
+		};
+	};

+ 6 - 19
Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt

@@ -21,6 +21,9 @@ Required properties:
   - samsung,pll-clock-frequency: specifies frequency of the oscillator clock
   - #address-cells, #size-cells: should be set respectively to <1> and <0>
     according to DSI host bindings (see MIPI DSI bindings [1])
+  - samsung,burst-clock-frequency: specifies DSI frequency in high-speed burst
+    mode
+  - samsung,esc-clock-frequency: specifies DSI frequency in escape mode
 
 Optional properties:
   - power-domains: a phandle to DSIM power domain node
@@ -29,25 +32,9 @@ Child nodes:
   Should contain DSI peripheral nodes (see MIPI DSI bindings [1]).
 
 Video interfaces:
-  Device node can contain video interface port nodes according to [2].
-  The following are properties specific to those nodes:
-
-  port node inbound:
-    - reg: (required) must be 0.
-  port node outbound:
-    - reg: (required) must be 1.
-
-  endpoint node connected from mic node (reg = 0):
-    - remote-endpoint: specifies the endpoint in mic node. This node is required
-		       for Exynos5433 mipi dsi. So mic can access to panel node
-		       throughout this dsi node.
-  endpoint node connected to panel node (reg = 1):
-    - remote-endpoint: specifies the endpoint in panel node. This node is
-		       required in all kinds of exynos mipi dsi to represent
-		       the connection between mipi dsi and panel.
-    - samsung,burst-clock-frequency: specifies DSI frequency in high-speed burst
-      mode
-    - samsung,esc-clock-frequency: specifies DSI frequency in escape mode
+  Device node can contain following video interface port nodes according to [2]:
+  0: RGB input,
+  1: DSI output
 
 [1]: Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
 [2]: Documentation/devicetree/bindings/media/video-interfaces.txt

+ 145 - 8
Documentation/devicetree/bindings/display/mipi-dsi-bus.txt

@@ -16,7 +16,7 @@ The following assumes that only a single peripheral is connected to a DSI
 host. Experience shows that this is true for the large majority of setups.
 
 DSI host
---------
+========
 
 In addition to the standard properties and those defined by the parent bus of
 a DSI host, the following properties apply to a node representing a DSI host.
@@ -29,12 +29,24 @@ Required properties:
 - #size-cells: Should be 0. There are cases where it makes sense to use a
   different value here. See below.
 
+Optional properties:
+- clock-master: boolean. Should be enabled if the host is being used in
+  conjunction with another DSI host to drive the same peripheral. Hardware
+  supporting such a configuration generally requires the data on both the busses
+  to be driven by the same clock. Only the DSI host instance controlling this
+  clock should contain this property.
+
 DSI peripheral
---------------
+==============
+
+Peripherals with DSI as control bus, or no control bus
+------------------------------------------------------
 
-Peripherals are represented as child nodes of the DSI host's node. Properties
-described here apply to all DSI peripherals, but individual bindings may want
-to define additional, device-specific properties.
+Peripherals with the DSI bus as the primary control bus, or peripherals with
+no control bus but use the DSI bus to transmit pixel data are represented
+as child nodes of the DSI host's node. Properties described here apply to all
+DSI peripherals, but individual bindings may want to define additional,
+device-specific properties.
 
 Required properties:
 - reg: The virtual channel number of a DSI peripheral. Must be in the range
@@ -49,9 +61,37 @@ case two alternative representations can be chosen:
   property is the number of the first virtual channel and the second cell is
   the number of consecutive virtual channels.
 
-Example
--------
-
+Peripherals with a different control bus
+----------------------------------------
+
+There are peripherals that have I2C/SPI (or some other non-DSI bus) as the
+primary control bus, but are also connected to a DSI bus (mostly for the data
+path). Connections between such peripherals and a DSI host can be represented
+using the graph bindings [1], [2].
+
+Peripherals that support dual channel DSI
+-----------------------------------------
+
+Peripherals with higher bandwidth requirements can be connected to 2 DSI
+busses. Each DSI bus/channel drives some portion of the pixel data (generally
+left/right half of each line of the display, or even/odd lines of the display).
+The graph bindings should be used to represent the multiple DSI busses that are
+connected to this peripheral. Each DSI host's output endpoint can be linked to
+an input endpoint of the DSI peripheral.
+
+[1] Documentation/devicetree/bindings/graph.txt
+[2] Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Examples
+========
+- (1), (2) and (3) are examples of a DSI host and peripheral on the DSI bus
+  with different virtual channel configurations.
+- (4) is an example of a peripheral on a I2C control bus connected to a
+  DSI host using of-graph bindings.
+- (5) is an example of 2 DSI hosts driving a dual-channel DSI peripheral,
+  which uses I2C as its primary control bus.
+
+1)
 	dsi-host {
 		...
 
@@ -67,6 +107,7 @@ Example
 		...
 	};
 
+2)
 	dsi-host {
 		...
 
@@ -82,6 +123,7 @@ Example
 		...
 	};
 
+3)
 	dsi-host {
 		...
 
@@ -96,3 +138,98 @@ Example
 
 		...
 	};
+
+4)
+	i2c-host {
+		...
+
+		dsi-bridge@35 {
+			compatible = "...";
+			reg = <0x35>;
+
+			ports {
+				...
+
+				port {
+					bridge_mipi_in: endpoint {
+						remote-endpoint = <&host_mipi_out>;
+					};
+				};
+			};
+		};
+	};
+
+	dsi-host {
+		...
+
+		ports {
+			...
+
+			port {
+				host_mipi_out: endpoint {
+					remote-endpoint = <&bridge_mipi_in>;
+				};
+			};
+		};
+	};
+
+5)
+	i2c-host {
+		dsi-bridge@35 {
+			compatible = "...";
+			reg = <0x35>;
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					dsi0_in: endpoint {
+						remote-endpoint = <&dsi0_out>;
+					};
+				};
+
+				port@1 {
+					reg = <1>;
+					dsi1_in: endpoint {
+						remote-endpoint = <&dsi1_out>;
+					};
+				};
+			};
+		};
+	};
+
+	dsi0-host {
+		...
+
+		/*
+		 * this DSI instance drives the clock for both the host
+		 * controllers
+		 */
+		clock-master;
+
+		ports {
+			...
+
+			port {
+				dsi0_out: endpoint {
+					remote-endpoint = <&dsi0_in>;
+				};
+			};
+		};
+	};
+
+	dsi1-host {
+		...
+
+		ports {
+			...
+
+			port {
+				dsi1_out: endpoint {
+					remote-endpoint = <&dsi1_in>;
+				};
+			};
+		};
+	};

+ 4 - 0
Documentation/devicetree/bindings/display/renesas,du.txt

@@ -15,6 +15,8 @@ Required Properties:
     - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU
     - "renesas,du-r8a77965" for R8A77965 (R-Car M3-N) compatible DU
     - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU
+    - "renesas,du-r8a77980" for R8A77980 (R-Car V3H) compatible DU
+    - "renesas,du-r8a77990" for R8A77990 (R-Car E3) compatible DU
     - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU
 
   - reg: the memory-mapped I/O registers base address and length
@@ -61,6 +63,8 @@ corresponding to each DU output.
  R8A7796 (R-Car M3-W)   DPAD 0         HDMI 0         LVDS 0         -
  R8A77965 (R-Car M3-N)  DPAD 0         HDMI 0         LVDS 0         -
  R8A77970 (R-Car V3M)   DPAD 0         LVDS 0         -              -
+ R8A77980 (R-Car V3H)   DPAD 0         LVDS 0         -              -
+ R8A77990 (R-Car E3)    DPAD 0         LVDS 0         LVDS 1         -
  R8A77995 (R-Car D3)    DPAD 0         LVDS 0         LVDS 1         -
 
 

+ 3 - 0
Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt

@@ -8,6 +8,9 @@ Required properties:
 - compatible: value should be one of the following
 		"rockchip,rk3036-vop";
 		"rockchip,rk3126-vop";
+		"rockchip,px30-vop-lit";
+		"rockchip,px30-vop-big";
+		"rockchip,rk3188-vop";
 		"rockchip,rk3288-vop";
 		"rockchip,rk3368-vop";
 		"rockchip,rk3366-vop";

+ 14 - 2
Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt

@@ -78,6 +78,7 @@ Required properties:
 
   - compatible: value must be one of:
     * "allwinner,sun8i-a83t-dw-hdmi"
+    * "allwinner,sun50i-a64-dw-hdmi", "allwinner,sun8i-a83t-dw-hdmi"
   - reg: base address and size of memory-mapped region
   - reg-io-width: See dw_hdmi.txt. Shall be 1.
   - interrupts: HDMI interrupt number
@@ -96,6 +97,9 @@ Required properties:
     first port should be the input endpoint. The second should be the
     output, usually to an HDMI connector.
 
+Optional properties:
+  - hvcc-supply: the VCC power supply of the controller
+
 DWC HDMI PHY
 ------------
 
@@ -103,6 +107,7 @@ Required properties:
   - compatible: value must be one of:
     * allwinner,sun8i-a83t-hdmi-phy
     * allwinner,sun8i-h3-hdmi-phy
+    * allwinner,sun8i-r40-hdmi-phy
     * allwinner,sun50i-a64-hdmi-phy
   - reg: base address and size of memory-mapped region
   - clocks: phandles to the clocks feeding the HDMI PHY
@@ -112,9 +117,9 @@ Required properties:
   - resets: phandle to the reset controller driving the PHY
   - reset-names: must be "phy"
 
-H3 and A64 HDMI PHY require additional clocks:
+H3, A64 and R40 HDMI PHY require additional clocks:
   - pll-0: parent of phy clock
-  - pll-1: second possible phy clock parent (A64 only)
+  - pll-1: second possible phy clock parent (A64/R40 only)
 
 TV Encoder
 ----------
@@ -151,6 +156,8 @@ Required properties:
    * allwinner,sun8i-v3s-tcon
    * allwinner,sun9i-a80-tcon-lcd
    * allwinner,sun9i-a80-tcon-tv
+   * "allwinner,sun50i-a64-tcon-lcd", "allwinner,sun8i-a83t-tcon-lcd"
+   * "allwinner,sun50i-a64-tcon-tv", "allwinner,sun8i-a83t-tcon-tv"
  - reg: base address and size of memory-mapped region
  - interrupts: interrupt associated to this IP
  - clocks: phandles to the clocks feeding the TCON.
@@ -369,7 +376,11 @@ Required properties:
     * allwinner,sun8i-a83t-de2-mixer-0
     * allwinner,sun8i-a83t-de2-mixer-1
     * allwinner,sun8i-h3-de2-mixer-0
+    * allwinner,sun8i-r40-de2-mixer-0
+    * allwinner,sun8i-r40-de2-mixer-1
     * allwinner,sun8i-v3s-de2-mixer
+    * allwinner,sun50i-a64-de2-mixer-0
+    * allwinner,sun50i-a64-de2-mixer-1
   - reg: base address and size of the memory-mapped region.
   - clocks: phandles to the clocks feeding the mixer
     * bus: the mixer interface clock
@@ -403,6 +414,7 @@ Required properties:
     * allwinner,sun8i-r40-display-engine
     * allwinner,sun8i-v3s-display-engine
     * allwinner,sun9i-a80-display-engine
+    * allwinner,sun50i-a64-display-engine
 
   - allwinner,pipelines: list of phandle to the display engine
     frontends (DE 1.0) or mixers (DE 2.0) available.

+ 1 - 0
Documentation/gpu/drivers.rst

@@ -13,6 +13,7 @@ GPU Driver Documentation
    tve200
    v3d
    vc4
+   vkms
    bridge/dw-hdmi
    xen-front
 

+ 15 - 3
Documentation/gpu/drm-kms.rst

@@ -287,8 +287,14 @@ Atomic Mode Setting Function Reference
 .. kernel-doc:: drivers/gpu/drm/drm_atomic.c
    :export:
 
-.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
-   :internal:
+Atomic Mode Setting IOCTL and UAPI Functions
+--------------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
+   :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
+   :export:
 
 CRTC Abstraction
 ================
@@ -323,6 +329,12 @@ Frame Buffer Functions Reference
 DRM Format Handling
 ===================
 
+.. kernel-doc:: include/uapi/drm/drm_fourcc.h
+   :doc: overview
+
+Format Functions Reference
+--------------------------
+
 .. kernel-doc:: include/drm/drm_fourcc.h
    :internal:
 
@@ -560,7 +572,7 @@ Tile Group Property
 Explicit Fencing Properties
 ---------------------------
 
-.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
+.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
    :doc: explicit fencing properties
 
 Existing KMS Properties

+ 3 - 3
Documentation/gpu/drm-mm.rst

@@ -297,7 +297,7 @@ made up of several fields, the more interesting ones being:
 	struct vm_operations_struct {
 		void (*open)(struct vm_area_struct * area);
 		void (*close)(struct vm_area_struct * area);
-		int (*fault)(struct vm_fault *vmf);
+		vm_fault_t (*fault)(struct vm_fault *vmf);
 	};
 
 
@@ -505,7 +505,7 @@ GPU Scheduler
 Overview
 --------
 
-.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c
+.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
    :doc: Overview
 
 Scheduler Function References
@@ -514,5 +514,5 @@ Scheduler Function References
 .. kernel-doc:: include/drm/gpu_scheduler.h
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c
+.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
    :export:

+ 15 - 56
Documentation/gpu/todo.rst

@@ -127,7 +127,8 @@ interfaces to fix these issues:
   the acquire context explicitly on stack and then also pass it down into
   drivers explicitly so that the legacy-on-atomic functions can use them.
 
-  Except for some driver code this is done.
+  Except for some driver code this is done. This task should be finished by
+  adding WARN_ON(!drm_drv_uses_atomic_modeset) in drm_modeset_lock_all().
 
 * A bunch of the vtable hooks are now in the wrong place: DRM has a split
   between core vfunc tables (named ``drm_foo_funcs``), which are used to
@@ -137,13 +138,6 @@ interfaces to fix these issues:
   ``_helper_funcs`` since they are not part of the core ABI. There's a
   ``FIXME`` comment in the kerneldoc for each such case in ``drm_crtc.h``.
 
-* There's a new helper ``drm_atomic_helper_best_encoder()`` which could be
-  used by all atomic drivers which don't select the encoder for a given
-  connector at runtime. That's almost all of them, and would allow us to get
-  rid of a lot of ``best_encoder`` boilerplate in drivers.
-
-  This was almost done, but new drivers added a few more cases again.
-
 Contact: Daniel Vetter
 
 Get rid of dev->struct_mutex from GEM drivers
@@ -164,9 +158,8 @@ private lock. The tricky part is the BO free functions, since those can't
 reliably take that lock any more. Instead state needs to be protected with
 suitable subordinate locks or some cleanup work pushed to a worker thread. For
 performance-critical drivers it might also be better to go with a more
-fine-grained per-buffer object and per-context lockings scheme. Currently the
-following drivers still use ``struct_mutex``: ``msm``, ``omapdrm`` and
-``udl``.
+fine-grained per-buffer object and per-context lockings scheme. Currently only the
+``msm`` driver still use ``struct_mutex``.
 
 Contact: Daniel Vetter, respective driver maintainers
 
@@ -190,7 +183,8 @@ Convert drivers to use simple modeset suspend/resume
 
 Most drivers (except i915 and nouveau) that use
 drm_atomic_helper_suspend/resume() can probably be converted to use
-drm_mode_config_helper_suspend/resume().
+drm_mode_config_helper_suspend/resume(). Also there's still open-coded version
+of the atomic suspend/resume code in older atomic modeset drivers.
 
 Contact: Maintainer of the driver you plan to convert
 
@@ -246,20 +240,10 @@ Core refactorings
 Clean up the DRM header mess
 ----------------------------
 
-Currently the DRM subsystem has only one global header, ``drmP.h``. This is
-used both for functions exported to helper libraries and drivers and functions
-only used internally in the ``drm.ko`` module. The goal would be to move all
-header declarations not needed outside of ``drm.ko`` into
-``drivers/gpu/drm/drm_*_internal.h`` header files. ``EXPORT_SYMBOL`` also
-needs to be dropped for these functions.
-
-This would nicely tie in with the below task to create kerneldoc after the API
-is cleaned up. Or with the "hide legacy cruft better" task.
-
-Note that this is well in progress, but ``drmP.h`` is still huge. The updated
-plan is to switch to per-file driver API headers, which will also structure
-the kerneldoc better. This should also allow more fine-grained ``#include``
-directives.
+The DRM subsystem originally had only one huge global header, ``drmP.h``. This
+is now split up, but many source files still include it. The remaining part of
+the cleanup work here is to replace any ``#include <drm/drmP.h>`` by only the
+headers needed (and fixing up any missing pre-declarations in the headers).
 
 In the end no .c file should need to include ``drmP.h`` anymore.
 
@@ -278,26 +262,6 @@ See https://dri.freedesktop.org/docs/drm/ for what's there already.
 
 Contact: Daniel Vetter
 
-Hide legacy cruft better
-------------------------
-
-Way back DRM supported only drivers which shadow-attached to PCI devices with
-userspace or fbdev drivers setting up outputs. Modern DRM drivers take charge
-of the entire device, you can spot them with the DRIVER_MODESET flag.
-
-Unfortunately there's still large piles of legacy code around which needs to
-be hidden so that driver writers don't accidentally end up using it. And to
-prevent security issues in those legacy IOCTLs from being exploited on modern
-drivers. This has multiple possible subtasks:
-
-* Extract support code for legacy features into a ``drm-legacy.ko`` kernel
-  module and compile it only when one of the legacy drivers is enabled.
-
-This is mostly done, the only thing left is to split up ``drm_irq.c`` into
-legacy cruft and the parts needed by modern KMS drivers.
-
-Contact: Daniel Vetter
-
 Make panic handling work
 ------------------------
 
@@ -396,17 +360,12 @@ converting things over. For modeset tests we also first need a bit of
 infrastructure to use dumb buffers for untiled buffers, to be able to run all
 the non-i915 specific modeset tests.
 
-Contact: Daniel Vetter
-
-Create a virtual KMS driver for testing (vkms)
-----------------------------------------------
-
-With all the latest helpers it should be fairly simple to create a virtual KMS
-driver useful for testing, or for running X or similar on headless machines
-(to be able to still use the GPU). This would be similar to vgem, but aimed at
-the modeset side.
+Extend virtual test driver (VKMS)
+---------------------------------
 
-Once the basics are there there's tons of possibilities to extend it.
+See the documentation of :ref:`VKMS <vkms>` for more details. This is an ideal
+internship task, since it only requires a virtual machine and can be sized to
+fit the available time.
 
 Contact: Daniel Vetter
 
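
A hedged illustration of the drmP.h cleanup described in the todo above
(hypothetical example file; the right replacement headers depend on which
declarations each source file actually uses):

	-#include <drm/drmP.h>
	+#include <drm/drm_device.h>	/* struct drm_device */
	+#include <drm/drm_drv.h>	/* struct drm_driver */
	+#include <drm/drm_file.h>	/* struct drm_file */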

+ 24 - 0
Documentation/gpu/vkms.rst

@@ -0,0 +1,24 @@
+.. _vkms:
+
+==========================================
+ drm/vkms Virtual Kernel Modesetting
+==========================================
+
+.. kernel-doc:: drivers/gpu/drm/vkms/vkms_drv.c
+   :doc: vkms (Virtual Kernel Modesetting)
+
+TODO
+====
+
+CRC API
+-------
+
+- Optimize CRC computation ``compute_crc()`` and plane blending ``blend()``
+
+- Use the alpha value to blend vaddr_src with vaddr_dst instead of
+  overwriting it in ``blend()``.
+
+- Add igt test to check cleared alpha value for XRGB plane format.
+
+- Add igt test to check extreme alpha values i.e. fully opaque and fully
+  transparent (intermediate values are affected by hw-specific rounding modes).

+ 1 - 0
Documentation/ioctl/ioctl-number.txt

@@ -272,6 +272,7 @@ Code  Seq#(hex)	Include File		Comments
 't'	90-91	linux/toshiba.h		toshiba and toshiba_acpi SMM
 'u'	00-1F	linux/smb_fs.h		gone
 'u'	20-3F	linux/uvcvideo.h	USB video class host driver
+'u'	40-4f	linux/udmabuf.h		userspace dma-buf misc device
 'v'	00-1F	linux/ext2_fs.h		conflict!
 'v'	00-1F	linux/fs.h		conflict!
 'v'	00-0F	linux/sonypi.h		conflict!

+ 18 - 2
MAINTAINERS

@@ -4756,8 +4756,11 @@ F:	drivers/gpu/drm/tdfx/
 
 DRM DRIVER FOR USB DISPLAYLINK VIDEO ADAPTERS
 M:	Dave Airlie <airlied@redhat.com>
+R:	Sean Paul <sean@poorly.run>
+L:	dri-devel@lists.freedesktop.org
 S:	Odd Fixes
 F:	drivers/gpu/drm/udl/
+T:	git git://anongit.freedesktop.org/drm/drm-misc
 
 DRM DRIVER FOR VMWARE VIRTUAL GPU
 M:	"VMware Graphics" <linux-graphics-maintainer@vmware.com>
@@ -4787,8 +4790,8 @@ F:	include/uapi/drm/
 F:	include/linux/vga*
 
 DRM DRIVERS AND MISC GPU PATCHES
-M:	Gustavo Padovan <gustavo@padovan.org>
 M:	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+M:	Maxime Ripard <maxime.ripard@bootlin.com>
 M:	Sean Paul <sean@poorly.run>
 W:	https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
 S:	Maintained
@@ -4857,6 +4860,7 @@ F:	drivers/gpu/drm/fsl-dcu/
 F:	Documentation/devicetree/bindings/display/fsl,dcu.txt
 F:	Documentation/devicetree/bindings/display/fsl,tcon.txt
 F:	Documentation/devicetree/bindings/display/panel/nec,nl4827hc19-05b.txt
+T:	git git://anongit.freedesktop.org/drm/drm-misc
 
 DRM DRIVERS FOR FREESCALE IMX
 M:	Philipp Zabel <p.zabel@pengutronix.de>
@@ -4906,9 +4910,10 @@ F:	Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
 
 DRM DRIVERS FOR RENESAS
 M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+M:	Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
 L:	dri-devel@lists.freedesktop.org
 L:	linux-renesas-soc@vger.kernel.org
-T:	git git://linuxtv.org/pinchartl/fbdev
+T:	git git://linuxtv.org/pinchartl/media drm/du/next
 S:	Supported
 F:	drivers/gpu/drm/rcar-du/
 F:	drivers/gpu/drm/shmobile/
@@ -10001,9 +10006,12 @@ F:	drivers/media/tuners/mxl5007t.*
 
 MXSFB DRM DRIVER
 M:	Marek Vasut <marex@denx.de>
+M:	Stefan Agner <stefan@agner.ch>
+L:	dri-devel@lists.freedesktop.org
 S:	Supported
 F:	drivers/gpu/drm/mxsfb/
 F:	Documentation/devicetree/bindings/display/mxsfb.txt
+T:	git git://anongit.freedesktop.org/drm/drm-misc
 
 MYLEX DAC960 PCI RAID Controller
 M:	Hannes Reinecke <hare@kernel.org>
@@ -15530,6 +15538,14 @@ S:	Maintained
 F:	lib/iov_iter.c
 F:	include/linux/uio.h
 
+USERSPACE DMA BUFFER DRIVER
+M:	Gerd Hoffmann <kraxel@redhat.com>
+S:	Maintained
+L:	dri-devel@lists.freedesktop.org
+F:	drivers/dma-buf/udmabuf.c
+F:	include/uapi/linux/udmabuf.h
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
 USERSPACE I/O (UIO)
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 S:	Maintained

+ 9 - 0
drivers/dma-buf/Kconfig

@@ -30,4 +30,13 @@ config SW_SYNC
 	  WARNING: improper use of this can result in deadlocking kernel
 	  drivers from userspace. Intended for test and debug only.
 
+config UDMABUF
+	bool "userspace dmabuf misc driver"
+	default n
+	depends on DMA_SHARED_BUFFER
+	depends on MEMFD_CREATE || COMPILE_TEST
+	help
+	  A driver to let userspace turn memfd regions into dma-bufs.
+	  Qemu can use this to create host dmabufs for guest framebuffers.
+
 endmenu

+ 1 - 0
drivers/dma-buf/Makefile

@@ -1,3 +1,4 @@
 obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
 obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
+obj-$(CONFIG_UDMABUF)		+= udmabuf.o

+ 0 - 1
drivers/dma-buf/dma-buf.c

@@ -405,7 +405,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 			  || !exp_info->ops->map_dma_buf
 			  || !exp_info->ops->unmap_dma_buf
 			  || !exp_info->ops->release
-			  || !exp_info->ops->map
 			  || !exp_info->ops->mmap)) {
 		return ERR_PTR(-EINVAL);
 	}

+ 293 - 0
drivers/dma-buf/udmabuf.c

@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cred.h>
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/memfd.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/udmabuf.h>
+
+static const u32    list_limit = 1024;  /* udmabuf_create_list->count limit */
+static const size_t size_limit_mb = 64; /* total dmabuf size, in megabytes  */
+
+struct udmabuf {
+	pgoff_t pagecount;
+	struct page **pages;
+};
+
+static int udmabuf_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct udmabuf *ubuf = vma->vm_private_data;
+
+	vmf->page = ubuf->pages[vmf->pgoff];
+	get_page(vmf->page);
+	return 0;
+}
+
+static const struct vm_operations_struct udmabuf_vm_ops = {
+	.fault = udmabuf_vm_fault,
+};
+
+static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
+{
+	struct udmabuf *ubuf = buf->priv;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &udmabuf_vm_ops;
+	vma->vm_private_data = ubuf;
+	return 0;
+}
+
+static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
+				    enum dma_data_direction direction)
+{
+	struct udmabuf *ubuf = at->dmabuf->priv;
+	struct sg_table *sg;
+	int ret;
+
+	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
+	ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
+					0, ubuf->pagecount << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret < 0)
+		goto err;
+	if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction)) {
+		ret = -EINVAL;
+		goto err;
+	}
+	return sg;
+
+err:
+	sg_free_table(sg);
+	kfree(sg);
+	return ERR_PTR(ret);
+}
+
+static void unmap_udmabuf(struct dma_buf_attachment *at,
+			  struct sg_table *sg,
+			  enum dma_data_direction direction)
+{
+	sg_free_table(sg);
+	kfree(sg);
+}
+
+static void release_udmabuf(struct dma_buf *buf)
+{
+	struct udmabuf *ubuf = buf->priv;
+	pgoff_t pg;
+
+	for (pg = 0; pg < ubuf->pagecount; pg++)
+		put_page(ubuf->pages[pg]);
+	kfree(ubuf->pages);
+	kfree(ubuf);
+}
+
+static void *kmap_udmabuf(struct dma_buf *buf, unsigned long page_num)
+{
+	struct udmabuf *ubuf = buf->priv;
+	struct page *page = ubuf->pages[page_num];
+
+	return kmap(page);
+}
+
+static void kunmap_udmabuf(struct dma_buf *buf, unsigned long page_num,
+			   void *vaddr)
+{
+	kunmap(vaddr);
+}
+
+static const struct dma_buf_ops udmabuf_ops = {
+	.map_dma_buf	  = map_udmabuf,
+	.unmap_dma_buf	  = unmap_udmabuf,
+	.release	  = release_udmabuf,
+	.map		  = kmap_udmabuf,
+	.unmap		  = kunmap_udmabuf,
+	.mmap		  = mmap_udmabuf,
+};
+
+#define SEALS_WANTED (F_SEAL_SHRINK)
+#define SEALS_DENIED (F_SEAL_WRITE)
+
+static long udmabuf_create(const struct udmabuf_create_list *head,
+			   const struct udmabuf_create_item *list)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct file *memfd = NULL;
+	struct udmabuf *ubuf;
+	struct dma_buf *buf;
+	pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
+	struct page *page;
+	int seals, ret = -EINVAL;
+	u32 i, flags;
+
+	ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
+	if (!ubuf)
+		return -ENOMEM;
+
+	pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
+	for (i = 0; i < head->count; i++) {
+		if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
+			goto err;
+		if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
+			goto err;
+		ubuf->pagecount += list[i].size >> PAGE_SHIFT;
+		if (ubuf->pagecount > pglimit)
+			goto err;
+	}
+	ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages),
+				    GFP_KERNEL);
+	if (!ubuf->pages) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	pgbuf = 0;
+	for (i = 0; i < head->count; i++) {
+		ret = -EBADFD;
+		memfd = fget(list[i].memfd);
+		if (!memfd)
+			goto err;
+		if (!shmem_mapping(file_inode(memfd)->i_mapping))
+			goto err;
+		seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
+		if (seals == -EINVAL)
+			goto err;
+		ret = -EINVAL;
+		if ((seals & SEALS_WANTED) != SEALS_WANTED ||
+		    (seals & SEALS_DENIED) != 0)
+			goto err;
+		pgoff = list[i].offset >> PAGE_SHIFT;
+		pgcnt = list[i].size   >> PAGE_SHIFT;
+		for (pgidx = 0; pgidx < pgcnt; pgidx++) {
+			page = shmem_read_mapping_page(
+				file_inode(memfd)->i_mapping, pgoff + pgidx);
+			if (IS_ERR(page)) {
+				ret = PTR_ERR(page);
+				goto err;
+			}
+			ubuf->pages[pgbuf++] = page;
+		}
+		fput(memfd);
+		memfd = NULL;
+	}
+
+	exp_info.ops  = &udmabuf_ops;
+	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
+	exp_info.priv = ubuf;
+
+	buf = dma_buf_export(&exp_info);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto err;
+	}
+
+	flags = 0;
+	if (head->flags & UDMABUF_FLAGS_CLOEXEC)
+		flags |= O_CLOEXEC;
+	return dma_buf_fd(buf, flags);
+
+err:
+	while (pgbuf > 0)
+		put_page(ubuf->pages[--pgbuf]);
+	if (memfd)
+		fput(memfd);
+	kfree(ubuf->pages);
+	kfree(ubuf);
+	return ret;
+}
+
+static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
+{
+	struct udmabuf_create create;
+	struct udmabuf_create_list head;
+	struct udmabuf_create_item list;
+
+	if (copy_from_user(&create, (void __user *)arg,
+			   sizeof(create)))
+		return -EFAULT;
+
+	head.flags  = create.flags;
+	head.count  = 1;
+	list.memfd  = create.memfd;
+	list.offset = create.offset;
+	list.size   = create.size;
+
+	return udmabuf_create(&head, &list);
+}
+
+static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
+{
+	struct udmabuf_create_list head;
+	struct udmabuf_create_item *list;
+	int ret = -EINVAL;
+	u32 lsize;
+
+	if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
+		return -EFAULT;
+	if (head.count > list_limit)
+		return -EINVAL;
+	lsize = sizeof(struct udmabuf_create_item) * head.count;
+	list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
+	if (IS_ERR(list))
+		return PTR_ERR(list);
+
+	ret = udmabuf_create(&head, list);
+	kfree(list);
+	return ret;
+}
+
+static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
+			  unsigned long arg)
+{
+	long ret;
+
+	switch (ioctl) {
+	case UDMABUF_CREATE:
+		ret = udmabuf_ioctl_create(filp, arg);
+		break;
+	case UDMABUF_CREATE_LIST:
+		ret = udmabuf_ioctl_create_list(filp, arg);
+		break;
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+	return ret;
+}
+
+static const struct file_operations udmabuf_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl = udmabuf_ioctl,
+};
+
+static struct miscdevice udmabuf_misc = {
+	.minor          = MISC_DYNAMIC_MINOR,
+	.name           = "udmabuf",
+	.fops           = &udmabuf_fops,
+};
+
+static int __init udmabuf_dev_init(void)
+{
+	return misc_register(&udmabuf_misc);
+}
+
+static void __exit udmabuf_dev_exit(void)
+{
+	misc_deregister(&udmabuf_misc);
+}
+
+module_init(udmabuf_dev_init)
+module_exit(udmabuf_dev_exit)
+
+MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
+MODULE_LICENSE("GPL v2");
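
The matching userspace flow, as a hedged sketch grounded in the UAPI this
file implements (assumes the new <linux/udmabuf.h> header from this series
and a libc with a memfd_create() wrapper; error handling omitted):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include <linux/udmabuf.h>

	/* Create a sealed memfd and wrap it in a dma-buf via /dev/udmabuf.
	 * size must be page-aligned; returns the dma-buf fd, or -1. */
	int create_udmabuf(size_t size)
	{
		struct udmabuf_create create;
		int memfd, devfd, buffd;

		/* udmabuf_create() insists on F_SEAL_SHRINK and rejects
		 * F_SEAL_WRITE, see SEALS_WANTED/SEALS_DENIED above */
		memfd = memfd_create("guest-fb", MFD_ALLOW_SEALING);
		ftruncate(memfd, size);
		fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);

		devfd = open("/dev/udmabuf", O_RDWR);
		memset(&create, 0, sizeof(create));
		create.memfd  = memfd;
		create.flags  = UDMABUF_FLAGS_CLOEXEC;
		create.offset = 0;
		create.size   = size;
		buffd = ioctl(devfd, UDMABUF_CREATE, &create);

		close(devfd);
		close(memfd);	/* the dma-buf holds its own page references */
		return buffd;
	}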

+ 20 - 2
drivers/gpu/drm/Kconfig

@@ -110,6 +110,26 @@ config DRM_FBDEV_OVERALLOC
 	  is 100. Typical values for double buffering will be 200,
 	  triple buffering 300.
 
+config DRM_FBDEV_LEAK_PHYS_SMEM
+	bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)"
+	depends on DRM_FBDEV_EMULATION && EXPERT
+	default n
+	help
+	  In order to keep user-space compatibility, we want in certain
+	  use-cases to keep leaking the fbdev physical address to the
+	  user-space program handling the fbdev buffer.
+	  This affects, not only, Amlogic, Allwinner or Rockchip devices
+	  with ARM Mali GPUs using an userspace Blob.
+	  This option is not supported by upstream developers and should be
+	  removed as soon as possible and be considered as a broken and
+	  legacy behaviour from a modern fbdev device driver.
+
+	  Please send any bug reports when using this to your proprietary
+	  software vendor that requires this.
+
+	  If in doubt, say "N" or spread the word to your closed source
+	  library vendor.
+
 config DRM_LOAD_EDID_FIRMWARE
 	bool "Allow to specify an EDID data set instead of probing for it"
 	depends on DRM
@@ -285,8 +305,6 @@ source "drivers/gpu/drm/bridge/Kconfig"
 
 source "drivers/gpu/drm/sti/Kconfig"
 
-source "drivers/gpu/drm/amd/amdkfd/Kconfig"
-
 source "drivers/gpu/drm/imx/Kconfig"
 
 source "drivers/gpu/drm/v3d/Kconfig"

+ 2 - 1
drivers/gpu/drm/Makefile

@@ -18,7 +18,8 @@ drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
 		drm_encoder.o drm_mode_object.o drm_property.o \
 		drm_plane.o drm_color_mgmt.o drm_print.o \
 		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
-		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o
+		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
+		drm_atomic_uapi.o
 
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_DRM_VM) += drm_vm.o

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/Kconfig

@@ -42,3 +42,4 @@ config DRM_AMDGPU_GART_DEBUGFS
 
 source "drivers/gpu/drm/amd/acp/Kconfig"
 source "drivers/gpu/drm/amd/display/Kconfig"
+source "drivers/gpu/drm/amd/amdkfd/Kconfig"

+ 12 - 6
drivers/gpu/drm/amd/amdgpu/Makefile

@@ -35,7 +35,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_DISPLAY_PATH) \
 	-I$(FULL_AMD_DISPLAY_PATH)/include \
 	-I$(FULL_AMD_DISPLAY_PATH)/dc \
-	-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm
+	-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
+	-I$(FULL_AMD_PATH)/amdkfd
 
 amdgpu-y := amdgpu_drv.o
 
@@ -51,8 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \
-	amdgpu_ids.o
+	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
+	amdgpu_gmc.o amdgpu_xgmi.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
 
 amdgpu-y += \
 	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
-	vega20_reg_init.o
+	vega20_reg_init.o nbio_v7_4.o
 
 # add DF block
 amdgpu-y += \
@@ -73,7 +74,7 @@ amdgpu-y += \
 amdgpu-y += \
 	gmc_v7_0.o \
 	gmc_v8_0.o \
-	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o
+	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o
 
 # add IH block
 amdgpu-y += \
@@ -88,7 +89,8 @@ amdgpu-y += \
 amdgpu-y += \
 	amdgpu_psp.o \
 	psp_v3_1.o \
-	psp_v10_0.o
+	psp_v10_0.o \
+	psp_v11_0.o
 
 # add SMC block
 amdgpu-y += \
@@ -108,6 +110,7 @@ amdgpu-y += \
 
 # add async DMA block
 amdgpu-y += \
+	amdgpu_sdma.o \
 	sdma_v2_4.o \
 	sdma_v3_0.o \
 	sdma_v4_0.o
@@ -134,6 +137,9 @@ amdgpu-y += \
 amdgpu-y += amdgpu_amdkfd.o
 
 ifneq ($(CONFIG_HSA_AMD),)
+AMDKFD_PATH := ../amdkfd
+include $(FULL_AMD_PATH)/amdkfd/Makefile
+amdgpu-y += $(AMDKFD_FILES)
 amdgpu-y += \
 	 amdgpu_amdkfd_fence.o \
 	 amdgpu_amdkfd_gpuvm.o \

+ 55 - 649
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -28,6 +28,8 @@
 #ifndef __AMDGPU_H__
 #define __AMDGPU_H__
 
+#include "amdgpu_ctx.h"
+
 #include <linux/atomic.h>
 #include <linux/wait.h>
 #include <linux/list.h>
@@ -69,12 +71,32 @@
 #include "amdgpu_vcn.h"
 #include "amdgpu_mn.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_sdma.h"
 #include "amdgpu_dm.h"
 #include "amdgpu_virt.h"
 #include "amdgpu_gart.h"
 #include "amdgpu_debugfs.h"
 #include "amdgpu_job.h"
 #include "amdgpu_bo_list.h"
+#include "amdgpu_gem.h"
+
+#define MAX_GPU_INSTANCE		16
+
+struct amdgpu_gpu_instance
+{
+	struct amdgpu_device		*adev;
+	int				mgpu_fan_enabled;
+};
+
+struct amdgpu_mgpu_info
+{
+	struct amdgpu_gpu_instance	gpu_ins[MAX_GPU_INSTANCE];
+	struct mutex			mutex;
+	uint32_t			num_gpu;
+	uint32_t			num_dgpu;
+	uint32_t			num_apu;
+};
 
 /*
  * Modules parameters.
@@ -129,6 +151,7 @@ extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
 extern int amdgpu_emu_mode;
 extern uint amdgpu_smu_memory_pool_size;
+extern struct amdgpu_mgpu_info mgpu_info;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -148,9 +171,6 @@ extern int amdgpu_cik_support;
 #define AMDGPUFB_CONN_LIMIT			4
 #define AMDGPU_BIOS_NUM_SCRATCH			16
 
-/* max number of IP instances */
-#define AMDGPU_MAX_SDMA_INSTANCES		2
-
 /* hard reset data */
 #define AMDGPU_ASIC_RESET_DATA                  0x39d5e86b
 
@@ -171,13 +191,6 @@ extern int amdgpu_cik_support;
 #define AMDGPU_RESET_VCE			(1 << 13)
 #define AMDGPU_RESET_VCE1			(1 << 14)
 
-/* GFX current status */
-#define AMDGPU_GFX_NORMAL_MODE			0x00000000L
-#define AMDGPU_GFX_SAFE_MODE			0x00000001L
-#define AMDGPU_GFX_PG_DISABLED_MODE		0x00000002L
-#define AMDGPU_GFX_CG_DISABLED_MODE		0x00000004L
-#define AMDGPU_GFX_LBPW_DISABLED_MODE		0x00000008L
-
 /* max cursor sizes (in pixels) */
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
@@ -205,13 +218,6 @@ enum amdgpu_cp_irq {
 	AMDGPU_CP_IRQ_LAST
 };
 
-enum amdgpu_sdma_irq {
-	AMDGPU_SDMA_IRQ_TRAP0 = 0,
-	AMDGPU_SDMA_IRQ_TRAP1,
-
-	AMDGPU_SDMA_IRQ_LAST
-};
-
 enum amdgpu_thermal_irq {
 	AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0,
 	AMDGPU_THERMAL_IRQ_HIGH_TO_LOW,
@@ -224,6 +230,10 @@ enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
 
+#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
 int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state);
@@ -271,70 +281,6 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 			       const struct amdgpu_ip_block_version *ip_block_version);
 
-/* provided by hw blocks that can move/clear data.  e.g., gfx or sdma */
-struct amdgpu_buffer_funcs {
-	/* maximum bytes in a single operation */
-	uint32_t	copy_max_bytes;
-
-	/* number of dw to reserve per operation */
-	unsigned	copy_num_dw;
-
-	/* used for buffer migration */
-	void (*emit_copy_buffer)(struct amdgpu_ib *ib,
-				 /* src addr in bytes */
-				 uint64_t src_offset,
-				 /* dst addr in bytes */
-				 uint64_t dst_offset,
-				 /* number of byte to transfer */
-				 uint32_t byte_count);
-
-	/* maximum bytes in a single operation */
-	uint32_t	fill_max_bytes;
-
-	/* number of dw to reserve per operation */
-	unsigned	fill_num_dw;
-
-	/* used for buffer clearing */
-	void (*emit_fill_buffer)(struct amdgpu_ib *ib,
-				 /* value to write to memory */
-				 uint32_t src_data,
-				 /* dst addr in bytes */
-				 uint64_t dst_offset,
-				 /* number of byte to fill */
-				 uint32_t byte_count);
-};
-
-/* provided by hw blocks that can write ptes, e.g., sdma */
-struct amdgpu_vm_pte_funcs {
-	/* number of dw to reserve per operation */
-	unsigned	copy_pte_num_dw;
-
-	/* copy pte entries from GART */
-	void (*copy_pte)(struct amdgpu_ib *ib,
-			 uint64_t pe, uint64_t src,
-			 unsigned count);
-
-	/* write pte one entry at a time with addr mapping */
-	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
-			  uint64_t value, unsigned count,
-			  uint32_t incr);
-	/* for linear pte/pde updates without addr mapping */
-	void (*set_pte_pde)(struct amdgpu_ib *ib,
-			    uint64_t pe,
-			    uint64_t addr, unsigned count,
-			    uint32_t incr, uint64_t flags);
-};
-
-/* provided by the ih block */
-struct amdgpu_ih_funcs {
-	/* ring read/write ptr handling, called from interrupt context */
-	u32 (*get_wptr)(struct amdgpu_device *adev);
-	bool (*prescreen_iv)(struct amdgpu_device *adev);
-	void (*decode_iv)(struct amdgpu_device *adev,
-			  struct amdgpu_iv_entry *entry);
-	void (*set_rptr)(struct amdgpu_device *adev);
-};
-
 /*
  * BIOS.
  */
@@ -360,34 +306,6 @@ struct amdgpu_clock {
 	uint32_t max_pixel_clock;
 };
 
-/*
- * GEM.
- */
-
-#define AMDGPU_GEM_DOMAIN_MAX		0x3
-#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
-
-void amdgpu_gem_object_free(struct drm_gem_object *obj);
-int amdgpu_gem_object_open(struct drm_gem_object *obj,
-				struct drm_file *file_priv);
-void amdgpu_gem_object_close(struct drm_gem_object *obj,
-				struct drm_file *file_priv);
-unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg);
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
-					struct drm_gem_object *gobj,
-					int flags);
-struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
-					    struct dma_buf *dma_buf);
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
-void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
-void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
 /* sub-allocation manager, it has to be protected by another lock.
  * By conception this is an helper for other part of the driver
  * like the indirect buffer or semaphore, which both have their
@@ -437,22 +355,6 @@ struct amdgpu_sa_bo {
 	struct dma_fence	        *fence;
 };
 
-/*
- * GEM objects.
- */
-void amdgpu_gem_force_release(struct amdgpu_device *adev);
-int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-			     int alignment, u32 initial_domain,
-			     u64 flags, enum ttm_bo_type type,
-			     struct reservation_object *resv,
-			     struct drm_gem_object **obj);
-
-int amdgpu_mode_dumb_create(struct drm_file *file_priv,
-			    struct drm_device *dev,
-			    struct drm_mode_create_dumb *args);
-int amdgpu_mode_dumb_mmap(struct drm_file *filp,
-			  struct drm_device *dev,
-			  uint32_t handle, uint64_t *offset_p);
 int amdgpu_fence_slab_init(void);
 void amdgpu_fence_slab_fini(void);
 
@@ -525,16 +427,25 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
 	AMDGPU_DOORBELL64_GFX_RING0               = 0x8b,
 
 	/*
-	 * Other graphics doorbells can be allocated here: from 0x8c to 0xef
+	 * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
 	 * Graphics voltage island aperture 1
-	 * default non-graphics QWORD index is 0xF0 - 0xFF inclusive
+	 * default non-graphics QWORD index is 0xE0 - 0xFF inclusive
 	 */
 
-	/* sDMA engines */
-	AMDGPU_DOORBELL64_sDMA_ENGINE0            = 0xF0,
-	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xF1,
-	AMDGPU_DOORBELL64_sDMA_ENGINE1            = 0xF2,
-	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xF3,
+	/* sDMA engines reserved from 0xE0 to 0xEF */
+	AMDGPU_DOORBELL64_sDMA_ENGINE0            = 0xE0,
+	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xE1,
+	AMDGPU_DOORBELL64_sDMA_ENGINE1            = 0xE8,
+	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xE9,
+
+	/* For Vega10 SR-IOV, the SDMA doorbells must be fixed as follows
+	 * to keep the same assignments as the host driver; otherwise
+	 * conflicts will occur.
+	 */
+	AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0            = 0xF0,
+	AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xF1,
+	AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1            = 0xF2,
+	AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xF3,
 
 	/* Interrupt handler */
 	AMDGPU_DOORBELL64_IH                      = 0xF4,  /* For legacy interrupt ring buffer */
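
The enum above now carries two SDMA layouts: a relocated generic one based at 0xE0, and the legacy 0xF0 range that Vega10 must keep under SR-IOV to match the host driver. A minimal sketch of how a consumer would pick the base per ASIC (the helper name is hypothetical):

/* Hypothetical helper; mirrors the selection logic used by the KFD glue */
static u32 sdma_engine0_doorbell_base(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_VEGA10)
		return AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0;	/* 0xF0 */
	return AMDGPU_DOORBELL64_sDMA_ENGINE0;			/* 0xE0 */
}
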
@@ -599,84 +510,6 @@ struct amdgpu_ib {
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
-/*
- * Queue manager
- */
-struct amdgpu_queue_mapper {
-	int 		hw_ip;
-	struct mutex	lock;
-	/* protected by lock */
-	struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
-};
-
-struct amdgpu_queue_mgr {
-	struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
-};
-
-int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr);
-int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr);
-int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
-			 struct amdgpu_queue_mgr *mgr,
-			 u32 hw_ip, u32 instance, u32 ring,
-			 struct amdgpu_ring **out_ring);
-
-/*
- * context related structures
- */
-
-struct amdgpu_ctx_ring {
-	uint64_t		sequence;
-	struct dma_fence	**fences;
-	struct drm_sched_entity	entity;
-};
-
-struct amdgpu_ctx {
-	struct kref		refcount;
-	struct amdgpu_device    *adev;
-	struct amdgpu_queue_mgr queue_mgr;
-	unsigned		reset_counter;
-	unsigned        reset_counter_query;
-	uint32_t		vram_lost_counter;
-	spinlock_t		ring_lock;
-	struct dma_fence	**fences;
-	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
-	bool			preamble_presented;
-	enum drm_sched_priority init_priority;
-	enum drm_sched_priority override_priority;
-	struct mutex            lock;
-	atomic_t	guilty;
-};
-
-struct amdgpu_ctx_mgr {
-	struct amdgpu_device	*adev;
-	struct mutex		lock;
-	/* protected by lock */
-	struct idr		ctx_handles;
-};
-
-struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
-int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
-
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct dma_fence *fence, uint64_t *seq);
-struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				   struct amdgpu_ring *ring, uint64_t seq);
-void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
-				  enum drm_sched_priority priority);
-
-int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
-		     struct drm_file *filp);
-
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
-
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-
-
 /*
  * file private structure
  */
@@ -690,271 +523,6 @@ struct amdgpu_fpriv {
 	struct amdgpu_ctx_mgr	ctx_mgr;
 };
 
-/*
- * GFX stuff
- */
-#include "clearstate_defs.h"
-
-struct amdgpu_rlc_funcs {
-	void (*enter_safe_mode)(struct amdgpu_device *adev);
-	void (*exit_safe_mode)(struct amdgpu_device *adev);
-};
-
-struct amdgpu_rlc {
-	/* for power gating */
-	struct amdgpu_bo	*save_restore_obj;
-	uint64_t		save_restore_gpu_addr;
-	volatile uint32_t	*sr_ptr;
-	const u32               *reg_list;
-	u32                     reg_list_size;
-	/* for clear state */
-	struct amdgpu_bo	*clear_state_obj;
-	uint64_t		clear_state_gpu_addr;
-	volatile uint32_t	*cs_ptr;
-	const struct cs_section_def   *cs_data;
-	u32                     clear_state_size;
-	/* for cp tables */
-	struct amdgpu_bo	*cp_table_obj;
-	uint64_t		cp_table_gpu_addr;
-	volatile uint32_t	*cp_table_ptr;
-	u32                     cp_table_size;
-
-	/* safe mode for updating CG/PG state */
-	bool in_safe_mode;
-	const struct amdgpu_rlc_funcs *funcs;
-
-	/* for firmware data */
-	u32 save_and_restore_offset;
-	u32 clear_state_descriptor_offset;
-	u32 avail_scratch_ram_locations;
-	u32 reg_restore_list_size;
-	u32 reg_list_format_start;
-	u32 reg_list_format_separate_start;
-	u32 starting_offsets_start;
-	u32 reg_list_format_size_bytes;
-	u32 reg_list_size_bytes;
-	u32 reg_list_format_direct_reg_list_length;
-	u32 save_restore_list_cntl_size_bytes;
-	u32 save_restore_list_gpm_size_bytes;
-	u32 save_restore_list_srm_size_bytes;
-
-	u32 *register_list_format;
-	u32 *register_restore;
-	u8 *save_restore_list_cntl;
-	u8 *save_restore_list_gpm;
-	u8 *save_restore_list_srm;
-
-	bool is_rlc_v2_1;
-};
-
-#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
-
-struct amdgpu_mec {
-	struct amdgpu_bo	*hpd_eop_obj;
-	u64			hpd_eop_gpu_addr;
-	struct amdgpu_bo	*mec_fw_obj;
-	u64			mec_fw_gpu_addr;
-	u32 num_mec;
-	u32 num_pipe_per_mec;
-	u32 num_queue_per_pipe;
-	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
-
-	/* These are the resources for which amdgpu takes ownership */
-	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-};
-
-struct amdgpu_kiq {
-	u64			eop_gpu_addr;
-	struct amdgpu_bo	*eop_obj;
-	spinlock_t              ring_lock;
-	struct amdgpu_ring	ring;
-	struct amdgpu_irq_src	irq;
-};
-
-/*
- * GPU scratch registers structures, functions & helpers
- */
-struct amdgpu_scratch {
-	unsigned		num_reg;
-	uint32_t                reg_base;
-	uint32_t		free_mask;
-};
-
-/*
- * GFX configurations
- */
-#define AMDGPU_GFX_MAX_SE 4
-#define AMDGPU_GFX_MAX_SH_PER_SE 2
-
-struct amdgpu_rb_config {
-	uint32_t rb_backend_disable;
-	uint32_t user_rb_backend_disable;
-	uint32_t raster_config;
-	uint32_t raster_config_1;
-};
-
-struct gb_addr_config {
-	uint16_t pipe_interleave_size;
-	uint8_t num_pipes;
-	uint8_t max_compress_frags;
-	uint8_t num_banks;
-	uint8_t num_se;
-	uint8_t num_rb_per_se;
-};
-
-struct amdgpu_gfx_config {
-	unsigned max_shader_engines;
-	unsigned max_tile_pipes;
-	unsigned max_cu_per_sh;
-	unsigned max_sh_per_se;
-	unsigned max_backends_per_se;
-	unsigned max_texture_channel_caches;
-	unsigned max_gprs;
-	unsigned max_gs_threads;
-	unsigned max_hw_contexts;
-	unsigned sc_prim_fifo_size_frontend;
-	unsigned sc_prim_fifo_size_backend;
-	unsigned sc_hiz_tile_fifo_size;
-	unsigned sc_earlyz_tile_fifo_size;
-
-	unsigned num_tile_pipes;
-	unsigned backend_enable_mask;
-	unsigned mem_max_burst_length_bytes;
-	unsigned mem_row_size_in_kb;
-	unsigned shader_engine_tile_size;
-	unsigned num_gpus;
-	unsigned multi_gpu_tile_size;
-	unsigned mc_arb_ramcfg;
-	unsigned gb_addr_config;
-	unsigned num_rbs;
-	unsigned gs_vgt_table_depth;
-	unsigned gs_prim_buffer_depth;
-
-	uint32_t tile_mode_array[32];
-	uint32_t macrotile_mode_array[16];
-
-	struct gb_addr_config gb_addr_config_fields;
-	struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
-
-	/* gfx configure feature */
-	uint32_t double_offchip_lds_buf;
-	/* cached value of DB_DEBUG2 */
-	uint32_t db_debug2;
-};
-
-struct amdgpu_cu_info {
-	uint32_t simd_per_cu;
-	uint32_t max_waves_per_simd;
-	uint32_t wave_front_size;
-	uint32_t max_scratch_slots_per_cu;
-	uint32_t lds_size;
-
-	/* total active CU number */
-	uint32_t number;
-	uint32_t ao_cu_mask;
-	uint32_t ao_cu_bitmap[4][4];
-	uint32_t bitmap[4][4];
-};
-
-struct amdgpu_gfx_funcs {
-	/* get the gpu clock counter */
-	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
-	void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
-	void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields);
-	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
-	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
-	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
-};
-
-struct amdgpu_ngg_buf {
-	struct amdgpu_bo	*bo;
-	uint64_t		gpu_addr;
-	uint32_t		size;
-	uint32_t		bo_size;
-};
-
-enum {
-	NGG_PRIM = 0,
-	NGG_POS,
-	NGG_CNTL,
-	NGG_PARAM,
-	NGG_BUF_MAX
-};
-
-struct amdgpu_ngg {
-	struct amdgpu_ngg_buf	buf[NGG_BUF_MAX];
-	uint32_t		gds_reserve_addr;
-	uint32_t		gds_reserve_size;
-	bool			init;
-};
-
-struct sq_work {
-	struct work_struct	work;
-	unsigned ih_data;
-};
-
-struct amdgpu_gfx {
-	struct mutex			gpu_clock_mutex;
-	struct amdgpu_gfx_config	config;
-	struct amdgpu_rlc		rlc;
-	struct amdgpu_mec		mec;
-	struct amdgpu_kiq		kiq;
-	struct amdgpu_scratch		scratch;
-	const struct firmware		*me_fw;	/* ME firmware */
-	uint32_t			me_fw_version;
-	const struct firmware		*pfp_fw; /* PFP firmware */
-	uint32_t			pfp_fw_version;
-	const struct firmware		*ce_fw;	/* CE firmware */
-	uint32_t			ce_fw_version;
-	const struct firmware		*rlc_fw; /* RLC firmware */
-	uint32_t			rlc_fw_version;
-	const struct firmware		*mec_fw; /* MEC firmware */
-	uint32_t			mec_fw_version;
-	const struct firmware		*mec2_fw; /* MEC2 firmware */
-	uint32_t			mec2_fw_version;
-	uint32_t			me_feature_version;
-	uint32_t			ce_feature_version;
-	uint32_t			pfp_feature_version;
-	uint32_t			rlc_feature_version;
-	uint32_t			rlc_srlc_fw_version;
-	uint32_t			rlc_srlc_feature_version;
-	uint32_t			rlc_srlg_fw_version;
-	uint32_t			rlc_srlg_feature_version;
-	uint32_t			rlc_srls_fw_version;
-	uint32_t			rlc_srls_feature_version;
-	uint32_t			mec_feature_version;
-	uint32_t			mec2_feature_version;
-	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
-	unsigned			num_gfx_rings;
-	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
-	unsigned			num_compute_rings;
-	struct amdgpu_irq_src		eop_irq;
-	struct amdgpu_irq_src		priv_reg_irq;
-	struct amdgpu_irq_src		priv_inst_irq;
-	struct amdgpu_irq_src		cp_ecc_error_irq;
-	struct amdgpu_irq_src		sq_irq;
-	struct sq_work			sq_work;
-
-	/* gfx status */
-	uint32_t			gfx_current_status;
-	/* ce ram size*/
-	unsigned			ce_ram_size;
-	struct amdgpu_cu_info		cu_info;
-	const struct amdgpu_gfx_funcs	*funcs;
-
-	/* reset mask */
-	uint32_t                        grbm_soft_reset;
-	uint32_t                        srbm_soft_reset;
-	/* s3/s4 mask */
-	bool                            in_suspend;
-	/* NGG */
-	struct amdgpu_ngg		ngg;
-
-	/* pipe reservation */
-	struct mutex			pipe_reserve_mutex;
-	DECLARE_BITMAP			(pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-};
-
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
@@ -986,7 +554,7 @@ struct amdgpu_cs_parser {
 
 	/* scheduler job object */
 	struct amdgpu_job	*job;
-	struct amdgpu_ring	*ring;
+	struct drm_sched_entity	*entity;
 
 	/* buffer objects */
 	struct ww_acquire_ctx		ticket;
@@ -1037,58 +605,6 @@ struct amdgpu_wb {
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
 
-/*
- * SDMA
- */
-struct amdgpu_sdma_instance {
-	/* SDMA firmware */
-	const struct firmware	*fw;
-	uint32_t		fw_version;
-	uint32_t		feature_version;
-
-	struct amdgpu_ring	ring;
-	bool			burst_nop;
-};
-
-struct amdgpu_sdma {
-	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
-#ifdef CONFIG_DRM_AMDGPU_SI
-	//SI DMA has a difference trap irq number for the second engine
-	struct amdgpu_irq_src	trap_irq_1;
-#endif
-	struct amdgpu_irq_src	trap_irq;
-	struct amdgpu_irq_src	illegal_inst_irq;
-	int			num_instances;
-	uint32_t                    srbm_soft_reset;
-};
-
-/*
- * Firmware
- */
-enum amdgpu_firmware_load_type {
-	AMDGPU_FW_LOAD_DIRECT = 0,
-	AMDGPU_FW_LOAD_SMU,
-	AMDGPU_FW_LOAD_PSP,
-};
-
-struct amdgpu_firmware {
-	struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
-	enum amdgpu_firmware_load_type load_type;
-	struct amdgpu_bo *fw_buf;
-	unsigned int fw_size;
-	unsigned int max_ucodes;
-	/* firmwares are loaded by psp instead of smu from vega10 */
-	const struct amdgpu_psp_funcs *funcs;
-	struct amdgpu_bo *rbuf;
-	struct mutex mutex;
-
-	/* gpu info firmware data pointer */
-	const struct firmware *gpu_info_fw;
-
-	void *fw_buf_ptr;
-	uint64_t fw_buf_mc;
-};
-
 /*
  * Benchmarking
  */
@@ -1100,31 +616,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
  */
 void amdgpu_test_moves(struct amdgpu_device *adev);
 
-
-/*
- * amdgpu smumgr functions
- */
-struct amdgpu_smumgr_funcs {
-	int (*check_fw_load_finish)(struct amdgpu_device *adev, uint32_t fwtype);
-	int (*request_smu_load_fw)(struct amdgpu_device *adev);
-	int (*request_smu_specific_fw)(struct amdgpu_device *adev, uint32_t fwtype);
-};
-
-/*
- * amdgpu smumgr
- */
-struct amdgpu_smumgr {
-	struct amdgpu_bo *toc_buf;
-	struct amdgpu_bo *smu_buf;
-	/* asic priv smu data */
-	void *priv;
-	spinlock_t smu_lock;
-	/* smumgr functions */
-	const struct amdgpu_smumgr_funcs *smumgr_funcs;
-	/* ucode loading complete flag */
-	uint32_t fw_flags;
-};
-
 /*
  * ASIC specific register table accessible by UMD
  */
@@ -1166,23 +657,9 @@ struct amdgpu_asic_funcs {
 /*
  * IOCTL.
  */
-int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *filp);
 int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 
-int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *filp);
-int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *filp);
-int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *filp);
-int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *filp);
-int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *filp);
-int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *filp);
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *filp);
@@ -1190,9 +667,6 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 
-int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *filp);
-
 /* VRAM scratch page for HDP bug, default vram page */
 struct amdgpu_vram_scratch {
 	struct amdgpu_bo		*robj;
@@ -1477,9 +951,6 @@ struct amdgpu_device {
 	u32				cg_flags;
 	u32				pg_flags;
 
-	/* amdgpu smumgr */
-	struct amdgpu_smumgr smu;
-
 	/* gfx */
 	struct amdgpu_gfx		gfx;
 
@@ -1544,6 +1015,9 @@ struct amdgpu_device {
 	bool has_hw_reset;
 	u8				reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
+	/* s3/s4 mask */
+	bool                            in_suspend;
+
 	/* record last mm index being written through WREG32*/
 	unsigned long last_mm_index;
 	bool                            in_gpu_reset;
@@ -1666,22 +1140,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
 #define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
 
-static inline struct amdgpu_sdma_instance *
-amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	int i;
-
-	for (i = 0; i < adev->sdma.num_instances; i++)
-		if (&adev->sdma.instance[i].ring == ring)
-			break;
-
-	if (i < AMDGPU_MAX_SDMA_INSTANCES)
-		return &adev->sdma.instance[i];
-	else
-		return NULL;
-}
-
 /*
  * ASICs macro.
  */
@@ -1700,74 +1158,16 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
 #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
 #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
-#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
-#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
-#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
-#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
-#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
-#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
-#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
-#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
-#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
-#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
-#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
-#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
-#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
-#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
-#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
-#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
-#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
-#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
-#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
-#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
-#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
-#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
-#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
-#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
-#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
-#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
-#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
-#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
-#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
-#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
-#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
-#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
-#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
-#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
-#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
-#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
-#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
-#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
-#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b))
-#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
-#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
-#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
-#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
-#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
-#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
 
 /* Common functions */
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
-			      struct amdgpu_job* job, bool force);
+			      struct amdgpu_job* job);
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
-void amdgpu_display_update_priority(struct amdgpu_device *adev);
 
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 				  u64 num_vis_bytes);
-void amdgpu_device_vram_location(struct amdgpu_device *adev,
-				 struct amdgpu_gmc *mc, u64 base);
-void amdgpu_device_gart_location(struct amdgpu_device *adev,
-				 struct amdgpu_gmc *mc);
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 					     const u32 *registers,
@@ -1818,6 +1218,12 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg);
 
+
+/*
+ * functions used by amdgpu_xgmi.c
+ */
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
+
 /*
  * functions used by amdgpu_encoder.c
  */

+ 67 - 140
drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c

@@ -116,136 +116,47 @@ static int acp_sw_fini(void *handle)
 	return 0;
 }
 
-/* power off a tile/block within ACP */
-static int acp_suspend_tile(void *cgs_dev, int tile)
-{
-	u32 val = 0;
-	u32 count = 0;
-
-	if ((tile  < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
-		pr_err("Invalid ACP tile : %d to suspend\n", tile);
-		return -1;
-	}
-
-	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
-	val &= ACP_TILE_ON_MASK;
-
-	if (val == 0x0) {
-		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
-		val = val | (1 << tile);
-		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
-		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
-					0x500 + tile);
-
-		count = ACP_TIMEOUT_LOOP;
-		while (true) {
-			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
-								+ tile);
-			val = val & ACP_TILE_ON_MASK;
-			if (val == ACP_TILE_OFF_MASK)
-				break;
-			if (--count == 0) {
-				pr_err("Timeout reading ACP PGFSM status\n");
-				return -ETIMEDOUT;
-			}
-			udelay(100);
-		}
-
-		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
-
-		val |= ACP_TILE_OFF_RETAIN_REG_MASK;
-		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
-	}
-	return 0;
-}
-
-/* power on a tile/block within ACP */
-static int acp_resume_tile(void *cgs_dev, int tile)
-{
-	u32 val = 0;
-	u32 count = 0;
-
-	if ((tile  < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
-		pr_err("Invalid ACP tile to resume\n");
-		return -1;
-	}
-
-	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
-	val = val & ACP_TILE_ON_MASK;
-
-	if (val != 0x0) {
-		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
-					0x600 + tile);
-		count = ACP_TIMEOUT_LOOP;
-		while (true) {
-			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
-							+ tile);
-			val = val & ACP_TILE_ON_MASK;
-			if (val == 0x0)
-				break;
-			if (--count == 0) {
-				pr_err("Timeout reading ACP PGFSM status\n");
-				return -ETIMEDOUT;
-			}
-			udelay(100);
-		}
-		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
-		if (tile == ACP_TILE_P1)
-			val = val & (ACP_TILE_P1_MASK);
-		else if (tile == ACP_TILE_P2)
-			val = val & (ACP_TILE_P2_MASK);
-
-		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
-	}
-	return 0;
-}
-
 struct acp_pm_domain {
-	void *cgs_dev;
+	void *adev;
 	struct generic_pm_domain gpd;
 };
 
 static int acp_poweroff(struct generic_pm_domain *genpd)
 {
-	int i, ret;
 	struct acp_pm_domain *apd;
+	struct amdgpu_device *adev;
 
 	apd = container_of(genpd, struct acp_pm_domain, gpd);
 	if (apd != NULL) {
-		/* Donot return abruptly if any of power tile fails to suspend.
-		 * Log it and continue powering off other tile
-		 */
-		for (i = 4; i >= 0 ; i--) {
-			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
-			if (ret)
-				pr_err("ACP tile %d tile suspend failed\n", i);
-		}
+		adev = apd->adev;
+		/* Call the SMU to power gate the ACP block.
+		 * The SMU will:
+		 * 1. turn off the ACP clock
+		 * 2. power off the ACP tiles
+		 * 3. check for and enter the ULV state
+		 */
+		if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+			amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
 	}
 	return 0;
 }
 
 static int acp_poweron(struct generic_pm_domain *genpd)
 {
-	int i, ret;
 	struct acp_pm_domain *apd;
+	struct amdgpu_device *adev;
 
 	apd = container_of(genpd, struct acp_pm_domain, gpd);
 	if (apd != NULL) {
-		for (i = 0; i < 2; i++) {
-			ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i);
-			if (ret) {
-				pr_err("ACP tile %d resume failed\n", i);
-				break;
-			}
-		}
-
-		/* Disable DSPs which are not going to be used */
-		for (i = 0; i < 3; i++) {
-			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
-			/* Continue suspending other DSP, even if one fails */
-			if (ret)
-				pr_err("ACP DSP %d suspend failed\n", i);
-		}
+		adev = apd->adev;
+		/* Call the SMU to ungate the ACP block.
+		 * The SMU will:
+		 * 1. exit the ULV state
+		 * 2. turn on the ACP clock
+		 * 3. power on the ACP tiles
+		 */
+		if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+			amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
 	}
 	return 0;
 }
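
Both genpd callbacks now reduce to the same guarded SMU call; a sketch of the shared pattern, with a made-up wrapper name for illustration:

/* Hypothetical wrapper showing the pattern both callbacks share */
static void acp_set_pg(struct amdgpu_device *adev, bool gate)
{
	if (adev->powerplay.pp_funcs->set_powergating_by_smu)
		amdgpu_dpm_set_powergating_by_smu(adev,
						  AMD_IP_BLOCK_TYPE_ACP, gate);
}
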
@@ -289,30 +200,31 @@ static int acp_hw_init(void *handle)
 	r = amd_acp_hw_init(adev->acp.cgs_device,
 			    ip_block->version->major, ip_block->version->minor);
 	/* -ENODEV means board uses AZ rather than ACP */
-	if (r == -ENODEV)
+	if (r == -ENODEV) {
+		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
 		return 0;
-	else if (r)
+	} else if (r) {
 		return r;
+	}
 
 	if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
 		return -EINVAL;
 
 	acp_base = adev->rmmio_base;
 
-	if (adev->asic_type != CHIP_STONEY) {
-		adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
-		if (adev->acp.acp_genpd == NULL)
-			return -ENOMEM;
 
-		adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
-		adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
-		adev->acp.acp_genpd->gpd.power_on = acp_poweron;
+	adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
+	if (adev->acp.acp_genpd == NULL)
+		return -ENOMEM;
+
+	adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
+	adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
+	adev->acp.acp_genpd->gpd.power_on = acp_poweron;
 
 
-		adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device;
+	adev->acp.acp_genpd->adev = adev;
 
-		pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
-	}
+	pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
 
 	adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
 							GFP_KERNEL);
@@ -429,17 +341,16 @@ static int acp_hw_init(void *handle)
 	if (r)
 		return r;
 
-	if (adev->asic_type != CHIP_STONEY) {
-		for (i = 0; i < ACP_DEVS ; i++) {
-			dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
-			r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
-			if (r) {
-				dev_err(dev, "Failed to add dev to genpd\n");
-				return r;
-			}
+	for (i = 0; i < ACP_DEVS ; i++) {
+		dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+		r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
+		if (r) {
+			dev_err(dev, "Failed to add dev to genpd\n");
+			return r;
 		}
 	}
 
+
 	/* Assert Soft reset of ACP */
 	val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
 
@@ -497,8 +408,10 @@ static int acp_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* return early if no ACP */
-	if (!adev->acp.acp_cell)
+	if (!adev->acp.acp_genpd) {
+		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
 		return 0;
+	}
 
 	/* Assert Soft reset of ACP */
 	val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@@ -536,19 +449,17 @@ static int acp_hw_fini(void *handle)
 		udelay(100);
 	}
 
-	if (adev->acp.acp_genpd) {
-		for (i = 0; i < ACP_DEVS ; i++) {
-			dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
-			ret = pm_genpd_remove_device(dev);
-			/* If removal fails, dont giveup and try rest */
-			if (ret)
-				dev_err(dev, "remove dev from genpd failed\n");
-		}
-		kfree(adev->acp.acp_genpd);
+	for (i = 0; i < ACP_DEVS ; i++) {
+		dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+		ret = pm_genpd_remove_device(dev);
+		/* If removal fails, don't give up; try the rest */
+		if (ret)
+			dev_err(dev, "remove dev from genpd failed\n");
 	}
 
 	mfd_remove_devices(adev->acp.parent);
 	kfree(adev->acp.acp_res);
+	kfree(adev->acp.acp_genpd);
 	kfree(adev->acp.acp_cell);
 
 	return 0;
@@ -556,11 +467,21 @@ static int acp_hw_fini(void *handle)
 
 static int acp_suspend(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	/* power up on suspend */
+	if (!adev->acp.acp_cell)
+		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
 	return 0;
 }
 
 static int acp_resume(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	/* power down again on resume */
+	if (!adev->acp.acp_cell)
+		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
 	return 0;
 }
 
@@ -593,6 +514,12 @@ static int acp_set_clockgating_state(void *handle,
 static int acp_set_powergating_state(void *handle,
 				     enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_PG_STATE_GATE);
+
+	if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+
 	return 0;
 }
 

+ 13 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c

@@ -31,6 +31,7 @@
 #include <drm/drm_crtc_helper.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
+#include "amdgpu_display.h"
 #include "amd_acpi.h"
 #include "atom.h"
 
@@ -358,7 +359,9 @@ out:
  *
  * Checks the acpi event and if it matches an atif event,
  * handles it.
- * Returns NOTIFY code
+ *
+ * Returns:
+ * NOTIFY_BAD or NOTIFY_DONE, depending on the event.
  */
 static int amdgpu_atif_handler(struct amdgpu_device *adev,
 			       struct acpi_bus_event *event)
@@ -372,11 +375,16 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 	if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0)
 		return NOTIFY_DONE;
 
+	/* Is this actually our event? */
 	if (!atif ||
 	    !atif->notification_cfg.enabled ||
-	    event->type != atif->notification_cfg.command_code)
-		/* Not our event */
-		return NOTIFY_DONE;
+	    event->type != atif->notification_cfg.command_code) {
+		/* These events will generate keypresses otherwise */
+		if (event->type == ACPI_VIDEO_NOTIFY_PROBE)
+			return NOTIFY_BAD;
+		else
+			return NOTIFY_DONE;
+	}
 
 	if (atif->functions.sbios_requests) {
 		struct atif_sbios_requests req;
@@ -385,7 +393,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 		count = amdgpu_atif_get_sbios_requests(atif, &req);
 
 		if (count <= 0)
-			return NOTIFY_DONE;
+			return NOTIFY_BAD;
 
 		DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
 

+ 49 - 44
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -28,7 +28,6 @@
 #include <linux/module.h>
 
 const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
 
@@ -36,45 +35,23 @@ int amdgpu_amdkfd_init(void)
 {
 	int ret;
 
-#if defined(CONFIG_HSA_AMD_MODULE)
-	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
-
-	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
-	if (kgd2kfd_init_p == NULL)
-		return -ENOENT;
-
-	ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
-	if (ret) {
-		symbol_put(kgd2kfd_init);
-		kgd2kfd = NULL;
-	}
-
-
-#elif defined(CONFIG_HSA_AMD)
-
+#ifdef CONFIG_HSA_AMD
 	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
 	if (ret)
 		kgd2kfd = NULL;
-
+	amdgpu_amdkfd_gpuvm_init_mem_limits();
 #else
 	kgd2kfd = NULL;
 	ret = -ENOENT;
 #endif
 
-#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
-	amdgpu_amdkfd_gpuvm_init_mem_limits();
-#endif
-
 	return ret;
 }
 
 void amdgpu_amdkfd_fini(void)
 {
-	if (kgd2kfd) {
+	if (kgd2kfd)
 		kgd2kfd->exit();
-		symbol_put(kgd2kfd_init);
-	}
 }
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
@@ -99,6 +76,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 		break;
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
 		break;
@@ -146,7 +124,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-	int i;
+	int i, n;
 	int last_valid_bit;
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
@@ -155,7 +133,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
 			.gpuvm_size = min(adev->vm_manager.max_pfn
 					  << AMDGPU_GPU_PAGE_SHIFT,
-					  AMDGPU_VA_HOLE_START),
+					  AMDGPU_GMC_HOLE_START),
 			.drm_render_minor = adev->ddev->render->index
 		};
 
@@ -185,7 +163,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
-		if (adev->asic_type >= CHIP_VEGA10) {
+
+		if (adev->asic_type < CHIP_VEGA10) {
+			kgd2kfd->device_init(adev->kfd, &gpu_resources);
+			return;
+		}
+
+		n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
+
+		for (i = 0; i < n; i += 2) {
 			/* On SOC15 the BIF is involved in routing
 			 * doorbells using the low 12 bits of the
 			 * address. Communicate the assignments to
@@ -193,20 +179,31 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			 * process in case of 64-bit doorbells so we
 			 * can use each doorbell assignment twice.
 			 */
-			gpu_resources.sdma_doorbell[0][0] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE0;
-			gpu_resources.sdma_doorbell[0][1] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
-			gpu_resources.sdma_doorbell[1][0] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE1;
-			gpu_resources.sdma_doorbell[1][1] =
-				AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
-			/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
-			 * SDMA, IH and VCN. So don't use them for the CP.
-			 */
-			gpu_resources.reserved_doorbell_mask = 0x1f0;
-			gpu_resources.reserved_doorbell_val  = 0x0f0;
+			if (adev->asic_type == CHIP_VEGA10) {
+				gpu_resources.sdma_doorbell[0][i] =
+					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
+				gpu_resources.sdma_doorbell[0][i+1] =
+					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
+				gpu_resources.sdma_doorbell[1][i] =
+					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
+				gpu_resources.sdma_doorbell[1][i+1] =
+					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
+			} else {
+				gpu_resources.sdma_doorbell[0][i] =
+					AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
+				gpu_resources.sdma_doorbell[0][i+1] =
+					AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
+				gpu_resources.sdma_doorbell[1][i] =
+					AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
+				gpu_resources.sdma_doorbell[1][i+1] =
+					AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
+			}
 		}
+		/* Doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved for
+		 * SDMA, IH and VCN. So don't use them for the CP.
+		 */
+		gpu_resources.reserved_doorbell_mask = 0x1e0;
+		gpu_resources.reserved_doorbell_val  = 0x0e0;
 
 		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
@@ -267,7 +264,8 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	amdgpu_device_gpu_recover(adev, NULL, false);
+	if (amdgpu_device_should_recover_gpu(adev))
+		amdgpu_device_gpu_recover(adev, NULL);
 }
 
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
@@ -437,6 +435,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
 	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 }
 
+uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->gmc.xgmi.hive_id;
+}
+
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len)
@@ -510,7 +515,7 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
-#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
+#ifndef CONFIG_HSA_AMD
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
 {
 	return false;

+ 6 - 4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -145,6 +145,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
+uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 
 #define read_user_wptr(mmptr, wptr, dst)				\
 	({								\
@@ -162,17 +163,18 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 	})
 
 /* GPUVM API */
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					void **process_info,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
+					void **vm, void **process_info,
 					struct dma_fence **ef);
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					struct file *filp,
+					struct file *filp, unsigned int pasid,
 					void **vm, void **process_info,
 					struct dma_fence **ef);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct kgd_dev *kgd, uint64_t va, uint64_t size,
 		void *vm, struct kgd_mem **mem,

+ 5 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -142,7 +142,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t page_table_base);
+		uint64_t page_table_base);
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
 static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
 	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
 	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
 	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@@ -873,7 +874,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 }
 
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-			uint32_t page_table_base)
+			uint64_t page_table_base)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
@@ -881,7 +882,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		pr_err("trying to set page table base for wrong VMID\n");
 		return;
 	}
-	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+		lower_32_bits(page_table_base));
 }
 
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
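
GFX7 only has a 32-bit page-table-base register, so widening the kfd2kgd interface to 64 bits just means truncating at this level. A worked example with an illustrative address:

	uint64_t page_table_base = 0x0000123456789000ULL;	/* illustrative */
	uint32_t reg_val = lower_32_bits(page_table_base);	/* 0x56789000 */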

+ 7 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -45,8 +45,6 @@ enum hqd_dequeue_request_type {
 	RESET_WAVES
 };
 
-struct vi_sdma_mqd;
-
 /*
  * Register access functions
  */
@@ -100,7 +98,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t page_table_base);
+		uint64_t page_table_base);
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
 static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 
@@ -164,6 +162,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
 	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
 	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
 	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@@ -281,7 +280,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
-	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
+	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
 	unlock_srbm(kgd);
 
@@ -833,7 +833,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 }
 
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t page_table_base)
+		uint64_t page_table_base)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
@@ -841,7 +841,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		pr_err("trying to set page table base for wrong VMID\n");
 		return;
 	}
-	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+			lower_32_bits(page_table_base));
 }
 
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)

+ 6 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

@@ -138,7 +138,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
 static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t page_table_base);
+		uint64_t page_table_base);
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
@@ -201,6 +201,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
 	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
 	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
 	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@@ -214,7 +215,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
 	.gpu_recover = amdgpu_amdkfd_gpu_reset,
-	.set_compute_idle = amdgpu_amdkfd_set_compute_idle
+	.set_compute_idle = amdgpu_amdkfd_set_compute_idle,
+	.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
@@ -1011,11 +1013,10 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 }
 
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-		uint32_t page_table_base)
+		uint64_t page_table_base)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
-		AMDGPU_PTE_VALID;
+	uint64_t base = page_table_base | AMDGPU_PTE_VALID;
 
 	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
 		pr_err("trying to set page table base for wrong VMID %u\n",

+ 31 - 13
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -364,7 +364,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	struct amdgpu_vm_parser param;
-	uint64_t addr, flags = AMDGPU_PTE_VALID;
 	int ret;
 
 	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
@@ -383,9 +382,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 		return ret;
 	}
 
-	addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
-	vm->pd_phys_addr = addr;
+	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 
 	if (vm->use_cpu_for_update) {
 		ret = amdgpu_bo_kmap(pd, NULL);
@@ -678,7 +675,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	if (!ctx->vm_pd)
 		return -ENOMEM;
 
-	ctx->kfd_bo.robj = bo;
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
@@ -743,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 			return -ENOMEM;
 	}
 
-	ctx->kfd_bo.robj = bo;
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
@@ -1003,8 +998,8 @@ create_evict_fence_fail:
 	return ret;
 }
 
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					  void **process_info,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
+					  void **vm, void **process_info,
 					  struct dma_fence **ef)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -1016,7 +1011,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 		return -ENOMEM;
 
 	/* Initialize AMDGPU part of the VM */
-	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
 	if (ret) {
 		pr_err("Failed init vm ret %d\n", ret);
 		goto amdgpu_vm_init_fail;
@@ -1039,7 +1034,7 @@ amdgpu_vm_init_fail:
 }
 
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					   struct file *filp,
+					   struct file *filp, unsigned int pasid,
 					   void **vm, void **process_info,
 					   struct dma_fence **ef)
 {
@@ -1054,7 +1049,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 		return -EINVAL;
 
 	/* Convert VM into a compute VM */
-	ret = amdgpu_vm_make_compute(adev, avm);
+	ret = amdgpu_vm_make_compute(adev, avm, pasid);
 	if (ret)
 		return ret;
 
@@ -1117,11 +1112,34 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 	kfree(vm);
 }
 
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (WARN_ON(!kgd || !vm))
+		return;
+
+	pr_debug("Releasing process vm %p\n", vm);
+
+	/* The original pasid of the amdgpu vm has already been
+	 * released when the amdgpu vm was converted to a compute vm.
+	 * The current pasid is managed by the KFD and will be
+	 * released on KFD process destroy. Set the amdgpu pasid
+	 * to 0 to avoid a duplicate release.
+	 */
+	amdgpu_vm_release_compute(adev, avm);
+}
+
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
 {
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_bo *pd = avm->root.base.bo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 
-	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+	if (adev->asic_type < CHIP_VEGA10)
+		return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+	return avm->pd_phys_addr;
 }
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
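
The page-directory getter above mirrors that asymmetry: pre-Vega10 firmware consumes a page-frame number, GFX9 and later the full address. A condensed restatement as a hypothetical helper:

	static uint64_t pd_for_kfd(struct amdgpu_device *adev, uint64_t pd_phys_addr)
	{
		/* pre-GFX9 firmware takes a pfn, GFX9+ the byte address */
		if (adev->asic_type < CHIP_VEGA10)
			return pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
		return pd_phys_addr;
	}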

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c

@@ -29,6 +29,7 @@
 #include "amdgpu_atombios.h"
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
 
 #include "atom.h"
 #include "atom-bits.h"

+ 10 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c

@@ -117,6 +117,10 @@ union igp_info {
 union umc_info {
 	struct atom_umc_info_v3_1 v31;
 };
+
+union vram_info {
+	struct atom_vram_info_header_v2_3 v23;
+};
 /*
  * Return vram width from integrated system info table, if available,
  * or 0 if not.
@@ -174,7 +178,7 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
 		case ATOM_DGPU_VRAM_TYPE_GDDR5:
 			vram_type = AMDGPU_VRAM_TYPE_GDDR5;
 			break;
-		case ATOM_DGPU_VRAM_TYPE_HBM:
+		case ATOM_DGPU_VRAM_TYPE_HBM2:
 			vram_type = AMDGPU_VRAM_TYPE_HBM;
 			break;
 		default:
@@ -195,7 +199,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
 	int index;
 	u16 data_offset, size;
 	union igp_info *igp_info;
-	union umc_info *umc_info;
+	union vram_info *vram_info;
 	u8 frev, crev;
 	u8 mem_type;
 
@@ -204,7 +208,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
 						    integratedsysteminfo);
 	else
 		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-						    umc_info);
+						    vram_info);
 	if (amdgpu_atom_parse_data_header(mode_info->atom_context,
 					  index, &size,
 					  &frev, &crev, &data_offset)) {
@@ -219,11 +223,11 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
 				return 0;
 			}
 		} else {
-			umc_info = (union umc_info *)
+			vram_info = (union vram_info *)
 				(mode_info->atom_context->bios + data_offset);
 			switch (crev) {
-			case 1:
-				mem_type = umc_info->v31.vram_type;
+			case 3:
+				mem_type = vram_info->v23.vram_module[0].memory_type;
 				return convert_atom_mem_type_to_vram_type(adev, mem_type);
 			default:
 				return 0;

+ 26 - 19
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c

@@ -49,8 +49,11 @@ static void amdgpu_bo_list_free(struct kref *ref)
 						   refcount);
 	struct amdgpu_bo_list_entry *e;
 
-	amdgpu_bo_list_for_each_entry(e, list)
-		amdgpu_bo_unref(&e->robj);
+	amdgpu_bo_list_for_each_entry(e, list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		amdgpu_bo_unref(&bo);
+	}
 
 	call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
 }
@@ -67,7 +70,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 	unsigned i;
 	int r;
 
-	if (num_entries > SIZE_MAX / sizeof(struct amdgpu_bo_list_entry))
+	if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
+				/ sizeof(struct amdgpu_bo_list_entry))
 		return -EINVAL;
 
 	size = sizeof(struct amdgpu_bo_list);
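
The tightened bound matters because the allocation is a header followed by the entry array; with the old check, num_entries could satisfy num_entries <= SIZE_MAX / sizeof(entry) while header + num_entries * sizeof(entry) still wrapped. A sketch of the exact bound, restating the new check with a named limit:

	size_t max_entries = (SIZE_MAX - sizeof(struct amdgpu_bo_list))
				/ sizeof(struct amdgpu_bo_list_entry);

	if (num_entries > max_entries)
		return -EINVAL;	/* the size computed below can no longer overflow */
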
@@ -111,21 +115,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 			entry = &array[last_entry++];
 		}
 
-		entry->robj = bo;
 		entry->priority = min(info[i].bo_priority,
 				      AMDGPU_BO_LIST_MAX_PRIORITY);
-		entry->tv.bo = &entry->robj->tbo;
-		entry->tv.shared = !entry->robj->prime_shared_count;
-
-		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
-			list->gds_obj = entry->robj;
-		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
-			list->gws_obj = entry->robj;
-		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
-			list->oa_obj = entry->robj;
-
-		total_size += amdgpu_bo_size(entry->robj);
-		trace_amdgpu_bo_list_set(list, entry->robj);
+		entry->tv.bo = &bo->tbo;
+		entry->tv.shared = !bo->prime_shared_count;
+
+		if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
+			list->gds_obj = bo;
+		if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
+			list->gws_obj = bo;
+		if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
+			list->oa_obj = bo;
+
+		total_size += amdgpu_bo_size(bo);
+		trace_amdgpu_bo_list_set(list, bo);
 	}
 
 	list->first_userptr = first_userptr;
@@ -137,8 +140,11 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 	return 0;
 
 error_free:
-	while (i--)
-		amdgpu_bo_unref(&array[i].robj);
+	while (i--) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
+
+		amdgpu_bo_unref(&bo);
+	}
 	kvfree(list);
 	return r;
 
@@ -190,9 +196,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 	 * with the same priority, i.e. it must be stable.
 	 */
 	amdgpu_bo_list_for_each_entry(e, list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 		unsigned priority = e->priority;
 
-		if (!e->robj->parent)
+		if (!bo->parent)
 			list_add_tail(&e->tv.head, &bucket[priority]);
 
 		e->user_pages = NULL;

+ 0 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h

@@ -32,7 +32,6 @@ struct amdgpu_bo_va;
 struct amdgpu_fpriv;
 
 struct amdgpu_bo_list_entry {
-	struct amdgpu_bo		*robj;
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
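
With the robj field dropped, users recover the BO from the embedded TTM validate entry instead; a minimal sketch of the access pattern, assuming the driver's existing ttm_to_amdgpu_bo() container_of wrapper:

	/* e->tv.bo points at the amdgpu_bo's embedded ttm_buffer_object,
	 * so container_of recovers the amdgpu_bo without a second pointer.
	 */
	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);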

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

@@ -34,6 +34,7 @@
 #include "atombios_dp.h"
 #include "amdgpu_connectors.h"
 #include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
 
 #include <linux/pm_runtime.h>
 

+ 155 - 164
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -32,12 +32,14 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
 	unsigned long size;
 	int r;
 
@@ -45,21 +47,21 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
+	size = amdgpu_bo_size(bo);
 	if (size != PAGE_SIZE || (data->offset + 8) > size) {
 		r = -EINVAL;
 		goto error_unref;
 	}
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 		r = -EINVAL;
 		goto error_unref;
 	}
@@ -69,7 +71,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 
 error_unref:
-	amdgpu_bo_unref(&p->uf_entry.robj);
+	amdgpu_bo_unref(&bo);
 	return r;
 }
 
@@ -228,7 +230,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		goto free_all_kdata;
 	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 
@@ -457,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
 
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
+		if (bo == validated)
 			break;
 
 		/* We can't move pinned BOs here */
@@ -528,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
@@ -603,7 +605,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	while (1) {
@@ -619,7 +621,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 		INIT_LIST_HEAD(&need_pages);
 		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
 				 &e->user_invalidated) && e->user_pages) {
@@ -638,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				list_del(&e->tv.head);
 				list_add(&e->tv.head, &need_pages);
 
-				amdgpu_bo_unreserve(e->robj);
+				amdgpu_bo_unreserve(bo);
 			}
 		}
 
@@ -657,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 		/* Fill the page arrays for all userptrs. */
 		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
+			struct ttm_tt *ttm = e->tv.bo->ttm;
 
 			e->user_pages = kvmalloc_array(ttm->num_pages,
 							 sizeof(struct page*),
@@ -716,23 +718,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	oa = p->bo_list->oa_obj;
 
 	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
 
 	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 	}
 	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 	}
 	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -748,8 +750,7 @@ error_free_pages:
 		if (!e->user_pages)
 			continue;
 
-		release_pages(e->user_pages,
-			      e->robj->tbo.ttm->num_pages);
+		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
 		kvfree(e->user_pages);
 	}
 
@@ -762,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	int r;
 
 	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct reservation_object *resv = bo->tbo.resv;
+
 		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+				     amdgpu_bo_explicit_sync(bo));
 
 		if (r)
 			return r;
@@ -807,11 +810,16 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -820,6 +828,71 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	struct amdgpu_bo *bo;
 	int r;
 
+	/* Only for UVD/VCE VM emulation */
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
+		unsigned i, j;
+
+		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			struct amdgpu_cs_chunk *chunk;
+			uint64_t offset, va_start;
+			struct amdgpu_ib *ib;
+			uint8_t *kptr;
+
+			chunk = &p->chunks[i];
+			ib = &p->job->ibs[j];
+			chunk_ib = chunk->kdata;
+
+			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				continue;
+
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
+			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((va_start + chunk_ib->ib_bytes) >
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += va_start - offset;
+
+			if (ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
+
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
+
+			j++;
+		}
+	}
+
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
+
+
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
 		return r;
@@ -852,7 +925,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 		struct dma_fence *f;
 
 		/* ignore duplicates */
-		bo = e->robj;
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
 		if (!bo)
 			continue;
 
@@ -882,101 +955,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
 	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
 		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			/* ignore duplicates */
-			if (!e->robj)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
-		}
-	}
-
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
-	int r;
-
-	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
-		unsigned i, j;
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
-			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			struct amdgpu_cs_chunk *chunk;
-			uint64_t offset, va_start;
-			struct amdgpu_ib *ib;
-			uint8_t *kptr;
-
-			chunk = &p->chunks[i];
-			ib = &p->job->ibs[j];
-			chunk_ib = chunk->kdata;
-
-			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+			/* ignore duplicates */
+			if (!bo)
 				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
-			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += va_start - offset;
-
-			if (p->ring->funcs->parse_cs) {
-				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-				amdgpu_bo_kunmap(aobj);
-
-				r = amdgpu_ring_parse_cs(ring, p, j);
-				if (r)
-					return r;
-			} else {
-				ib->ptr = (uint32_t *)kptr;
-				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
-				amdgpu_bo_kunmap(aobj);
-				if (r)
-					return r;
-			}
-
-			j++;
+			amdgpu_vm_bo_invalidate(adev, bo, false);
 		}
 	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-
-		r = amdgpu_bo_vm_update_pte(p);
-		if (r)
-			return r;
-
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
-		if (r)
-			return r;
-	}
-
 	return amdgpu_cs_sync_rings(p);
 }
 
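Worth noting in the hunks above: the UVD/VCE command-stream emulation moved essentially verbatim out of the deleted amdgpu_cs_ib_vm_chunk() into amdgpu_cs_vm_handling(), with AMDGPU_VA_HOLE_MASK renamed to AMDGPU_GMC_HOLE_MASK along the way. The part that deserves a second look is the pointer arithmetic that turns a GPU VA into a CPU pointer after amdgpu_bo_kmap(). A small model with invented numbers (the driver's AMDGPU_GPU_PAGE_SIZE is 4096; the mapping bounds and addresses below are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_GPU_PAGE_SIZE 4096

static uint8_t bo_kmap[16 * AMDGPU_GPU_PAGE_SIZE];	/* stands in for amdgpu_bo_kmap() */

int main(void)
{
	uint64_t m_start  = 0x100;	/* first GPU page covered by mapping m */
	uint64_t m_last   = 0x10f;	/* last GPU page covered by mapping m  */
	uint64_t va_start = 0x100040;	/* IB address inside the mapping       */
	uint64_t ib_bytes = 256;
	uint8_t *kptr = bo_kmap;

	/* bounds check mirrored from the diff: the IB must end inside the mapping */
	if (va_start + ib_bytes > (m_last + 1) * AMDGPU_GPU_PAGE_SIZE)
		return 1;	/* "IB va_start+ib_bytes is invalid" */

	/* VA byte address where the mapping begins */
	uint64_t offset = m_start * AMDGPU_GPU_PAGE_SIZE;	/* 0x100000 */

	/* CPU pointer to the IB: kmap base plus va_start's offset in the mapping */
	kptr += va_start - offset;	/* bo_kmap + 0x40 */
	printf("IB starts %llu bytes into the kmapped BO\n",
	       (unsigned long long)(kptr - bo_kmap));
	return 0;
}
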
@@ -985,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 {
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
@@ -1014,8 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 				return -EINVAL;
 		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
 		if (r)
 			return r;
 
@@ -1023,14 +1022,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			parser->job->preamble_status |=
 				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
-		parser->ring = ring;
+		parser->entity = entity;
 
-		r =  amdgpu_ib_get(adev, vm,
-					ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-					ib);
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				   chunk_ib->ib_bytes : 0, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
@@ -1044,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
 	if (parser->job->uf_addr && (
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1065,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
 		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
 		struct dma_fence *fence;
 
 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
 		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
+		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
@@ -1105,7 +1104,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 {
 	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
+	r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
 	if (r)
 		return r;
 
@@ -1194,16 +1193,16 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 	int i;
 
 	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct drm_sched_entity *entity = p->entity;
 	enum drm_sched_priority priority;
+	struct amdgpu_ring *ring;
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
@@ -1220,7 +1219,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	/* No memory allocation is allowed while holding the mn lock */
 	amdgpu_mn_lock(p->mn);
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
 			r = -ERESTARTSYS;
@@ -1231,15 +1230,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -1261,6 +1252,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	ring = to_amdgpu_ring(entity->rq->sched);
 	amdgpu_ring_priority_get(ring, priority);
 
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
@@ -1300,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r)
 		goto out;
 
+	r = amdgpu_cs_dependencies(adev, &parser);
+	if (r) {
+		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+		goto out;
+	}
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1311,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	reserved_buffers = true;
 
-	r = amdgpu_cs_dependencies(adev, &parser);
-	if (r) {
-		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
-		goto out;
-	}
-
 	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	r = amdgpu_cs_vm_handling(&parser);
 	if (r)
 		goto out;
 
@@ -1344,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
@@ -1355,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
@@ -1395,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 					     struct drm_file *filp,
 					     struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
@@ -1404,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;

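Two themes run through the amdgpu_cs.c diff as a whole. First, user submissions are now addressed by drm_sched_entity rather than amdgpu_ring: everywhere a (ip_type, ip_instance, ring) triple arrives from userspace, amdgpu_ctx_get_entity() replaces the old amdgpu_queue_mgr_map(). Second, the GDS/GWS/OA base and size handed to the job are now shifted into page units with >> PAGE_SHIFT instead of being passed in bytes. A condensed fragment of the entity round trip, using only the signatures introduced in this diff (driver context, locking and error handling omitted, so read this as an illustration rather than buildable code):

	struct drm_sched_entity *entity;
	struct dma_fence *fence;
	uint64_t seq;
	int r;

	/* submit: amdgpu_cs_ib_fill() resolves the userspace triple once */
	r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
				  chunk_ib->ip_instance, chunk_ib->ring,
				  &entity);

	/* amdgpu_cs_submit() publishes the job's fence under that entity */
	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);	/* seq goes to user */

	/* wait: amdgpu_cs_wait_ioctl() resolves the same triple to the same entity */
	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
				  wait->in.ring, &entity);
	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);

Because the fence bookkeeping now lives in the entity rather than the hardware ring, the scheduler is free to pick among several run queues for one entity (see the rqs array passed to drm_sched_entity_init() in amdgpu_ctx.c below) without breaking user-visible sequence numbers.
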
+ 176 - 86
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

@@ -27,6 +27,30 @@
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
 
+#define to_amdgpu_ctx_entity(e)	\
+	container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+	[AMDGPU_HW_IP_GFX]	=	1,
+	[AMDGPU_HW_IP_COMPUTE]	=	4,
+	[AMDGPU_HW_IP_DMA]	=	2,
+	[AMDGPU_HW_IP_UVD]	=	1,
+	[AMDGPU_HW_IP_VCE]	=	1,
+	[AMDGPU_HW_IP_UVD_ENC]	=	1,
+	[AMDGPU_HW_IP_VCN_DEC]	=	1,
+	[AMDGPU_HW_IP_VCN_ENC]	=	1,
+};
+
+static int amdgput_ctx_total_num_entities(void)
+{
+	unsigned i, num_entities = 0;
+
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		num_entities += amdgpu_ctx_num_entities[i];
+
+	return num_entities;
+}
+
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
@@ -48,6 +72,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	unsigned i, j;
 	int r;
 
@@ -60,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+
+	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
 			      sizeof(struct dma_fence*), GFP_KERNEL);
 	if (!ctx->fences)
 		return -ENOMEM;
 
-	mutex_init(&ctx->lock);
+	ctx->entities[0] = kcalloc(num_entities,
+				   sizeof(struct amdgpu_ctx_entity),
+				   GFP_KERNEL);
+	if (!ctx->entities[0]) {
+		r = -ENOMEM;
+		goto error_free_fences;
+	}
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		ctx->rings[i].sequence = 1;
-		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
+	for (i = 0; i < num_entities; ++i) {
+		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+
+		entity->sequence = 1;
+		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
+	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
+		ctx->entities[i] = ctx->entities[i - 1] +
+			amdgpu_ctx_num_entities[i - 1];
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
@@ -80,31 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->init_priority = priority;
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-	/* create context entity for each ring */
-	for (i = 0; i < adev->num_rings; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		struct drm_sched_rq *rq;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+		unsigned num_rings;
+
+		switch (i) {
+		case AMDGPU_HW_IP_GFX:
+			rings[0] = &adev->gfx.gfx_ring[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_COMPUTE:
+			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
+				rings[j] = &adev->gfx.compute_ring[j];
+			num_rings = adev->gfx.num_compute_rings;
+			break;
+		case AMDGPU_HW_IP_DMA:
+			for (j = 0; j < adev->sdma.num_instances; ++j)
+				rings[j] = &adev->sdma.instance[j].ring;
+			num_rings = adev->sdma.num_instances;
+			break;
+		case AMDGPU_HW_IP_UVD:
+			rings[0] = &adev->uvd.inst[0].ring;
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCE:
+			rings[0] = &adev->vce.ring[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_UVD_ENC:
+			rings[0] = &adev->uvd.inst[0].ring_enc[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_DEC:
+			rings[0] = &adev->vcn.ring_dec;
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_ENC:
+			rings[0] = &adev->vcn.ring_enc[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_JPEG:
+			rings[0] = &adev->vcn.ring_jpeg;
+			num_rings = 1;
+			break;
+		}
 
-		rq = &ring->sched.sched_rq[priority];
+		for (j = 0; j < num_rings; ++j)
+			rqs[j] = &rings[j]->sched.sched_rq[priority];
 
-		if (ring == &adev->gfx.kiq.ring)
-			continue;
-
-		r = drm_sched_entity_init(&ctx->rings[i].entity,
-					  &rq, 1, &ctx->guilty);
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
+						  rqs, num_rings, &ctx->guilty);
 		if (r)
-			goto failed;
+			goto error_cleanup_entities;
 	}
 
-	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
-	if (r)
-		goto failed;
-
 	return 0;
 
-failed:
-	for (j = 0; j < i; j++)
-		drm_sched_entity_destroy(&ctx->rings[j].entity);
+error_cleanup_entities:
+	for (i = 0; i < num_entities; ++i)
+		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	kfree(ctx->entities[0]);
+
+error_free_fences:
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 	return r;
@@ -113,25 +191,47 @@ failed:
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+	for (i = 0; i < num_entities; ++i)
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->rings[i].fences[j]);
+			dma_fence_put(ctx->entities[0][i].fences[j]);
 	kfree(ctx->fences);
-	ctx->fences = NULL;
-
-	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+	kfree(ctx->entities[0]);
 
 	mutex_destroy(&ctx->lock);
 
 	kfree(ctx);
 }
 
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+			  u32 ring, struct drm_sched_entity **entity)
+{
+	if (hw_ip >= AMDGPU_HW_IP_NUM) {
+		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+		return -EINVAL;
+	}
+
+	/* Right now all IPs have only one instance - multiple rings. */
+	if (instance != 0) {
+		DRM_DEBUG("invalid ip instance: %d\n", instance);
+		return -EINVAL;
+	}
+
+	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+		return -EINVAL;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring].entity;
+	return 0;
+}
+
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
 			    struct drm_file *filp,
@@ -168,17 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
+	unsigned num_entities;
 	u32 i;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
 
-	for (i = 0; i < ctx->adev->num_rings; i++) {
+	num_entities = 0;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
+		num_entities += amdgpu_ctx_num_entities[i];
 
-		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-			continue;
-
-		drm_sched_entity_destroy(&ctx->rings[i].entity);
-	}
+	for (i = 0; i < num_entities; i++)
+		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 
 	amdgpu_ctx_fini(ref);
 }
@@ -334,56 +434,56 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 	return 0;
 }
 
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			  struct drm_sched_entity *entity,
+			  struct dma_fence *fence, uint64_t* handle)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
-	uint64_t seq = cring->sequence;
-	unsigned idx = 0;
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	uint64_t seq = centity->sequence;
 	struct dma_fence *other = NULL;
+	unsigned idx = 0;
 
 	idx = seq & (amdgpu_sched_jobs - 1);
-	other = cring->fences[idx];
+	other = centity->fences[idx];
 	if (other)
 		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
 	spin_lock(&ctx->ring_lock);
-	cring->fences[idx] = fence;
-	cring->sequence++;
+	centity->fences[idx] = fence;
+	centity->sequence++;
 	spin_unlock(&ctx->ring_lock);
 
 	dma_fence_put(other);
-	if (handler)
-		*handler = seq;
-
-	return 0;
+	if (handle)
+		*handle = seq;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				       struct amdgpu_ring *ring, uint64_t seq)
+				       struct drm_sched_entity *entity,
+				       uint64_t seq)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->ring_lock);
 
 	if (seq == ~0ull)
-		seq = ctx->rings[ring->idx].sequence - 1;
+		seq = centity->sequence - 1;
 
-	if (seq >= cring->sequence) {
+	if (seq >= centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return ERR_PTR(-EINVAL);
 	}
 
 
-	if (seq + amdgpu_sched_jobs < cring->sequence) {
+	if (seq + amdgpu_sched_jobs < centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return NULL;
 	}
 
-	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
 	spin_unlock(&ctx->ring_lock);
 
 	return fence;
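
The centity->fences array above is a fixed-size ring buffer: amdgpu_sched_jobs is a power of two, so seq & (amdgpu_sched_jobs - 1) selects a slot, and only the most recent amdgpu_sched_jobs fences remain retrievable, which is exactly what the two window checks enforce. A worked example, assuming the driver's default amdgpu_sched_jobs of 32:

#include <stdio.h>
#include <stdint.h>

#define AMDGPU_SCHED_JOBS 32	/* must be a power of two */

/* mirrors the checks in amdgpu_ctx_get_fence(); returns the slot, or < 0 */
static int fence_slot(uint64_t centity_sequence, uint64_t seq)
{
	if (seq >= centity_sequence)
		return -1;			/* not submitted yet: -EINVAL   */
	if (seq + AMDGPU_SCHED_JOBS < centity_sequence)
		return -2;			/* too old: slot reused, NULL   */
	return (int)(seq & (AMDGPU_SCHED_JOBS - 1));
}

int main(void)
{
	uint64_t sequence = 100;	/* next seq the entity will hand out */

	printf("%d\n", fence_slot(sequence, 99));	/* 3: newest fence     */
	printf("%d\n", fence_slot(sequence, 68));	/* 4: oldest kept      */
	printf("%d\n", fence_slot(sequence, 67));	/* -2: overwritten     */
	printf("%d\n", fence_slot(sequence, 100));	/* -1: in the future   */
	return 0;
}
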
@@ -392,35 +492,28 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	int i;
-	struct amdgpu_device *adev = ctx->adev;
-	struct drm_sched_rq *rq;
-	struct drm_sched_entity *entity;
-	struct amdgpu_ring *ring;
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
+	unsigned i;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
 
-	for (i = 0; i < adev->num_rings; i++) {
-		ring = adev->rings[i];
-		entity = &ctx->rings[i].entity;
-		rq = &ring->sched.sched_rq[ctx_prio];
-
-		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
-			continue;
+	for (i = 0; i < num_entities; i++) {
+		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
 
-		drm_sched_entity_set_rq(entity, rq);
+		drm_sched_entity_set_priority(entity, ctx_prio);
 	}
 }
 
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+			       struct drm_sched_entity *entity)
 {
-	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
-	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
-	struct dma_fence *other = cring->fences[idx];
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = centity->fences[idx];
 
 	if (other) {
 		signed long r;
@@ -444,6 +537,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id, i;
@@ -459,13 +553,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 			return;
 		}
 
-		for (i = 0; i < ctx->adev->num_rings; i++) {
+		for (i = 0; i < num_entities; i++) {
+			struct drm_sched_entity *entity;
 
-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
-							  max_wait);
+			entity = &ctx->entities[0][i].entity;
+			max_wait = drm_sched_entity_flush(entity, max_wait);
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -473,6 +565,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id, i;
@@ -484,16 +577,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 		if (!ctx->adev)
 			return;
 
-		for (i = 0; i < ctx->adev->num_rings; i++) {
-
-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			if (kref_read(&ctx->refcount) == 1)
-				drm_sched_entity_fini(&ctx->rings[i].entity);
-			else
-				DRM_ERROR("ctx %p is still alive\n", ctx);
+		if (kref_read(&ctx->refcount) != 1) {
+			DRM_ERROR("ctx %p is still alive\n", ctx);
+			continue;
 		}
+
+		for (i = 0; i < num_entities; i++)
+			drm_sched_entity_fini(&ctx->entities[0][i].entity);
 	}
 }
 

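The layout behind ctx->entities deserves a note: amdgpu_ctx_init() makes a single allocation holding every amdgpu_ctx_entity and then sets entities[hw_ip] to prefix offsets into it, which is why entities[0][i] can walk all entities linearly (fini, flush, priority override) while entities[hw_ip][ring] addresses exactly one. A runnable model of that indexing, with the counts copied from amdgpu_ctx_num_entities[] and stand-in names for the AMDGPU_HW_IP_* constants (VCN_JPEG gets no entry in the diff's array, so it designated-initializes to 0 entities even though the ring switch handles it):

#include <stdio.h>

enum { GFX, COMPUTE, DMA, UVD, VCE, UVD_ENC, VCN_DEC, VCN_ENC, VCN_JPEG,
       HW_IP_NUM };

static const unsigned num_entities[HW_IP_NUM] = {
	[GFX] = 1, [COMPUTE] = 4, [DMA] = 2, [UVD] = 1, [VCE] = 1,
	[UVD_ENC] = 1, [VCN_DEC] = 1, [VCN_ENC] = 1, [VCN_JPEG] = 0,
};

struct ctx_entity { unsigned slot; };

int main(void)
{
	struct ctx_entity table[12];	/* 1+4+2+1+1+1+1+1+0 entities total */
	struct ctx_entity *entities[HW_IP_NUM];
	unsigned i;

	for (i = 0; i < 12; ++i)
		table[i].slot = i;

	/* prefix offsets, exactly as in amdgpu_ctx_init() */
	entities[0] = table;
	for (i = 1; i < HW_IP_NUM; ++i)
		entities[i] = entities[i - 1] + num_entities[i - 1];

	/* entities[hw_ip][ring] is what amdgpu_ctx_get_entity() hands out */
	printf("COMPUTE ring 2 -> flat slot %u\n", entities[COMPUTE][2].slot);
	printf("VCN_DEC ring 0 -> flat slot %u\n", entities[VCN_DEC][0].slot);
	return 0;
}

This flat layout is also why a single kfree(ctx->entities[0]) in amdgpu_ctx_fini() releases the whole table.
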
+ 88 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

@@ -0,0 +1,88 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CTX_H__
+#define __AMDGPU_CTX_H__
+
+#include "amdgpu_ring.h"
+
+struct drm_device;
+struct drm_file;
+struct amdgpu_fpriv;
+
+struct amdgpu_ctx_entity {
+	uint64_t		sequence;
+	struct dma_fence	**fences;
+	struct drm_sched_entity	entity;
+};
+
+struct amdgpu_ctx {
+	struct kref			refcount;
+	struct amdgpu_device		*adev;
+	unsigned			reset_counter;
+	unsigned			reset_counter_query;
+	uint32_t			vram_lost_counter;
+	spinlock_t			ring_lock;
+	struct dma_fence		**fences;
+	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM];
+	bool				preamble_presented;
+	enum drm_sched_priority		init_priority;
+	enum drm_sched_priority		override_priority;
+	struct mutex			lock;
+	atomic_t			guilty;
+};
+
+struct amdgpu_ctx_mgr {
+	struct amdgpu_device	*adev;
+	struct mutex		lock;
+	/* protected by lock */
+	struct idr		ctx_handles;
+};
+
+extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+			  u32 ring, struct drm_sched_entity **entity);
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			  struct drm_sched_entity *entity,
+			  struct dma_fence *fence, uint64_t *seq);
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+				       struct drm_sched_entity *entity,
+				       uint64_t seq);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+				  enum drm_sched_priority priority);
+
+int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp);
+
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+			       struct drm_sched_entity *entity);
+
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
+
+#endif

+ 2 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

@@ -826,21 +826,13 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
 {
 	struct drm_minor *minor = adev->ddev->primary;
 	struct dentry *ent, *root = minor->debugfs_root;
-	unsigned i, j;
+	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
 		ent = debugfs_create_file(debugfs_regs_names[i],
 					  S_IFREG | S_IRUGO, root,
 					  adev, debugfs_regs[i]);
-		if (IS_ERR(ent)) {
-			for (j = 0; j < i; j++) {
-				debugfs_remove(adev->debugfs_regs[i]);
-				adev->debugfs_regs[i] = NULL;
-			}
-			return PTR_ERR(ent);
-		}
-
-		if (!i)
+		if (!i && !IS_ERR_OR_NULL(ent))
 			i_size_write(ent->d_inode, adev->rmmio_size);
 		adev->debugfs_regs[i] = ent;
 	}

+ 287 - 271
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -62,6 +62,8 @@
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS		2000
 
@@ -651,71 +653,6 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 		__clear_bit(wb, adev->wb.used);
 }
 
-/**
- * amdgpu_device_vram_location - try to find VRAM location
- *
- * @adev: amdgpu device structure holding all necessary informations
- * @mc: memory controller structure holding memory informations
- * @base: base address at which to put VRAM
- *
- * Function will try to place VRAM at base address provided
- * as parameter.
- */
-void amdgpu_device_vram_location(struct amdgpu_device *adev,
-				 struct amdgpu_gmc *mc, u64 base)
-{
-	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
-
-	mc->vram_start = base;
-	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
-	if (limit && limit < mc->real_vram_size)
-		mc->real_vram_size = limit;
-	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
-			mc->mc_vram_size >> 20, mc->vram_start,
-			mc->vram_end, mc->real_vram_size >> 20);
-}
-
-/**
- * amdgpu_device_gart_location - try to find GART location
- *
- * @adev: amdgpu device structure holding all necessary informations
- * @mc: memory controller structure holding memory informations
- *
- * Function will place try to place GART before or after VRAM.
- *
- * If GART size is bigger than space left then we ajust GART size.
- * Thus function will never fails.
- */
-void amdgpu_device_gart_location(struct amdgpu_device *adev,
-				 struct amdgpu_gmc *mc)
-{
-	u64 size_af, size_bf;
-
-	mc->gart_size += adev->pm.smu_prv_buffer_size;
-
-	size_af = adev->gmc.mc_mask - mc->vram_end;
-	size_bf = mc->vram_start;
-	if (size_bf > size_af) {
-		if (mc->gart_size > size_bf) {
-			dev_warn(adev->dev, "limiting GART\n");
-			mc->gart_size = size_bf;
-		}
-		mc->gart_start = 0;
-	} else {
-		if (mc->gart_size > size_af) {
-			dev_warn(adev->dev, "limiting GART\n");
-			mc->gart_size = size_af;
-		}
-		/* VCE doesn't like it when BOs cross a 4GB segment, so align
-		 * the GART base on a 4GB boundary as well.
-		 */
-		mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);
-	}
-	mc->gart_end = mc->gart_start + mc->gart_size - 1;
-	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
-			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
-}
-
 /**
  * amdgpu_device_resize_fb_bar - try to resize FB BAR
  *
@@ -1397,7 +1334,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 		chip_name = "vega12";
 		break;
 	case CHIP_RAVEN:
-		chip_name = "raven";
+		if (adev->rev_id >= 8)
+			chip_name = "raven2";
+		else if (adev->pdev->device == 0x15d8)
+			chip_name = "picasso";
+		else
+			chip_name = "raven";
 		break;
 	}
 
@@ -1551,6 +1493,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	}
 
 	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
+	if (amdgpu_sriov_vf(adev))
+		adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@@ -1581,6 +1525,92 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
+{
+	int i, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_blocks[i].status.sw)
+			continue;
+		if (adev->ip_blocks[i].status.hw)
+			continue;
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+			if (r) {
+				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
+				return r;
+			}
+			adev->ip_blocks[i].status.hw = true;
+		}
+	}
+
+	return 0;
+}
+
+static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
+{
+	int i, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_blocks[i].status.sw)
+			continue;
+		if (adev->ip_blocks[i].status.hw)
+			continue;
+		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+		if (r) {
+			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+				  adev->ip_blocks[i].version->funcs->name, r);
+			return r;
+		}
+		adev->ip_blocks[i].status.hw = true;
+	}
+
+	return 0;
+}
+
+static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
+{
+	int r = 0;
+	int i;
+
+	if (adev->asic_type >= CHIP_VEGA10) {
+		for (i = 0; i < adev->num_ip_blocks; i++) {
+			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+				if (adev->in_gpu_reset || adev->in_suspend) {
+					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
+						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
+					r = adev->ip_blocks[i].version->funcs->resume(adev);
+					if (r) {
+						DRM_ERROR("resume of IP block <%s> failed %d\n",
+							  adev->ip_blocks[i].version->funcs->name, r);
+						return r;
+					}
+				} else {
+					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+					if (r) {
+						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+						  adev->ip_blocks[i].version->funcs->name, r);
+						return r;
+					}
+				}
+				adev->ip_blocks[i].status.hw = true;
+			}
+		}
+	}
+
+	if (adev->powerplay.pp_funcs->load_firmware) {
+		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
+		if (r) {
+			pr_err("firmware loading failed\n");
+			return r;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * amdgpu_device_ip_init - run init for hardware IPs
  *
@@ -1637,20 +1667,23 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		}
 	}
 
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.sw)
-			continue;
-		if (adev->ip_blocks[i].status.hw)
-			continue;
-		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
-		if (r) {
-			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
-				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
-		}
-		adev->ip_blocks[i].status.hw = true;
-	}
+	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
+	if (r)
+		return r;
+
+	r = amdgpu_device_ip_hw_init_phase1(adev);
+	if (r)
+		return r;
 
+	r = amdgpu_device_fw_loading(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_device_ip_hw_init_phase2(adev);
+	if (r)
+		return r;
+
+	amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
 	if (amdgpu_sriov_vf(adev))
@@ -1690,25 +1723,28 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_device_ip_late_set_cg_state - late init for clockgating
+ * amdgpu_device_set_cg_state - set clockgating for amdgpu device
  *
  * @adev: amdgpu_device pointer
  *
- * Late initialization pass enabling clockgating for hardware IPs.
  * The list of all the hardware IPs that make up the asic is walked and the
- * set_clockgating_state callbacks are run.  This stage is run late
- * in the init process.
+ * set_clockgating_state callbacks are run.
+ * Late initialization pass enabling clockgating for hardware IPs.
+ * Fini or suspend, pass disabling clockgating for hardware IPs.
  * Returns 0 on success, negative error code on failure.
  */
-static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
+
+static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+						enum amd_clockgating_state state)
 {
-	int i = 0, r;
+	int i, j, r;
 
 	if (amdgpu_emu_mode == 1)
 		return 0;
 
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
+	for (j = 0; j < adev->num_ip_blocks; j++) {
+		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+		if (!adev->ip_blocks[i].status.late_initialized)
 			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -1717,7 +1753,7 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
-										     AMD_CG_STATE_GATE);
+										     state);
 			if (r) {
 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
 					  adev->ip_blocks[i].version->funcs->name, r);
@@ -1729,15 +1765,16 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev)
+static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
 {
-	int i = 0, r;
+	int i, j, r;
 
 	if (amdgpu_emu_mode == 1)
 		return 0;
 
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
+	for (j = 0; j < adev->num_ip_blocks; j++) {
+		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+		if (!adev->ip_blocks[i].status.late_initialized)
 			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -1746,7 +1783,7 @@ static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev)
 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
 			/* enable powergating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
-										     AMD_PG_STATE_GATE);
+											state);
 			if (r) {
 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
 					  adev->ip_blocks[i].version->funcs->name, r);
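
amdgpu_device_set_cg_state() and amdgpu_device_set_pg_state() fold the old gate-only late-init helpers and the ungate logic that used to be scattered through the fini/suspend paths into one routine each. The index expression is the interesting part: gating walks the IP blocks front to back, ungating walks them back to front, so teardown mirrors bring-up order. A self-contained sketch of that walk (block names invented for the demo):

#include <stdio.h>

enum cg_state { CG_GATE, CG_UNGATE };

int main(void)
{
	const char *blocks[] = { "COMMON", "GMC", "IH", "SMC", "GFX" };
	const int n = sizeof(blocks) / sizeof(blocks[0]);
	enum cg_state state;
	int i, j;

	for (state = CG_GATE; state <= CG_UNGATE; ++state) {
		printf("%s:", state == CG_GATE ? "gate  " : "ungate");
		for (j = 0; j < n; ++j) {
			/* the index trick from amdgpu_device_set_cg_state() */
			i = (state == CG_GATE) ? j : n - j - 1;
			printf(" %s", blocks[i]);
		}
		printf("\n");
	}
	return 0;
}
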
@@ -1774,7 +1811,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 	int i = 0, r;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
+		if (!adev->ip_blocks[i].status.hw)
 			continue;
 		if (adev->ip_blocks[i].version->funcs->late_init) {
 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
@@ -1783,12 +1820,12 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 					  adev->ip_blocks[i].version->funcs->name, r);
 				return r;
 			}
-			adev->ip_blocks[i].status.late_initialized = true;
 		}
+		adev->ip_blocks[i].status.late_initialized = true;
 	}
 
-	amdgpu_device_ip_late_set_cg_state(adev);
-	amdgpu_device_ip_late_set_pg_state(adev);
+	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 
 	queue_delayed_work(system_wq, &adev->late_init_work,
 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -1814,22 +1851,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	int i, r;
 
 	amdgpu_amdkfd_device_fini(adev);
+
+	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
 	/* need to disable SMC first */
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.hw)
 			continue;
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
-			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
-			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
-			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
-										     AMD_CG_STATE_UNGATE);
-			if (r) {
-				DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].version->funcs->name, r);
-				return r;
-			}
-			if (adev->powerplay.pp_funcs->set_powergating_by_smu)
-				amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
 			/* XXX handle errors */
 			if (r) {
@@ -1845,20 +1875,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 		if (!adev->ip_blocks[i].status.hw)
 			continue;
 
-		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
-			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
-			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
-			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
-			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
-			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
-										     AMD_CG_STATE_UNGATE);
-			if (r) {
-				DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].version->funcs->name, r);
-				return r;
-			}
-		}
-
 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
 		/* XXX handle errors */
 		if (r) {
@@ -1875,6 +1891,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 			continue;
 
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+			amdgpu_ucode_free_bo(adev);
 			amdgpu_free_static_csa(adev);
 			amdgpu_device_wb_fini(adev);
 			amdgpu_device_vram_scratch_fini(adev);
@@ -1905,14 +1922,47 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int amdgpu_device_enable_mgpu_fan_boost(void)
+{
+	struct amdgpu_gpu_instance *gpu_ins;
+	struct amdgpu_device *adev;
+	int i, ret = 0;
+
+	mutex_lock(&mgpu_info.mutex);
+
+	/*
+	 * MGPU fan boost feature should be enabled
+	 * only when there are two or more dGPUs in
+	 * the system
+	 */
+	if (mgpu_info.num_dgpu < 2)
+		goto out;
+
+	for (i = 0; i < mgpu_info.num_dgpu; i++) {
+		gpu_ins = &(mgpu_info.gpu_ins[i]);
+		adev = gpu_ins->adev;
+		if (!(adev->flags & AMD_IS_APU) &&
+		    !gpu_ins->mgpu_fan_enabled &&
+		    adev->powerplay.pp_funcs &&
+		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
+			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
+			if (ret)
+				break;
+
+			gpu_ins->mgpu_fan_enabled = 1;
+		}
+	}
+
+out:
+	mutex_unlock(&mgpu_info.mutex);
+
+	return ret;
+}
+
 /**
- * amdgpu_device_ip_late_init_func_handler - work handler for clockgating
- *
- * @work: work_struct
+ * amdgpu_device_ip_late_init_func_handler - work handler for ib test
  *
- * Work handler for amdgpu_device_ip_late_set_cg_state.  We put the
- * clockgating setup into a worker thread to speed up driver init and
- * resume from suspend.
+ * @work: work_struct.
  */
 static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 {
@@ -1923,6 +1973,23 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 	r = amdgpu_ib_ring_tests(adev);
 	if (r)
 		DRM_ERROR("ib ring test failed (%d).\n", r);
+
+	r = amdgpu_device_enable_mgpu_fan_boost();
+	if (r)
+		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+}
+
+static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
+
+	mutex_lock(&adev->gfx.gfx_off_mutex);
+	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
+		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+			adev->gfx.gfx_off_state = true;
+	}
+	mutex_unlock(&adev->gfx.gfx_off_mutex);
 }
 
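amdgpu_device_delay_enable_gfx_off() is one half of a request-count idiom: gfx_off_req_count starts at 1 so GFXOFF stays held off during init, users of the GFX block bump the count while they need it, and only when it drops back to zero does a delayed work item, not the caller itself, ask the SMU to power the block down, keeping short busy bursts from thrashing the state. A user-space model of that counting; only the work handler appears in this hunk, so the increment/decrement side below is an assumption reconstructed from the fields it touches, and the SMU call and locking are reduced to a flag and comments:

#include <stdbool.h>
#include <stdio.h>

static unsigned gfx_off_req_count = 1;	/* held off until init completes */
static bool gfx_off_state;		/* true once GFX is powered down */

/* stands in for the delayed work handler shown in the diff */
static void delay_enable_gfx_off(void)
{
	if (!gfx_off_state && !gfx_off_req_count) {
		gfx_off_state = true;	/* amdgpu_dpm_set_powergating_by_smu(.., true) */
		printf("GFXOFF entered\n");
	}
}

/* assumed counterpart: enable=false takes a reference, enable=true drops it */
static void gfx_off_ctrl(bool enable)
{
	if (!enable) {
		gfx_off_req_count++;
		if (gfx_off_state) {
			gfx_off_state = false;	/* wake GFX synchronously */
			printf("GFXOFF left\n");
		}
	} else if (gfx_off_req_count) {
		if (--gfx_off_req_count == 0)
			delay_enable_gfx_off();	/* really: schedule_delayed_work() */
	}
}

int main(void)
{
	gfx_off_ctrl(true);	/* init done: drop the initial reference */
	gfx_off_ctrl(false);	/* a user needs GFX: count back to 1 */
	gfx_off_ctrl(true);	/* user done: GFXOFF again after the delay */
	return 0;
}
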
 /**
@@ -1940,23 +2007,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 {
 	int i, r;
 
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_virt_request_full_gpu(adev, false);
+	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* displays are handled separately */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
-			/* ungate blocks so that suspend can properly shut them down */
-			if (adev->ip_blocks[i].version->funcs->set_clockgating_state) {
-				r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
-											     AMD_CG_STATE_UNGATE);
-				if (r) {
-					DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
-						  adev->ip_blocks[i].version->funcs->name, r);
-				}
-			}
 			/* XXX handle errors */
 			r = adev->ip_blocks[i].version->funcs->suspend(adev);
 			/* XXX handle errors */
@@ -1967,9 +2025,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 		}
 	}
 
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_virt_release_full_gpu(adev, false);
-
 	return 0;
 }
 
@@ -1988,36 +2043,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 {
 	int i, r;
 
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_virt_request_full_gpu(adev, false);
-
-	/* ungate SMC block first */
-	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
-						   AMD_CG_STATE_UNGATE);
-	if (r) {
-		DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
-	}
-
-	/* call smu to disable gfx off feature first when suspend */
-	if (adev->powerplay.pp_funcs->set_powergating_by_smu)
-		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
-
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* displays are handled in phase1 */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
 			continue;
-		/* ungate blocks so that suspend can properly shut them down */
-		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
-			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
-			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
-										     AMD_CG_STATE_UNGATE);
-			if (r) {
-				DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].version->funcs->name, r);
-			}
-		}
 		/* XXX handle errors */
 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
 		/* XXX handle errors */
@@ -2027,9 +2058,6 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 		}
 	}
 
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_virt_release_full_gpu(adev, false);
-
 	return 0;
 }
 
@@ -2048,11 +2076,17 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 {
 	int r;
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_request_full_gpu(adev, false);
+
 	r = amdgpu_device_ip_suspend_phase1(adev);
 	if (r)
 		return r;
 	r = amdgpu_device_ip_suspend_phase2(adev);
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_release_full_gpu(adev, false);
+
 	return r;
 }
 
@@ -2178,7 +2212,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 			continue;
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
 			continue;
 		r = adev->ip_blocks[i].version->funcs->resume(adev);
 		if (r) {
@@ -2210,6 +2245,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 	r = amdgpu_device_ip_resume_phase1(adev);
 	if (r)
 		return r;
+
+	r = amdgpu_device_fw_loading(adev);
+	if (r)
+		return r;
+
 	r = amdgpu_device_ip_resume_phase2(adev);
 
 	return r;
@@ -2335,7 +2375,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
-	adev->vm_manager.vm_pte_num_rings = 0;
+	adev->vm_manager.vm_pte_num_rqs = 0;
 	adev->gmc.gmc_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -2367,6 +2407,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->gfx.gpu_clock_mutex);
 	mutex_init(&adev->srbm_mutex);
 	mutex_init(&adev->gfx.pipe_reserve_mutex);
+	mutex_init(&adev->gfx.gfx_off_mutex);
 	mutex_init(&adev->grbm_idx_mutex);
 	mutex_init(&adev->mn_lock);
 	mutex_init(&adev->virt.vf_errors.lock);
@@ -2393,7 +2434,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	INIT_DELAYED_WORK(&adev->late_init_work,
 			  amdgpu_device_ip_late_init_func_handler);
+	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
+			  amdgpu_device_delay_enable_gfx_off);
 
+	adev->gfx.gfx_off_req_count = 1;
 	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
 
 	/* Registers mapping */
@@ -2700,11 +2744,14 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
+	adev->in_suspend = true;
 	drm_kms_helper_poll_disable(dev);
 
 	if (fbcon)
 		amdgpu_fbdev_set_suspend(adev, 1);
 
+	cancel_delayed_work_sync(&adev->late_init_work);
+
 	if (!amdgpu_device_has_dc_support(adev)) {
 		/* turn off display hw */
 		drm_modeset_lock_all(dev);
@@ -2883,6 +2930,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 #ifdef CONFIG_PM
 	dev->dev->power.disable_depth--;
 #endif
+	adev->in_suspend = false;
+
 	return 0;
 }
 
@@ -3041,71 +3090,22 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring for the engine handling the buffer operations
- * @bo: amdgpu_bo buffer whose shadow is being restored
- * @fence: dma_fence associated with the operation
- *
- * Restores the VRAM buffer contents from the shadow in GTT.  Used to
- * restore things like GPUVM page tables after a GPU reset where
- * the contents of VRAM might be lost.
- * Returns 0 on success, negative error code on failure.
- */
-static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
-						  struct amdgpu_ring *ring,
-						  struct amdgpu_bo *bo,
-						  struct dma_fence **fence)
-{
-	uint32_t domain;
-	int r;
-
-	if (!bo->shadow)
-		return 0;
-
-	r = amdgpu_bo_reserve(bo, true);
-	if (r)
-		return r;
-	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-	/* if bo has been evicted, then no need to recover */
-	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-		r = amdgpu_bo_validate(bo->shadow);
-		if (r) {
-			DRM_ERROR("bo validate failed!\n");
-			goto err;
-		}
-
-		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
-						 NULL, fence, true);
-		if (r) {
-			DRM_ERROR("recover page table failed!\n");
-			goto err;
-		}
-	}
-err:
-	amdgpu_bo_unreserve(bo);
-	return r;
-}
-
-/**
- * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
+ * amdgpu_device_recover_vram - Recover some VRAM contents
  *
  * @adev: amdgpu_device pointer
  *
  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
  * restore things like GPUVM page tables after a GPU reset where
  * the contents of VRAM might be lost.
- * Returns 0 on success, 1 on failure.
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
  */
-static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
+static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-	struct amdgpu_bo *bo, *tmp;
 	struct dma_fence *fence = NULL, *next = NULL;
-	long r = 1;
-	int i = 0;
-	long tmo;
+	struct amdgpu_bo *shadow;
+	long r = 1, tmo;
 
 	if (amdgpu_sriov_runtime(adev))
 		tmo = msecs_to_jiffies(8000);
@@ -3114,44 +3114,40 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
 
 	DRM_INFO("recover vram bo from shadow start\n");
 	mutex_lock(&adev->shadow_list_lock);
-	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
-		next = NULL;
-		amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
+	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
+
+		/* No need to recover an evicted BO */
+		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
+		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
+			continue;
+
+		r = amdgpu_bo_restore_shadow(shadow, &next);
+		if (r)
+			break;
+
 		if (fence) {
 			r = dma_fence_wait_timeout(fence, false, tmo);
-			if (r == 0)
-				pr_err("wait fence %p[%d] timeout\n", fence, i);
-			else if (r < 0)
-				pr_err("wait fence %p[%d] interrupted\n", fence, i);
-			if (r < 1) {
-				dma_fence_put(fence);
-				fence = next;
+			dma_fence_put(fence);
+			fence = next;
+			if (r <= 0)
 				break;
-			}
-			i++;
+		} else {
+			fence = next;
 		}
-
-		dma_fence_put(fence);
-		fence = next;
 	}
 	mutex_unlock(&adev->shadow_list_lock);
 
-	if (fence) {
-		r = dma_fence_wait_timeout(fence, false, tmo);
-		if (r == 0)
-			pr_err("wait fence %p[%d] timeout\n", fence, i);
-		else if (r < 0)
-			pr_err("wait fence %p[%d] interrupted\n", fence, i);
-
-	}
+	if (fence)
+		tmo = dma_fence_wait_timeout(fence, false, tmo);
 	dma_fence_put(fence);
 
-	if (r > 0)
-		DRM_INFO("recover vram bo from shadow done\n");
-	else
+	if (r <= 0 || tmo <= 0) {
 		DRM_ERROR("recover vram bo from shadow failed\n");
+		return -EIO;
+	}
 
-	return (r > 0) ? 0 : 1;
+	DRM_INFO("recover vram bo from shadow done\n");
+	return 0;
 }
 
 /**
@@ -3204,6 +3200,10 @@ retry:
 			if (r)
 				goto out;
 
+			r = amdgpu_device_fw_loading(adev);
+			if (r)
+				return r;
+
 			r = amdgpu_device_ip_resume_phase2(adev);
 			if (r)
 				goto out;
@@ -3225,8 +3225,8 @@ out:
 		}
 	}
 
-	if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
-		r = amdgpu_device_handle_vram_lost(adev);
+	if (!r)
+		r = amdgpu_device_recover_vram(adev);
 
 	return r;
 }
@@ -3260,6 +3260,10 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	/* we need recover gart prior to run SMC/CP/SDMA resume */
 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
 
+	r = amdgpu_device_fw_loading(adev);
+	if (r)
+		return r;
+
 	/* now we are okay to resume SMC/CP/SDMA */
 	r = amdgpu_device_ip_reinit_late_sriov(adev);
 	if (r)
@@ -3272,38 +3276,50 @@ error:
 	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		atomic_inc(&adev->vram_lost_counter);
-		r = amdgpu_device_handle_vram_lost(adev);
+		r = amdgpu_device_recover_vram(adev);
 	}
 
 	return r;
 }
 
+/**
+ * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
+ * a hung GPU.
+ */
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
+{
+	if (!amdgpu_device_ip_check_soft_reset(adev)) {
+		DRM_INFO("Timeout, but no hardware hang detected.\n");
+		return false;
+	}
+
+	if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1  &&
+					 !amdgpu_sriov_vf(adev))) {
+		DRM_INFO("GPU recovery disabled.\n");
+		return false;
+	}
+
+	return true;
+}
+
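
With the "force" parameter gone, the decision of whether to reset now lives in this helper, and callers are expected to gate recovery themselves. A minimal sketch of a timeout handler using the pair (the handler name and wiring are assumptions, not part of this patch):

	/* Hypothetical timeout callback: gate the reset on the new helper
	 * instead of passing a "force" flag to amdgpu_device_gpu_recover().
	 */
	static void example_job_timedout(struct amdgpu_device *adev,
					 struct amdgpu_job *job)
	{
		if (amdgpu_device_should_recover_gpu(adev))
			amdgpu_device_gpu_recover(adev, job);
		/* otherwise: no hang detected, or recovery is disabled */
	}
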
 /**
  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
  * @adev: amdgpu device pointer
  * @job: which job trigger hang
- * @force: forces reset regardless of amdgpu_gpu_recovery
  *
  * Attempt to reset the GPU if it has hung (all asics).
  * Returns 0 for success or an error on failure.
  */
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
-			      struct amdgpu_job *job, bool force)
+			      struct amdgpu_job *job)
 {
 	int i, r, resched;
 
-	if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
-		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
-		return 0;
-	}
-
-	if (!force && (amdgpu_gpu_recovery == 0 ||
-			(amdgpu_gpu_recovery == -1  && !amdgpu_sriov_vf(adev)))) {
-		DRM_INFO("GPU recovery disabled.\n");
-		return 0;
-	}
-
 	dev_info(adev->dev, "GPU reset begin!\n");
 
 	mutex_lock(&adev->lock_reset);

+ 15 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_display.h

@@ -23,6 +23,21 @@
 #ifndef __AMDGPU_DISPLAY_H__
 #define __AMDGPU_DISPLAY_H__
 
+#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
+#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
+#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
+#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
+#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
+#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
+#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
+#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
+#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
+#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
+#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
+
+int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *filp);
+void amdgpu_display_update_priority(struct amdgpu_device *adev);
 uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
 struct drm_framebuffer *
 amdgpu_display_user_framebuffer_create(struct drm_device *dev,

+ 7 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h

@@ -278,6 +278,9 @@ enum amdgpu_pcie_gen {
 #define amdgpu_dpm_get_fan_speed_rpm(adev, s) \
 		((adev)->powerplay.pp_funcs->get_fan_speed_rpm)((adev)->powerplay.pp_handle, (s))
 
+#define amdgpu_dpm_set_fan_speed_rpm(adev, s) \
+		((adev)->powerplay.pp_funcs->set_fan_speed_rpm)((adev)->powerplay.pp_handle, (s))
+
 #define amdgpu_dpm_get_sclk(adev, l) \
 		((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)))
 
@@ -357,6 +360,10 @@ enum amdgpu_pcie_gen {
 		((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
 			(adev)->powerplay.pp_handle, type, parameter, size))
 
+#define amdgpu_dpm_enable_mgpu_fan_boost(adev) \
+		((adev)->powerplay.pp_funcs->enable_mgpu_fan_boost(\
+			(adev)->powerplay.pp_handle))
+
 struct amdgpu_dpm {
 	struct amdgpu_ps        *ps;
 	/* number of valid power states */

+ 120 - 49
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -36,6 +36,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
+#include "amdgpu_gem.h"
 
 #include "amdgpu_amdkfd.h"
 
@@ -113,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
-/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/
-uint amdgpu_pp_feature_mask = 0xfffd3fff;
+/* OverDrive (bit 14) disabled by default */
+uint amdgpu_pp_feature_mask = 0xffffbfff;
 int amdgpu_ngg = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
@@ -126,6 +127,9 @@ int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
 int amdgpu_emu_mode = 0;
 uint amdgpu_smu_memory_pool_size = 0;
+struct amdgpu_mgpu_info mgpu_info = {
+	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
+};
 
 /**
  * DOC: vramlimit (int)
@@ -531,6 +535,102 @@ MODULE_PARM_DESC(smu_memory_pool_size,
 		"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
 module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
 
+#ifdef CONFIG_HSA_AMD
+/**
+ * DOC: sched_policy (int)
+ * Set scheduling policy. Default is HWS (hardware scheduling) with over-subscription.
+ * Setting 1 disables over-subscription. Setting 2 disables HWS and statically
+ * assigns queues to HQDs.
+ */
+int sched_policy = KFD_SCHED_POLICY_HWS;
+module_param(sched_policy, int, 0444);
+MODULE_PARM_DESC(sched_policy,
+	"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
+
+/**
+ * DOC: hws_max_conc_proc (int)
+ * Maximum number of processes that HWS can schedule concurrently. The maximum is the
+ * number of VMIDs assigned to the HWS, which is also the default.
+ */
+int hws_max_conc_proc = 8;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+	"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
+/**
+ * DOC: cwsr_enable (int)
+ * CWSR (compute wave store and resume) allows the GPU to preempt shader execution in
+ * the middle of a compute wave. Default is 1 to enable this feature. Setting 0
+ * disables it.
+ */
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
+/**
+ * DOC: max_num_of_queues_per_device (int)
+ * Maximum number of queues per device. Valid setting is between 1 and 4096. Default
+ * is 4096.
+ */
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
+
+/**
+ * DOC: send_sigterm (int)
+ * Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
+ * but just print errors on dmesg. Setting 1 enables sending sigterm.
+ */
+int send_sigterm;
+module_param(send_sigterm, int, 0444);
+MODULE_PARM_DESC(send_sigterm,
+	"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
+
+/**
+ * DOC: debug_largebar (int)
+ * Set debug_largebar to 1 to simulate large-bar capability on a non-large-bar
+ * system. This limits the VRAM size reported to ROCm applications to the
+ * visible size, usually 256MB.
+ * Default value is 0 (disabled).
+ */
+int debug_largebar;
+module_param(debug_largebar, int, 0444);
+MODULE_PARM_DESC(debug_largebar,
+	"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
+
+/**
+ * DOC: ignore_crat (int)
+ * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
+ * table to get information about AMD APUs. This option can serve as a workaround on
+ * systems with a broken CRAT table.
+ */
+int ignore_crat;
+module_param(ignore_crat, int, 0444);
+MODULE_PARM_DESC(ignore_crat,
+	"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+
+/**
+ * DOC: noretry (int)
+ * This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry.
+ * Setting 1 disables retry.
+ * Retry is needed for recoverable page faults.
+ */
+int noretry;
+module_param(noretry, int, 0644);
+MODULE_PARM_DESC(noretry,
+	"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
+
+/**
+ * DOC: halt_if_hws_hang (int)
+ * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
+ * Setting 1 enables halt on hang.
+ */
+int halt_if_hws_hang;
+module_param(halt_if_hws_hang, int, 0644);
+MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+#endif
+
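Since these are plain module_param() definitions compiled into amdgpu, they show up as amdgpu.<name> on the kernel command line and under /sys/module/amdgpu/parameters/; for example, booting with amdgpu.noretry=1 sets sh_mem_config.retry_disable.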
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -770,14 +870,15 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
 	{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
 	/* Vega 20 */
-	{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+	{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+	{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+	{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+	{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+	{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	/* Raven */
 	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+	{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
 
 	{0, 0, 0}
 };
@@ -786,28 +887,6 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
 
 static struct drm_driver kms_driver;
 
-static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
-{
-	struct apertures_struct *ap;
-	bool primary = false;
-
-	ap = alloc_apertures(1);
-	if (!ap)
-		return -ENOMEM;
-
-	ap->ranges[0].base = pci_resource_start(pdev, 0);
-	ap->ranges[0].size = pci_resource_len(pdev, 0);
-
-#ifdef CONFIG_X86
-	primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
-#endif
-	drm_fb_helper_remove_conflicting_framebuffers(ap, "amdgpudrmfb", primary);
-	kfree(ap);
-
-	return 0;
-}
-
-
 static int amdgpu_pci_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *ent)
 {
@@ -826,30 +905,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 		return -ENODEV;
 	}
 
-	/*
-	 * Initialize amdkfd before starting radeon. If it was not loaded yet,
-	 * defer radeon probing
-	 */
-	ret = amdgpu_amdkfd_init();
-	if (ret == -EPROBE_DEFER)
-		return ret;
-
 	/* Get rid of things like offb */
-	ret = amdgpu_kick_out_firmware_fb(pdev);
+	ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
 	if (ret)
 		return ret;
 
-	/* warn the user if they mix atomic and non-atomic capable GPUs */
-	if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
-		DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
-	/* support atomic early so the atomic debugfs stuff gets created */
-	if (supports_atomic)
-		kms_driver.driver_features |= DRIVER_ATOMIC;
-
 	dev = drm_dev_alloc(&kms_driver, &pdev->dev);
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 
+	if (!supports_atomic)
+		dev->driver_features &= ~DRIVER_ATOMIC;
+
 	ret = pci_enable_device(pdev);
 	if (ret)
 		goto err_free;
@@ -882,8 +949,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
-	drm_dev_unregister(dev);
-	drm_dev_put(dev);
+	DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
+	drm_dev_unplug(dev);
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 }
@@ -1101,7 +1168,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 
 static struct drm_driver kms_driver = {
 	.driver_features =
-	    DRIVER_USE_AGP |
+	    DRIVER_USE_AGP | DRIVER_ATOMIC |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
 	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
 	.load = amdgpu_driver_load_kms,
@@ -1178,6 +1245,10 @@ static int __init amdgpu_init(void)
 	pdriver = &amdgpu_kms_pci_driver;
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
+
+	/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
+	amdgpu_amdkfd_init();
+
 	/* let modprobe override vga console setting */
 	return pci_register_driver(pdriver);
 

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c

@@ -28,6 +28,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
 #include "atom.h"
 #include "atombios_encoders.h"
 

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c

@@ -33,6 +33,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "cikd.h"
+#include "amdgpu_gem.h"
 
 #include <drm/drm_fb_helper.h>
 

+ 11 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

@@ -216,8 +216,10 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
  * Checks the current fence value and calculates the last
  * signalled fence value. Wakes the fence queue if the
  * sequence number has increased.
+ *
+ * Returns true if the fence was processed.
  */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
+bool amdgpu_fence_process(struct amdgpu_ring *ring)
 {
 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
 	uint32_t seq, last_seq;
@@ -229,11 +231,12 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 
 	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
-	if (seq != ring->fence_drv.sync_seq)
+	if (del_timer(&ring->fence_drv.fallback_timer) &&
+	    seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);
 
 	if (unlikely(seq == last_seq))
-		return;
+		return false;
 
 	last_seq &= drv->num_fences_mask;
 	seq &= drv->num_fences_mask;
@@ -260,6 +263,8 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 
 		dma_fence_put(fence);
 	} while (last_seq != seq);
+
+	return true;
 }
 
 /**
@@ -274,7 +279,8 @@ static void amdgpu_fence_fallback(struct timer_list *t)
 	struct amdgpu_ring *ring = from_timer(ring, t,
 					      fence_drv.fallback_timer);
 
-	amdgpu_fence_process(ring);
+	if (amdgpu_fence_process(ring))
+		DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
 }
 
 /**
@@ -701,7 +707,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
 	struct amdgpu_device *adev = dev->dev_private;
 
 	seq_printf(m, "gpu recover\n");
-	amdgpu_device_gpu_recover(adev, NULL, true);
+	amdgpu_device_gpu_recover(adev, NULL);
 
 	return 0;
 }

+ 15 - 16
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

@@ -112,7 +112,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->gart.robj == NULL) {
+	if (adev->gart.bo == NULL) {
 		struct amdgpu_bo_param bp;
 
 		memset(&bp, 0, sizeof(bp));
@@ -123,7 +123,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
 			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		bp.type = ttm_bo_type_kernel;
 		bp.resv = NULL;
-		r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
+		r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
 		if (r) {
 			return r;
 		}
@@ -145,19 +145,18 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
 {
 	int r;
 
-	r = amdgpu_bo_reserve(adev->gart.robj, false);
+	r = amdgpu_bo_reserve(adev->gart.bo, false);
 	if (unlikely(r != 0))
 		return r;
-	r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM);
+	r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
 	if (r) {
-		amdgpu_bo_unreserve(adev->gart.robj);
+		amdgpu_bo_unreserve(adev->gart.bo);
 		return r;
 	}
-	r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr);
+	r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
 	if (r)
-		amdgpu_bo_unpin(adev->gart.robj);
-	amdgpu_bo_unreserve(adev->gart.robj);
-	adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);
+		amdgpu_bo_unpin(adev->gart.bo);
+	amdgpu_bo_unreserve(adev->gart.bo);
 	return r;
 }
 
@@ -173,14 +172,14 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->gart.robj == NULL) {
+	if (adev->gart.bo == NULL) {
 		return;
 	}
-	r = amdgpu_bo_reserve(adev->gart.robj, true);
+	r = amdgpu_bo_reserve(adev->gart.bo, true);
 	if (likely(r == 0)) {
-		amdgpu_bo_kunmap(adev->gart.robj);
-		amdgpu_bo_unpin(adev->gart.robj);
-		amdgpu_bo_unreserve(adev->gart.robj);
+		amdgpu_bo_kunmap(adev->gart.bo);
+		amdgpu_bo_unpin(adev->gart.bo);
+		amdgpu_bo_unreserve(adev->gart.bo);
 		adev->gart.ptr = NULL;
 	}
 }
@@ -196,10 +195,10 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
  */
 void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
 {
-	if (adev->gart.robj == NULL) {
+	if (adev->gart.bo == NULL) {
 		return;
 	}
-	amdgpu_bo_unref(&adev->gart.robj);
+	amdgpu_bo_unref(&adev->gart.bo);
 }
 
 /*

+ 1 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

@@ -40,8 +40,7 @@ struct amdgpu_bo;
 #define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
 
 struct amdgpu_gart {
-	u64				table_addr;
-	struct amdgpu_bo		*robj;
+	struct amdgpu_bo		*bo;
 	void				*ptr;
 	unsigned			num_gpu_pages;
 	unsigned			num_cpu_pages;

+ 0 - 7
drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h

@@ -24,13 +24,6 @@
 #ifndef __AMDGPU_GDS_H__
 #define __AMDGPU_GDS_H__
 
-/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned,
- * we should report GDS/GWS/OA size as PAGE_SIZE aligned
- * */
-#define AMDGPU_GDS_SHIFT	2
-#define AMDGPU_GWS_SHIFT	PAGE_SHIFT
-#define AMDGPU_OA_SHIFT		PAGE_SHIFT
-
 struct amdgpu_ring;
 struct amdgpu_bo;
 

+ 8 - 14
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

@@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 		}
 		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-		if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
-			size = size << AMDGPU_GDS_SHIFT;
-		else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
-			size = size << AMDGPU_GWS_SHIFT;
-		else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
-			size = size << AMDGPU_OA_SHIFT;
-		else
-			return -EINVAL;
+		/* GDS allocations must be DW aligned */
+		if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
+			size = ALIGN(size, 4);
 	}
-	size = roundup(size, PAGE_SIZE);
 
 	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
 		r = amdgpu_bo_reserve(vm->root.base.bo, false);
@@ -572,16 +566,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->va_address >= AMDGPU_VA_HOLE_START &&
-	    args->va_address < AMDGPU_VA_HOLE_END) {
+	if (args->va_address >= AMDGPU_GMC_HOLE_START &&
+	    args->va_address < AMDGPU_GMC_HOLE_END) {
 		dev_dbg(&dev->pdev->dev,
 			"va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
-			args->va_address, AMDGPU_VA_HOLE_START,
-			AMDGPU_VA_HOLE_END);
+			args->va_address, AMDGPU_GMC_HOLE_START,
+			AMDGPU_GMC_HOLE_END);
 		return -EINVAL;
 	}
 
-	args->va_address &= AMDGPU_VA_HOLE_MASK;
+	args->va_address &= AMDGPU_GMC_HOLE_MASK;
 
 	if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
 		dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n",

+ 92 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h

@@ -0,0 +1,92 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_GEM_H__
+#define __AMDGPU_GEM_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+
+/*
+ * GEM.
+ */
+
+#define AMDGPU_GEM_DOMAIN_MAX		0x3
+#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
+
+void amdgpu_gem_object_free(struct drm_gem_object *obj);
+int amdgpu_gem_object_open(struct drm_gem_object *obj,
+				struct drm_file *file_priv);
+void amdgpu_gem_object_close(struct drm_gem_object *obj,
+				struct drm_file *file_priv);
+unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
+struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+				 struct dma_buf_attachment *attach,
+				 struct sg_table *sg);
+struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
+					struct drm_gem_object *gobj,
+					int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
+struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
+void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
+void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+
+/*
+ * GEM objects.
+ */
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
+int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+			     int alignment, u32 initial_domain,
+			     u64 flags, enum ttm_bo_type type,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
+
+int amdgpu_mode_dumb_create(struct drm_file *file_priv,
+			    struct drm_device *dev,
+			    struct drm_mode_create_dumb *args);
+int amdgpu_mode_dumb_mmap(struct drm_file *filp,
+			  struct drm_device *dev,
+			  uint32_t handle, uint64_t *offset_p);
+
+int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *filp);
+int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *filp);
+int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp);
+int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *filp);
+
+int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp);
+
+#endif

+ 73 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

@@ -26,9 +26,44 @@
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 
+/* delay 0.1 second to enable gfx off feature */
+#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
+
 /*
- * GPU scratch registers helpers function.
+ * GPU GFX IP block helper functions.
  */
+
+int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
+			    int pipe, int queue)
+{
+	int bit = 0;
+
+	bit += mec * adev->gfx.mec.num_pipe_per_mec
+		* adev->gfx.mec.num_queue_per_pipe;
+	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+	bit += queue;
+
+	return bit;
+}
+
+void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
+			     int *mec, int *pipe, int *queue)
+{
+	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
+	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
+		% adev->gfx.mec.num_pipe_per_mec;
+	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
+	       / adev->gfx.mec.num_pipe_per_mec;
+
+}
+
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
+				     int mec, int pipe, int queue)
+{
+	return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
+			adev->gfx.mec.queue_bitmap);
+}
+
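These helpers flatten (mec, pipe, queue) into a single bit index in the MEC queue bitmap. A quick worked example, assuming a topology of 4 pipes per MEC and 8 queues per pipe (and that adev is in scope):

	/* With num_pipe_per_mec = 4 and num_queue_per_pipe = 8 (assumed values),
	 * (mec=1, pipe=2, queue=3) maps to bit 1*4*8 + 2*8 + 3 = 51, and
	 * amdgpu_gfx_bit_to_queue(adev, 51, &mec, &pipe, &queue) recovers
	 * mec=1, pipe=2, queue=3 again.
	 */
	int bit = amdgpu_gfx_queue_to_bit(adev, 1, 2, 3);	/* == 51 */
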
 /**
  * amdgpu_gfx_scratch_get - Allocate a scratch register
  *
@@ -340,3 +375,40 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
 			      &ring->mqd_gpu_addr,
 			      &ring->mqd_ptr);
 }
+
+/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true to enable the gfx off feature, false to disable it
+ *
+ * 1. The gfx off feature will be enabled by the gfx IP after gfx CG/PG is enabled.
+ * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
+ * 3. Other clients can cancel their request to disable the gfx off feature.
+ * 4. Other clients should not request to enable the gfx off feature before having requested to disable it.
+ */
+
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+{
+	if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
+		return;
+
+	if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
+		return;
+
+	mutex_lock(&adev->gfx.gfx_off_mutex);
+
+	if (!enable)
+		adev->gfx.gfx_off_req_count++;
+	else if (adev->gfx.gfx_off_req_count > 0)
+		adev->gfx.gfx_off_req_count--;
+
+	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
+		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
+	} else if (!enable && adev->gfx.gfx_off_state) {
+		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
+			adev->gfx.gfx_off_state = false;
+	}
+
+	mutex_unlock(&adev->gfx.gfx_off_mutex);
+}
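
The request count behaves as a disable refcount; a sketch of how a client is expected to bracket work that must not race with GFXOFF (the surrounding code is assumed, not part of this patch):

	amdgpu_gfx_off_ctrl(adev, false);	/* gfx_off_req_count++, GFXOFF stays off */
	/* ... safely touch GFX registers ... */
	amdgpu_gfx_off_ctrl(adev, true);	/* gfx_off_req_count--; once it reaches 0,
						 * GFXOFF is re-enabled ~100ms later via
						 * gfx_off_delay_work */
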

+ 310 - 41
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

@@ -24,28 +24,297 @@
 #ifndef __AMDGPU_GFX_H__
 #define __AMDGPU_GFX_H__
 
-int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
-void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
+/*
+ * GFX stuff
+ */
+#include "clearstate_defs.h"
+#include "amdgpu_ring.h"
 
-void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
-		unsigned max_sh);
+/* GFX current status */
+#define AMDGPU_GFX_NORMAL_MODE			0x00000000L
+#define AMDGPU_GFX_SAFE_MODE			0x00000001L
+#define AMDGPU_GFX_PG_DISABLED_MODE		0x00000002L
+#define AMDGPU_GFX_CG_DISABLED_MODE		0x00000004L
+#define AMDGPU_GFX_LBPW_DISABLED_MODE		0x00000008L
 
-void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
 
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
-			     struct amdgpu_ring *ring,
-			     struct amdgpu_irq_src *irq);
+struct amdgpu_rlc_funcs {
+	void (*enter_safe_mode)(struct amdgpu_device *adev);
+	void (*exit_safe_mode)(struct amdgpu_device *adev);
+};
 
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
-			      struct amdgpu_irq_src *irq);
+struct amdgpu_rlc {
+	/* for power gating */
+	struct amdgpu_bo	*save_restore_obj;
+	uint64_t		save_restore_gpu_addr;
+	volatile uint32_t	*sr_ptr;
+	const u32               *reg_list;
+	u32                     reg_list_size;
+	/* for clear state */
+	struct amdgpu_bo	*clear_state_obj;
+	uint64_t		clear_state_gpu_addr;
+	volatile uint32_t	*cs_ptr;
+	const struct cs_section_def   *cs_data;
+	u32                     clear_state_size;
+	/* for cp tables */
+	struct amdgpu_bo	*cp_table_obj;
+	uint64_t		cp_table_gpu_addr;
+	volatile uint32_t	*cp_table_ptr;
+	u32                     cp_table_size;
 
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
-int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
-			unsigned hpd_size);
+	/* safe mode for updating CG/PG state */
+	bool in_safe_mode;
+	const struct amdgpu_rlc_funcs *funcs;
 
-int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
-				   unsigned mqd_size);
-void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
+	/* for firmware data */
+	u32 save_and_restore_offset;
+	u32 clear_state_descriptor_offset;
+	u32 avail_scratch_ram_locations;
+	u32 reg_restore_list_size;
+	u32 reg_list_format_start;
+	u32 reg_list_format_separate_start;
+	u32 starting_offsets_start;
+	u32 reg_list_format_size_bytes;
+	u32 reg_list_size_bytes;
+	u32 reg_list_format_direct_reg_list_length;
+	u32 save_restore_list_cntl_size_bytes;
+	u32 save_restore_list_gpm_size_bytes;
+	u32 save_restore_list_srm_size_bytes;
+
+	u32 *register_list_format;
+	u32 *register_restore;
+	u8 *save_restore_list_cntl;
+	u8 *save_restore_list_gpm;
+	u8 *save_restore_list_srm;
+
+	bool is_rlc_v2_1;
+};
+
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
+struct amdgpu_mec {
+	struct amdgpu_bo	*hpd_eop_obj;
+	u64			hpd_eop_gpu_addr;
+	struct amdgpu_bo	*mec_fw_obj;
+	u64			mec_fw_gpu_addr;
+	u32 num_mec;
+	u32 num_pipe_per_mec;
+	u32 num_queue_per_pipe;
+	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+	/* These are the resources for which amdgpu takes ownership */
+	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+};
+
+struct amdgpu_kiq {
+	u64			eop_gpu_addr;
+	struct amdgpu_bo	*eop_obj;
+	spinlock_t              ring_lock;
+	struct amdgpu_ring	ring;
+	struct amdgpu_irq_src	irq;
+};
+
+/*
+ * GPU scratch registers structures, functions & helpers
+ */
+struct amdgpu_scratch {
+	unsigned		num_reg;
+	uint32_t                reg_base;
+	uint32_t		free_mask;
+};
+
+/*
+ * GFX configurations
+ */
+#define AMDGPU_GFX_MAX_SE 4
+#define AMDGPU_GFX_MAX_SH_PER_SE 2
+
+struct amdgpu_rb_config {
+	uint32_t rb_backend_disable;
+	uint32_t user_rb_backend_disable;
+	uint32_t raster_config;
+	uint32_t raster_config_1;
+};
+
+struct gb_addr_config {
+	uint16_t pipe_interleave_size;
+	uint8_t num_pipes;
+	uint8_t max_compress_frags;
+	uint8_t num_banks;
+	uint8_t num_se;
+	uint8_t num_rb_per_se;
+};
+
+struct amdgpu_gfx_config {
+	unsigned max_shader_engines;
+	unsigned max_tile_pipes;
+	unsigned max_cu_per_sh;
+	unsigned max_sh_per_se;
+	unsigned max_backends_per_se;
+	unsigned max_texture_channel_caches;
+	unsigned max_gprs;
+	unsigned max_gs_threads;
+	unsigned max_hw_contexts;
+	unsigned sc_prim_fifo_size_frontend;
+	unsigned sc_prim_fifo_size_backend;
+	unsigned sc_hiz_tile_fifo_size;
+	unsigned sc_earlyz_tile_fifo_size;
+
+	unsigned num_tile_pipes;
+	unsigned backend_enable_mask;
+	unsigned mem_max_burst_length_bytes;
+	unsigned mem_row_size_in_kb;
+	unsigned shader_engine_tile_size;
+	unsigned num_gpus;
+	unsigned multi_gpu_tile_size;
+	unsigned mc_arb_ramcfg;
+	unsigned gb_addr_config;
+	unsigned num_rbs;
+	unsigned gs_vgt_table_depth;
+	unsigned gs_prim_buffer_depth;
+
+	uint32_t tile_mode_array[32];
+	uint32_t macrotile_mode_array[16];
+
+	struct gb_addr_config gb_addr_config_fields;
+	struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
+
+	/* gfx configure feature */
+	uint32_t double_offchip_lds_buf;
+	/* cached value of DB_DEBUG2 */
+	uint32_t db_debug2;
+};
+
+struct amdgpu_cu_info {
+	uint32_t simd_per_cu;
+	uint32_t max_waves_per_simd;
+	uint32_t wave_front_size;
+	uint32_t max_scratch_slots_per_cu;
+	uint32_t lds_size;
+
+	/* total active CU number */
+	uint32_t number;
+	uint32_t ao_cu_mask;
+	uint32_t ao_cu_bitmap[4][4];
+	uint32_t bitmap[4][4];
+};
+
+struct amdgpu_gfx_funcs {
+	/* get the gpu clock counter */
+	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+	void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
+			     u32 sh_num, u32 instance);
+	void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
+			       uint32_t wave, uint32_t *dst, int *no_fields);
+	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
+				uint32_t wave, uint32_t thread, uint32_t start,
+				uint32_t size, uint32_t *dst);
+	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
+				uint32_t wave, uint32_t start, uint32_t size,
+				uint32_t *dst);
+	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
+				 u32 queue);
+};
+
+struct amdgpu_ngg_buf {
+	struct amdgpu_bo	*bo;
+	uint64_t		gpu_addr;
+	uint32_t		size;
+	uint32_t		bo_size;
+};
+
+enum {
+	NGG_PRIM = 0,
+	NGG_POS,
+	NGG_CNTL,
+	NGG_PARAM,
+	NGG_BUF_MAX
+};
+
+struct amdgpu_ngg {
+	struct amdgpu_ngg_buf	buf[NGG_BUF_MAX];
+	uint32_t		gds_reserve_addr;
+	uint32_t		gds_reserve_size;
+	bool			init;
+};
+
+struct sq_work {
+	struct work_struct	work;
+	unsigned ih_data;
+};
+
+struct amdgpu_gfx {
+	struct mutex			gpu_clock_mutex;
+	struct amdgpu_gfx_config	config;
+	struct amdgpu_rlc		rlc;
+	struct amdgpu_mec		mec;
+	struct amdgpu_kiq		kiq;
+	struct amdgpu_scratch		scratch;
+	const struct firmware		*me_fw;	/* ME firmware */
+	uint32_t			me_fw_version;
+	const struct firmware		*pfp_fw; /* PFP firmware */
+	uint32_t			pfp_fw_version;
+	const struct firmware		*ce_fw;	/* CE firmware */
+	uint32_t			ce_fw_version;
+	const struct firmware		*rlc_fw; /* RLC firmware */
+	uint32_t			rlc_fw_version;
+	const struct firmware		*mec_fw; /* MEC firmware */
+	uint32_t			mec_fw_version;
+	const struct firmware		*mec2_fw; /* MEC2 firmware */
+	uint32_t			mec2_fw_version;
+	uint32_t			me_feature_version;
+	uint32_t			ce_feature_version;
+	uint32_t			pfp_feature_version;
+	uint32_t			rlc_feature_version;
+	uint32_t			rlc_srlc_fw_version;
+	uint32_t			rlc_srlc_feature_version;
+	uint32_t			rlc_srlg_fw_version;
+	uint32_t			rlc_srlg_feature_version;
+	uint32_t			rlc_srls_fw_version;
+	uint32_t			rlc_srls_feature_version;
+	uint32_t			mec_feature_version;
+	uint32_t			mec2_feature_version;
+	bool				mec_fw_write_wait;
+	bool				me_fw_write_wait;
+	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
+	unsigned			num_gfx_rings;
+	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+	unsigned			num_compute_rings;
+	struct amdgpu_irq_src		eop_irq;
+	struct amdgpu_irq_src		priv_reg_irq;
+	struct amdgpu_irq_src		priv_inst_irq;
+	struct amdgpu_irq_src		cp_ecc_error_irq;
+	struct amdgpu_irq_src		sq_irq;
+	struct sq_work			sq_work;
+
+	/* gfx status */
+	uint32_t			gfx_current_status;
+	/* ce ram size */
+	unsigned			ce_ram_size;
+	struct amdgpu_cu_info		cu_info;
+	const struct amdgpu_gfx_funcs	*funcs;
+
+	/* reset mask */
+	uint32_t                        grbm_soft_reset;
+	uint32_t                        srbm_soft_reset;
+
+	/* NGG */
+	struct amdgpu_ngg		ngg;
+
+	/* gfx off */
+	bool                            gfx_off_state; /* true: enabled, false: disabled */
+	struct mutex                    gfx_off_mutex;
+	uint32_t                        gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
+	struct delayed_work             gfx_off_delay_work;
+
+	/* pipe reservation */
+	struct mutex			pipe_reserve_mutex;
+	DECLARE_BITMAP			(pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+};
+
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
 
 /**
  * amdgpu_gfx_create_bitmask - create a bitmask
@@ -60,34 +329,34 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
 	return (u32)((1ULL << bit_width) - 1);
 }
 
-static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev,
-					  int mec, int pipe, int queue)
-{
-	int bit = 0;
+int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
+void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
 
-	bit += mec * adev->gfx.mec.num_pipe_per_mec
-		* adev->gfx.mec.num_queue_per_pipe;
-	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
-	bit += queue;
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
+				 unsigned max_sh);
 
-	return bit;
-}
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
+			     struct amdgpu_ring *ring,
+			     struct amdgpu_irq_src *irq);
 
-static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
-					   int *mec, int *pipe, int *queue)
-{
-	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
-	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
-		% adev->gfx.mec.num_pipe_per_mec;
-	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
-	       / adev->gfx.mec.num_pipe_per_mec;
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
+			      struct amdgpu_irq_src *irq);
 
-}
-static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
-						   int mec, int pipe, int queue)
-{
-	return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
-			adev->gfx.mec.queue_bitmap);
-}
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+			unsigned hpd_size);
+
+int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
+				   unsigned mqd_size);
+void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
+
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
+int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
+			    int pipe, int queue);
+void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
+			     int *mec, int *pipe, int *queue);
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
+				     int pipe, int queue);
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 
 #endif

+ 215 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

@@ -0,0 +1,215 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+
+/**
+ * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
+ *
+ * @bo: the BO to get the PDE for
+ * @level: the level in the PD hierarchy
+ * @addr: resulting addr
+ * @flags: resulting flags
+ *
+ * Get the address and flags to be used for a PDE (Page Directory Entry).
+ */
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+			       uint64_t *addr, uint64_t *flags)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct ttm_dma_tt *ttm;
+
+	switch (bo->tbo.mem.mem_type) {
+	case TTM_PL_TT:
+		ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
+		*addr = ttm->dma_address[0];
+		break;
+	case TTM_PL_VRAM:
+		*addr = amdgpu_bo_gpu_offset(bo);
+		break;
+	default:
+		*addr = 0;
+		break;
+	}
+	*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
+	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
+}
+
+/**
+ * amdgpu_gmc_pd_addr - return the address of the root directory
+ *
+ * @bo: BO holding the root page directory
+ *
+ * Returns the GPU address of @bo, with the PDE flags folded in on Vega10
+ * and later.
+ */
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	uint64_t pd_addr;
+
+	/* TODO: move that into ASIC specific code */
+	if (adev->asic_type >= CHIP_VEGA10) {
+		uint64_t flags = AMDGPU_PTE_VALID;
+
+		amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
+		pd_addr |= flags;
+	} else {
+		pd_addr = amdgpu_bo_gpu_offset(bo);
+	}
+	return pd_addr;
+}
+
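A typical call-site sketch (field names as used elsewhere in amdgpu at this point): fetching the root page-directory address when activating a VM:

	/* The returned address already carries the PDE flags on Vega10+. */
	uint64_t pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
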
+/**
+ * amdgpu_gmc_agp_addr - return the address in the AGP address space
+ *
+ * @tbo: TTM BO which needs the address, must be in GTT domain
+ *
+ * Tries to figure out how to access the BO through the AGP aperture. Returns
+ * AMDGPU_BO_INVALID_OFFSET if that is not possible.
+ */
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct ttm_dma_tt *ttm;
+
+	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
+		return AMDGPU_BO_INVALID_OFFSET;
+
+	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
+	if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
+		return AMDGPU_BO_INVALID_OFFSET;
+
+	return adev->gmc.agp_start + ttm->dma_address[0];
+}
+
+/**
+ * amdgpu_gmc_vram_location - try to find VRAM location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ * @base: base address at which to put VRAM
+ *
+ * Function will try to place VRAM at the base address provided
+ * as parameter.
+ */
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+			      u64 base)
+{
+	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
+
+	mc->vram_start = base;
+	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+	if (limit && limit < mc->real_vram_size)
+		mc->real_vram_size = limit;
+
+	if (mc->xgmi.num_physical_nodes == 0) {
+		mc->fb_start = mc->vram_start;
+		mc->fb_end = mc->vram_end;
+	}
+	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+			mc->mc_vram_size >> 20, mc->vram_start,
+			mc->vram_end, mc->real_vram_size >> 20);
+}
+
+/**
+ * amdgpu_gmc_gart_location - try to find GART location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * Function will try to place GART before or after VRAM.
+ *
+ * If the GART size is bigger than the space left, we adjust the GART size.
+ * Thus this function will never fail.
+ */
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+	const uint64_t four_gb = 0x100000000ULL;
+	u64 size_af, size_bf;
+	/* To avoid the hole, limit the max MC address to AMDGPU_GMC_HOLE_START */
+	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
+
+	mc->gart_size += adev->pm.smu_prv_buffer_size;
+
+	/* VCE doesn't like it when BOs cross a 4GB segment, so align
+	 * the GART base on a 4GB boundary as well.
+	 */
+	size_bf = mc->fb_start;
+	size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
+
+	if (mc->gart_size > max(size_bf, size_af)) {
+		dev_warn(adev->dev, "limiting GART\n");
+		mc->gart_size = max(size_bf, size_af);
+	}
+
+	if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+	    (size_af < mc->gart_size))
+		mc->gart_start = 0;
+	else
+		mc->gart_start = max_mc_address - mc->gart_size + 1;
+
+	mc->gart_start &= ~(four_gb - 1);
+	mc->gart_end = mc->gart_start + mc->gart_size - 1;
+	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
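As an illustration (assuming a 48-bit mc_mask, no SMU reserved buffer, VRAM mapped at [0, 8GB) and gart_size = 512MB): size_bf is 0, so the GART goes above VRAM at the top of the usable space, gart_start = (max_mc_address - gart_size + 1) rounded down to a 4GB boundary, i.e. 0x00007fff00000000, just below AMDGPU_GMC_HOLE_START.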
+/**
+ * amdgpu_gmc_agp_location - try to find AGP location
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * Function will try to find a place for the AGP BAR in the MC address
+ * space.
+ *
+ * AGP BAR will be assigned the largest available hole in the address space.
+ * Should be called after VRAM and GART locations are setup.
+ */
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+	const uint64_t sixteen_gb = 1ULL << 34;
+	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
+	u64 size_af, size_bf;
+
+	if (mc->fb_start > mc->gart_start) {
+		size_bf = (mc->fb_start & sixteen_gb_mask) -
+			ALIGN(mc->gart_end + 1, sixteen_gb);
+		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
+	} else {
+		size_bf = mc->fb_start & sixteen_gb_mask;
+		size_af = (mc->gart_start & sixteen_gb_mask) -
+			ALIGN(mc->fb_end + 1, sixteen_gb);
+	}
+
+	if (size_bf > size_af) {
+		mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
+		mc->agp_size = size_bf;
+	} else {
+		mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
+		mc->agp_size = size_af;
+	}
+
+	mc->agp_end = mc->agp_start + mc->agp_size - 1;
+	dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
+			mc->agp_size >> 20, mc->agp_start, mc->agp_end);
+}

+ 71 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

@@ -30,6 +30,19 @@
 
 #include "amdgpu_irq.h"
 
+/* VA hole for 48bit addresses on Vega10 */
+#define AMDGPU_GMC_HOLE_START	0x0000800000000000ULL
+#define AMDGPU_GMC_HOLE_END	0xffff800000000000ULL
+
+/*
+ * Hardware is programmed as if the hole doesn't exist, using the start and
+ * end address values.
+ *
+ * This mask is used to remove the upper 16bits of the VA and so come up with
+ * the linear addr value.
+ */
+#define AMDGPU_GMC_HOLE_MASK	0x0000ffffffffffffULL
+
 struct firmware;
 
 /*
@@ -74,6 +87,20 @@ struct amdgpu_gmc_funcs {
 			   u64 *dst, u64 *flags);
 };
 
+struct amdgpu_xgmi {
+	/* from psp */
+	u64 device_id;
+	u64 hive_id;
+	/* fixed per family */
+	u64 node_segment_size;
+	/* physical node (0-3) */
+	unsigned physical_node_id;
+	/* number of nodes (0-4) */
+	unsigned num_physical_nodes;
+	/* gpu list in the same hive */
+	struct list_head head;
+};
+
 struct amdgpu_gmc {
 	resource_size_t		aper_size;
 	resource_size_t		aper_base;
@@ -81,11 +108,22 @@ struct amdgpu_gmc {
 	 * about vram size near mc fb location */
 	u64			mc_vram_size;
 	u64			visible_vram_size;
+	u64			agp_size;
+	u64			agp_start;
+	u64			agp_end;
 	u64			gart_size;
 	u64			gart_start;
 	u64			gart_end;
 	u64			vram_start;
 	u64			vram_end;
+	/* FB region: the same as the local VRAM region on a single GPU. In an
+	 * XGMI configuration this region covers all GPUs in the same hive,
+	 * and each GPU in the hive has the same view of the FB region.
+	 * GPU0's VRAM starts at offset (0 * segment size),
+	 * GPU1's at offset (1 * segment size), etc.
+	 */
+	u64			fb_start;
+	u64			fb_end;
 	unsigned		vram_width;
 	u64			real_vram_size;
 	int			vram_mtrr;
@@ -109,8 +147,17 @@ struct amdgpu_gmc {
 	atomic_t		vm_fault_info_updated;
 
 	const struct amdgpu_gmc_funcs	*gmc_funcs;
+
+	struct amdgpu_xgmi xgmi;
 };
 
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
+#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
+#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
+
 /**
  * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
  *
@@ -126,4 +173,28 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
 	return (gmc->real_vram_size == gmc->visible_vram_size);
 }
 
+/**
+ * amdgpu_gmc_sign_extend - sign extend the given gmc address
+ *
+ * @addr: address to extend
+ */
+static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
+{
+	if (addr >= AMDGPU_GMC_HOLE_START)
+		addr |= AMDGPU_GMC_HOLE_END;
+
+	return addr;
+}
+
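A minimal worked sketch of the hole handling; the values follow directly from the defines above:

	uint64_t hi = amdgpu_gmc_sign_extend(0x0000800000000000ULL);
	/* hi == 0xffff800000000000ULL, i.e. AMDGPU_GMC_HOLE_END */
	uint64_t lo = amdgpu_gmc_sign_extend(0x00001234abcd0000ULL);
	/* lo is unchanged: it lies below AMDGPU_GMC_HOLE_START */
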
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+			       uint64_t *addr, uint64_t *flags);
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+			      u64 base);
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
+			      struct amdgpu_gmc *mc);
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
+			     struct amdgpu_gmc *mc);
+
 #endif

+ 13 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "atom.h"
+#include "amdgpu_trace.h"
 
 #define AMDGPU_IB_TEST_TIMEOUT	msecs_to_jiffies(1000)
 
@@ -170,6 +171,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
 	     amdgpu_vm_need_pipeline_sync(ring, job))) {
 		need_pipe_sync = true;
+
+		if (tmp)
+			trace_amdgpu_ib_pipe_sync(job, tmp);
+
 		dma_fence_put(tmp);
 	}
 
@@ -349,6 +354,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		if (!ring || !ring->ready)
 			continue;
 
+		/* skip IB tests for KIQ in general for the below reasons:
+		 * 1. We never submit IBs to the KIQ
+		 * 2. KIQ doesn't use the EOP interrupts,
+		 *    we use some other CP interrupt.
+		 */
+		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+			continue;
+
 		/* MM engine need more time */
 		if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
 			ring->funcs->type == AMDGPU_RING_TYPE_VCE ||

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

@@ -574,7 +574,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
 		/* skip over VMID 0, since it is the system VM */
 		for (j = 1; j < id_mgr->num_ids; ++j) {
 			amdgpu_vmid_reset(adev, i, j);
-			amdgpu_sync_create(&id_mgr->ids[i].active);
+			amdgpu_sync_create(&id_mgr->ids[j].active);
 			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
 		}
 	}

+ 66 - 173
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c

@@ -24,46 +24,21 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
-#include "amdgpu_amdkfd.h"
-
-/**
- * amdgpu_ih_ring_alloc - allocate memory for the IH ring
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate a ring buffer for the interrupt controller.
- * Returns 0 for success, errors for failure.
- */
-static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev)
-{
-	int r;
-
-	/* Allocate ring buffer */
-	if (adev->irq.ih.ring_obj == NULL) {
-		r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size,
-					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
-					    &adev->irq.ih.ring_obj,
-					    &adev->irq.ih.gpu_addr,
-					    (void **)&adev->irq.ih.ring);
-		if (r) {
-			DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r);
-			return r;
-		}
-	}
-	return 0;
-}
 
 /**
  * amdgpu_ih_ring_init - initialize the IH state
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to initialize
+ * @ring_size: ring size to allocate
+ * @use_bus_addr: true when we can use dma_alloc_coherent
  *
  * Initializes the IH state and allocates a buffer
  * for the IH ring buffer.
  * Returns 0 for success, errors for failure.
  */
-int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
-			bool use_bus_addr)
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+			unsigned ring_size, bool use_bus_addr)
 {
 	u32 rb_bufsz;
 	int r;
@@ -71,70 +46,76 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
 	/* Align ring size */
 	rb_bufsz = order_base_2(ring_size / 4);
 	ring_size = (1 << rb_bufsz) * 4;
-	adev->irq.ih.ring_size = ring_size;
-	adev->irq.ih.ptr_mask = adev->irq.ih.ring_size - 1;
-	adev->irq.ih.rptr = 0;
-	adev->irq.ih.use_bus_addr = use_bus_addr;
+	ih->ring_size = ring_size;
+	ih->ptr_mask = ih->ring_size - 1;
+	ih->rptr = 0;
+	ih->use_bus_addr = use_bus_addr;
 
-	if (adev->irq.ih.use_bus_addr) {
-		if (!adev->irq.ih.ring) {
-			/* add 8 bytes for the rptr/wptr shadows and
-			 * add them to the end of the ring allocation.
-			 */
-			adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
-								 adev->irq.ih.ring_size + 8,
-								 &adev->irq.ih.rb_dma_addr);
-			if (adev->irq.ih.ring == NULL)
-				return -ENOMEM;
-			memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
-			adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
-			adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
-		}
-		return 0;
+	if (use_bus_addr) {
+		if (ih->ring)
+			return 0;
+
+		/* add 8 bytes for the rptr/wptr shadows and
+		 * add them to the end of the ring allocation.
+		 */
+		ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
+					      &ih->rb_dma_addr, GFP_KERNEL);
+		if (ih->ring == NULL)
+			return -ENOMEM;
+
+		memset((void *)ih->ring, 0, ih->ring_size + 8);
+		ih->wptr_offs = (ih->ring_size / 4) + 0;
+		ih->rptr_offs = (ih->ring_size / 4) + 1;
 	} else {
-		r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs);
+		r = amdgpu_device_wb_get(adev, &ih->wptr_offs);
+		if (r)
+			return r;
+
+		r = amdgpu_device_wb_get(adev, &ih->rptr_offs);
 		if (r) {
-			dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r);
+			amdgpu_device_wb_free(adev, ih->wptr_offs);
 			return r;
 		}
 
-		r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs);
+		r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_GTT,
+					    &ih->ring_obj, &ih->gpu_addr,
+					    (void **)&ih->ring);
 		if (r) {
-			amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
-			dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r);
+			amdgpu_device_wb_free(adev, ih->rptr_offs);
+			amdgpu_device_wb_free(adev, ih->wptr_offs);
 			return r;
 		}
-
-		return amdgpu_ih_ring_alloc(adev);
 	}
+	return 0;
 }
 
 /**
  * amdgpu_ih_ring_fini - tear down the IH state
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to tear down
  *
  * Tears down the IH state and frees buffer
  * used for the IH ring buffer.
  */
-void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-	if (adev->irq.ih.use_bus_addr) {
-		if (adev->irq.ih.ring) {
-			/* add 8 bytes for the rptr/wptr shadows and
-			 * add them to the end of the ring allocation.
-			 */
-			pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
-					    (void *)adev->irq.ih.ring,
-					    adev->irq.ih.rb_dma_addr);
-			adev->irq.ih.ring = NULL;
-		}
+	if (ih->use_bus_addr) {
+		if (!ih->ring)
+			return;
+
+		/* add 8 bytes for the rptr/wptr shadows and
+		 * add them to the end of the ring allocation.
+		 */
+		dma_free_coherent(adev->dev, ih->ring_size + 8,
+				  (void *)ih->ring, ih->rb_dma_addr);
+		ih->ring = NULL;
 	} else {
-		amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj,
-				      &adev->irq.ih.gpu_addr,
-				      (void **)&adev->irq.ih.ring);
-		amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
-		amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs);
+		amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
+				      (void **)&ih->ring);
+		amdgpu_device_wb_free(adev, ih->wptr_offs);
+		amdgpu_device_wb_free(adev, ih->rptr_offs);
 	}
 }
 
@@ -142,133 +123,45 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
  * amdgpu_ih_process - interrupt handler
  *
  * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
  *
  * Interrupt handler (VI), walk the IH ring.
  * Returns irq process return code.
  */
-int amdgpu_ih_process(struct amdgpu_device *adev)
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+		      void (*callback)(struct amdgpu_device *adev,
+				       struct amdgpu_ih_ring *ih))
 {
-	struct amdgpu_iv_entry entry;
 	u32 wptr;
 
-	if (!adev->irq.ih.enabled || adev->shutdown)
+	if (!ih->enabled || adev->shutdown)
 		return IRQ_NONE;
 
 	wptr = amdgpu_ih_get_wptr(adev);
 
 restart_ih:
 	/* is somebody else already processing irqs? */
-	if (atomic_xchg(&adev->irq.ih.lock, 1))
+	if (atomic_xchg(&ih->lock, 1))
 		return IRQ_NONE;
 
-	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, adev->irq.ih.rptr, wptr);
+	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
 
-	while (adev->irq.ih.rptr != wptr) {
-		u32 ring_index = adev->irq.ih.rptr >> 2;
-
-		/* Prescreening of high-frequency interrupts */
-		if (!amdgpu_ih_prescreen_iv(adev)) {
-			adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
-			continue;
-		}
-
-		/* Before dispatching irq to IP blocks, send it to amdkfd */
-		amdgpu_amdkfd_interrupt(adev,
-				(const void *) &adev->irq.ih.ring[ring_index]);
-
-		entry.iv_entry = (const uint32_t *)
-			&adev->irq.ih.ring[ring_index];
-		amdgpu_ih_decode_iv(adev, &entry);
-		adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
-
-		amdgpu_irq_dispatch(adev, &entry);
+	while (ih->rptr != wptr) {
+		callback(adev, ih);
+		ih->rptr &= ih->ptr_mask;
 	}
+
 	amdgpu_ih_set_rptr(adev);
-	atomic_set(&adev->irq.ih.lock, 0);
+	atomic_set(&ih->lock, 0);
 
 	/* make sure wptr hasn't changed while processing */
 	wptr = amdgpu_ih_get_wptr(adev);
-	if (wptr != adev->irq.ih.rptr)
+	if (wptr != ih->rptr)
 		goto restart_ih;
 
 	return IRQ_HANDLED;
 }
 
-/**
- * amdgpu_ih_add_fault - Add a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a retry page fault interrupt is
- * received. If this is a new page fault, it will be added to a hash
- * table. The return value indicates whether this is a new fault, or
- * a fault that was already known and is already being handled.
- *
- * If there are too many pending page faults, this will fail. Retry
- * interrupts should be ignored in this case until there is enough
- * free space.
- *
- * Returns 0 if the fault was added, 1 if the fault was already known,
- * -ENOSPC if there are too many pending faults.
- */
-int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
-{
-	unsigned long flags;
-	int r = -ENOSPC;
-
-	if (WARN_ON_ONCE(!adev->irq.ih.faults))
-		/* Should be allocated in <IP>_ih_sw_init on GPUs that
-		 * support retry faults and require retry filtering.
-		 */
-		return r;
-
-	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
-	/* Only let the hash table fill up to 50% for best performance */
-	if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
-		goto unlock_out;
-
-	r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
-	if (!r)
-		adev->irq.ih.faults->count++;
-
-	/* chash_table_copy_in should never fail unless we're losing count */
-	WARN_ON_ONCE(r < 0);
-
-unlock_out:
-	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
-	return r;
-}
-
-/**
- * amdgpu_ih_clear_fault - Remove a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a page fault has been handled. Any
- * future interrupt with this key will be processed as a new
- * page fault.
- */
-void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
-{
-	unsigned long flags;
-	int r;
-
-	if (!adev->irq.ih.faults)
-		return;
-
-	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
-	r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
-	if (!WARN_ON_ONCE(r < 0)) {
-		adev->irq.ih.faults->count--;
-		WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
-	}
-
-	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
-}
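
Note on the use_bus_addr path above: the allocation reserves 8 extra bytes so the rptr/wptr shadows live directly behind the ring entries, and the stored offsets are in dwords (hence ring_size / 4). A standalone sketch of that arithmetic, with illustrative values only:

/* Sketch (not part of the patch) of the IH ring layout in the
 * use_bus_addr case: ring_size bytes of entries followed by two
 * 32-bit shadow slots, indexed in dwords.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ring_size = 1 << 16;             /* bytes, power of two */
	uint32_t wptr_offs = (ring_size / 4) + 0; /* 1st dword past ring */
	uint32_t rptr_offs = (ring_size / 4) + 1; /* 2nd dword past ring */

	/* matches the "+ 8" passed to dma_alloc_coherent() above */
	printf("alloc %u bytes, wptr shadow at dword %u, rptr at %u\n",
	       ring_size + 8, wptr_offs, rptr_offs);
	return 0;
}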

+ 20 - 34
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

@@ -24,20 +24,8 @@
 #ifndef __AMDGPU_IH_H__
 #define __AMDGPU_IH_H__
 
-#include <linux/chash.h>
-#include "soc15_ih_clientid.h"
-
 struct amdgpu_device;
-
-#define AMDGPU_IH_CLIENTID_LEGACY 0
-#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
-
-#define AMDGPU_PAGEFAULT_HASH_BITS 8
-struct amdgpu_retryfault_hashtable {
-	DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
-	spinlock_t	lock;
-	int		count;
-};
+struct amdgpu_iv_entry;
 
 /*
  * R6xx+ IH ring
@@ -57,30 +45,28 @@ struct amdgpu_ih_ring {
 	bool			use_doorbell;
 	bool			use_bus_addr;
 	dma_addr_t		rb_dma_addr; /* only used when use_bus_addr = true */
-	struct amdgpu_retryfault_hashtable *faults;
 };
 
-#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
-
-struct amdgpu_iv_entry {
-	unsigned client_id;
-	unsigned src_id;
-	unsigned ring_id;
-	unsigned vmid;
-	unsigned vmid_src;
-	uint64_t timestamp;
-	unsigned timestamp_src;
-	unsigned pasid;
-	unsigned pasid_src;
-	unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
-	const uint32_t *iv_entry;
+/* provided by the ih block */
+struct amdgpu_ih_funcs {
+	/* ring read/write ptr handling, called from interrupt context */
+	u32 (*get_wptr)(struct amdgpu_device *adev);
+	bool (*prescreen_iv)(struct amdgpu_device *adev);
+	void (*decode_iv)(struct amdgpu_device *adev,
+			  struct amdgpu_iv_entry *entry);
+	void (*set_rptr)(struct amdgpu_device *adev);
 };
 
-int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
-			bool use_bus_addr);
-void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
-int amdgpu_ih_process(struct amdgpu_device *adev);
-int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
-void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
+#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
+#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
+#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
+#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
+
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+			unsigned ring_size, bool use_bus_addr);
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+		      void (*callback)(struct amdgpu_device *adev,
+				       struct amdgpu_ih_ring *ih));
 
 #endif
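
With amdgpu_iv_entry moved out and the ring-handling hooks gathered into amdgpu_ih_funcs, each IH block now just publishes a function table that the macros above dispatch through via adev->irq.ih_funcs. A minimal sketch of the wiring, using hypothetical example_ih_* names (the real tables live in the per-ASIC IH code):

/* Sketch only: how an IH block could populate the new funcs table.
 * The example_ih_* functions are hypothetical stand-ins for the
 * per-ASIC implementations.
 */
static u32 example_ih_get_wptr(struct amdgpu_device *adev);
static bool example_ih_prescreen_iv(struct amdgpu_device *adev);
static void example_ih_decode_iv(struct amdgpu_device *adev,
				 struct amdgpu_iv_entry *entry);
static void example_ih_set_rptr(struct amdgpu_device *adev);

static const struct amdgpu_ih_funcs example_ih_funcs = {
	.get_wptr = example_ih_get_wptr,
	.prescreen_iv = example_ih_prescreen_iv,
	.decode_iv = example_ih_decode_iv,
	.set_rptr = example_ih_set_rptr,
};

static void example_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
	/* what the amdgpu_ih_get_wptr()/... macros above dereference */
	adev->irq.ih_funcs = &example_ih_funcs;
}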

+ 37 - 8
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c

@@ -51,6 +51,7 @@
 #include "atom.h"
 #include "amdgpu_connectors.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 #include <linux/pm_runtime.h>
 
@@ -105,8 +106,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
 						  reset_work);
 
-	if (!amdgpu_sriov_vf(adev))
-		amdgpu_device_gpu_recover(adev, NULL, false);
+	if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
+		amdgpu_device_gpu_recover(adev, NULL);
 }
 
 /**
@@ -123,7 +124,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
 	int r;
 
 	spin_lock_irqsave(&adev->irq.lock, irqflags);
-	for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
 		if (!adev->irq.client[i].sources)
 			continue;
 
@@ -146,6 +147,34 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
 	spin_unlock_irqrestore(&adev->irq.lock, irqflags);
 }
 
+/**
+ * amdgpu_irq_callback - callback from the IH ring
+ *
+ * @adev: amdgpu device pointer
+ * @ih: amdgpu ih ring
+ *
+ * Callback from IH ring processing to handle the entry at the current position
+ * and advance the read pointer.
+ */
+static void amdgpu_irq_callback(struct amdgpu_device *adev,
+				struct amdgpu_ih_ring *ih)
+{
+	u32 ring_index = ih->rptr >> 2;
+	struct amdgpu_iv_entry entry;
+
+	/* Prescreening of high-frequency interrupts */
+	if (!amdgpu_ih_prescreen_iv(adev))
+		return;
+
+	/* Before dispatching irq to IP blocks, send it to amdkfd */
+	amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]);
+
+	entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
+	amdgpu_ih_decode_iv(adev, &entry);
+
+	amdgpu_irq_dispatch(adev, &entry);
+}
+
 /**
  * amdgpu_irq_handler - IRQ handler
  *
@@ -163,7 +192,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
 	struct amdgpu_device *adev = dev->dev_private;
 	irqreturn_t ret;
 
-	ret = amdgpu_ih_process(adev);
+	ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback);
 	if (ret == IRQ_HANDLED)
 		pm_runtime_mark_last_busy(dev->dev);
 	return ret;
@@ -273,7 +302,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
 		cancel_work_sync(&adev->reset_work);
 	}
 
-	for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
 		if (!adev->irq.client[i].sources)
 			continue;
 
@@ -313,7 +342,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
 		      unsigned client_id, unsigned src_id,
 		      struct amdgpu_irq_src *source)
 {
-	if (client_id >= AMDGPU_IH_CLIENTID_MAX)
+	if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
 		return -EINVAL;
 
 	if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
@@ -367,7 +396,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
 
 	trace_amdgpu_iv(entry);
 
-	if (client_id >= AMDGPU_IH_CLIENTID_MAX) {
+	if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
 		DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
 		return;
 	}
@@ -440,7 +469,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
 {
 	int i, j, k;
 
-	for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
 		if (!adev->irq.client[i].sources)
 			continue;
 

+ 22 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h

@@ -25,19 +25,38 @@
 #define __AMDGPU_IRQ_H__
 
 #include <linux/irqdomain.h>
+#include "soc15_ih_clientid.h"
 #include "amdgpu_ih.h"
 
-#define AMDGPU_MAX_IRQ_SRC_ID	0x100
+#define AMDGPU_MAX_IRQ_SRC_ID		0x100
 #define AMDGPU_MAX_IRQ_CLIENT_ID	0x100
 
+#define AMDGPU_IRQ_CLIENTID_LEGACY	0
+#define AMDGPU_IRQ_CLIENTID_MAX		SOC15_IH_CLIENTID_MAX
+
+#define AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW	4
+
 struct amdgpu_device;
-struct amdgpu_iv_entry;
 
 enum amdgpu_interrupt_state {
 	AMDGPU_IRQ_STATE_DISABLE,
 	AMDGPU_IRQ_STATE_ENABLE,
 };
 
+struct amdgpu_iv_entry {
+	unsigned client_id;
+	unsigned src_id;
+	unsigned ring_id;
+	unsigned vmid;
+	unsigned vmid_src;
+	uint64_t timestamp;
+	unsigned timestamp_src;
+	unsigned pasid;
+	unsigned pasid_src;
+	unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
+	const uint32_t *iv_entry;
+};
+
 struct amdgpu_irq_src {
 	unsigned				num_types;
 	atomic_t				*enabled_types;
@@ -63,7 +82,7 @@ struct amdgpu_irq {
 	bool				installed;
 	spinlock_t			lock;
 	/* interrupt sources */
-	struct amdgpu_irq_client	client[AMDGPU_IH_CLIENTID_MAX];
+	struct amdgpu_irq_client	client[AMDGPU_IRQ_CLIENTID_MAX];
 
 	/* status, etc. */
 	bool				msi_enabled; /* msi enabled */

+ 9 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

@@ -33,11 +33,18 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
 
+	if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
+		DRM_ERROR("ring %s timeout, but soft recovered\n",
+			  s_job->sched->name);
+		return;
+	}
+
 	DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
 		  job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
 		  ring->fence_drv.sync_seq);
 
-	amdgpu_device_gpu_recover(ring->adev, job, false);
+	if (amdgpu_device_should_recover_gpu(ring->adev))
+		amdgpu_device_gpu_recover(ring->adev, job);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -66,6 +73,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	amdgpu_sync_create(&(*job)->sync);
 	amdgpu_sync_create(&(*job)->sched_sync);
 	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
 
 	return 0;
 }
@@ -82,8 +90,6 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 	r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
 	if (r)
 		kfree(*job);
-	else
-		(*job)->vm_pd_addr = adev->gart.table_addr;
 
 	return r;
 }

+ 205 - 108
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -37,6 +37,32 @@
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_display.h"
+
+static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+{
+	struct amdgpu_gpu_instance *gpu_instance;
+	int i;
+
+	mutex_lock(&mgpu_info.mutex);
+
+	for (i = 0; i < mgpu_info.num_gpu; i++) {
+		gpu_instance = &(mgpu_info.gpu_ins[i]);
+		if (gpu_instance->adev == adev) {
+			mgpu_info.gpu_ins[i] =
+				mgpu_info.gpu_ins[mgpu_info.num_gpu - 1];
+			mgpu_info.num_gpu--;
+			if (adev->flags & AMD_IS_APU)
+				mgpu_info.num_apu--;
+			else
+				mgpu_info.num_dgpu--;
+			break;
+		}
+	}
+
+	mutex_unlock(&mgpu_info.mutex);
+}
 
 /**
  * amdgpu_driver_unload_kms - Main unload function for KMS.
@@ -53,6 +79,8 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
 	if (adev == NULL)
 		return;
 
+	amdgpu_unregister_gpu_instance(adev);
+
 	if (adev->rmmio == NULL)
 		goto done_free;
 
@@ -73,6 +101,31 @@ done_free:
 	dev->dev_private = NULL;
 }
 
+static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+{
+	struct amdgpu_gpu_instance *gpu_instance;
+
+	mutex_lock(&mgpu_info.mutex);
+
+	if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) {
+		DRM_ERROR("Cannot register more GPU instances\n");
+		mutex_unlock(&mgpu_info.mutex);
+		return;
+	}
+
+	gpu_instance = &(mgpu_info.gpu_ins[mgpu_info.num_gpu]);
+	gpu_instance->adev = adev;
+	gpu_instance->mgpu_fan_enabled = 0;
+
+	mgpu_info.num_gpu++;
+	if (adev->flags & AMD_IS_APU)
+		mgpu_info.num_apu++;
+	else
+		mgpu_info.num_dgpu++;
+
+	mutex_unlock(&mgpu_info.mutex);
+}
+
 /**
  * amdgpu_driver_load_kms - Main load function for KMS.
  *
@@ -167,6 +220,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 		pm_runtime_put_autosuspend(dev->dev);
 	}
 
+	amdgpu_register_gpu_instance(adev);
 out:
 	if (r) {
 		/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
@@ -255,9 +309,130 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->ver = adev->psp.asd_fw_version;
 		fw_info->feature = adev->psp.asd_feature_version;
 		break;
+	case AMDGPU_INFO_FW_DMCU:
+		fw_info->ver = adev->dm.dmcu_fw_version;
+		fw_info->feature = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
+			     struct drm_amdgpu_info *info,
+			     struct drm_amdgpu_info_hw_ip *result)
+{
+	uint32_t ib_start_alignment = 0;
+	uint32_t ib_size_alignment = 0;
+	enum amd_ip_block_type type;
+	unsigned int num_rings = 0;
+	unsigned int i, j;
+
+	if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+		return -EINVAL;
+
+	switch (info->query_hw_ip.type) {
+	case AMDGPU_HW_IP_GFX:
+		type = AMD_IP_BLOCK_TYPE_GFX;
+		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+			if (adev->gfx.gfx_ring[i].ready)
+				++num_rings;
+		ib_start_alignment = 32;
+		ib_size_alignment = 32;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		type = AMD_IP_BLOCK_TYPE_GFX;
+		for (i = 0; i < adev->gfx.num_compute_rings; i++)
+			if (adev->gfx.compute_ring[i].ready)
+				++num_rings;
+		ib_start_alignment = 32;
+		ib_size_alignment = 32;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		type = AMD_IP_BLOCK_TYPE_SDMA;
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			if (adev->sdma.instance[i].ring.ready)
+				++num_rings;
+		ib_start_alignment = 256;
+		ib_size_alignment = 4;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		type = AMD_IP_BLOCK_TYPE_UVD;
+		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+			if (adev->uvd.harvest_config & (1 << i))
+				continue;
+
+			if (adev->uvd.inst[i].ring.ready)
+				++num_rings;
+		}
+		ib_start_alignment = 64;
+		ib_size_alignment = 64;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		type = AMD_IP_BLOCK_TYPE_VCE;
+		for (i = 0; i < adev->vce.num_rings; i++)
+			if (adev->vce.ring[i].ready)
+				++num_rings;
+		ib_start_alignment = 4;
+		ib_size_alignment = 1;
+		break;
+	case AMDGPU_HW_IP_UVD_ENC:
+		type = AMD_IP_BLOCK_TYPE_UVD;
+		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+			if (adev->uvd.harvest_config & (1 << i))
+				continue;
+
+			for (j = 0; j < adev->uvd.num_enc_rings; j++)
+				if (adev->uvd.inst[i].ring_enc[j].ready)
+					++num_rings;
+		}
+		ib_start_alignment = 64;
+		ib_size_alignment = 64;
+		break;
+	case AMDGPU_HW_IP_VCN_DEC:
+		type = AMD_IP_BLOCK_TYPE_VCN;
+		if (adev->vcn.ring_dec.ready)
+			++num_rings;
+		ib_start_alignment = 16;
+		ib_size_alignment = 16;
+		break;
+	case AMDGPU_HW_IP_VCN_ENC:
+		type = AMD_IP_BLOCK_TYPE_VCN;
+		for (i = 0; i < adev->vcn.num_enc_rings; i++)
+			if (adev->vcn.ring_enc[i].ready)
+				++num_rings;
+		ib_start_alignment = 64;
+		ib_size_alignment = 1;
+		break;
+	case AMDGPU_HW_IP_VCN_JPEG:
+		type = AMD_IP_BLOCK_TYPE_VCN;
+		if (adev->vcn.ring_jpeg.ready)
+			++num_rings;
+		ib_start_alignment = 16;
+		ib_size_alignment = 16;
+		break;
 	default:
 		return -EINVAL;
 	}
+
+	for (i = 0; i < adev->num_ip_blocks; i++)
+		if (adev->ip_blocks[i].version->type == type &&
+		    adev->ip_blocks[i].status.valid)
+			break;
+
+	if (i == adev->num_ip_blocks)
+		return 0;
+
+	num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
+			num_rings);
+
+	result->hw_ip_version_major = adev->ip_blocks[i].version->major;
+	result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+	result->capabilities_flags = 0;
+	result->available_rings = (1 << num_rings) - 1;
+	result->ib_start_alignment = ib_start_alignment;
+	result->ib_size_alignment = ib_size_alignment;
 	return 0;
 }
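
One behavioural detail worth calling out: available_rings is no longer a positional bitmask of individual ready flags but a contiguous mask of the first num_rings ready rings, clamped against amdgpu_ctx_num_entities. A tiny standalone check of the encoding, with an illustrative ring count:

/* Sketch of the new available_rings encoding: with three ready rings,
 * userspace sees a contiguous mask of rings 0..2, not a sparse bitmask.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	unsigned int num_rings = 3;	/* illustrative value */
	uint32_t available_rings = (1 << num_rings) - 1;

	assert(available_rings == 0x7);
	return 0;
}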
 
@@ -286,7 +461,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	struct drm_crtc *crtc;
 	uint32_t ui32 = 0;
 	uint64_t ui64 = 0;
-	int i, j, found;
+	int i, found;
 	int ui32_size = sizeof(ui32);
 
 	if (!info->return_size || !info->return_pointer)
@@ -316,101 +491,14 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_HW_IP_INFO: {
 		struct drm_amdgpu_info_hw_ip ip = {};
-		enum amd_ip_block_type type;
-		uint32_t ring_mask = 0;
-		uint32_t ib_start_alignment = 0;
-		uint32_t ib_size_alignment = 0;
-
-		if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
-			return -EINVAL;
+		int ret;
 
-		switch (info->query_hw_ip.type) {
-		case AMDGPU_HW_IP_GFX:
-			type = AMD_IP_BLOCK_TYPE_GFX;
-			for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-				ring_mask |= adev->gfx.gfx_ring[i].ready << i;
-			ib_start_alignment = 32;
-			ib_size_alignment = 32;
-			break;
-		case AMDGPU_HW_IP_COMPUTE:
-			type = AMD_IP_BLOCK_TYPE_GFX;
-			for (i = 0; i < adev->gfx.num_compute_rings; i++)
-				ring_mask |= adev->gfx.compute_ring[i].ready << i;
-			ib_start_alignment = 32;
-			ib_size_alignment = 32;
-			break;
-		case AMDGPU_HW_IP_DMA:
-			type = AMD_IP_BLOCK_TYPE_SDMA;
-			for (i = 0; i < adev->sdma.num_instances; i++)
-				ring_mask |= adev->sdma.instance[i].ring.ready << i;
-			ib_start_alignment = 256;
-			ib_size_alignment = 4;
-			break;
-		case AMDGPU_HW_IP_UVD:
-			type = AMD_IP_BLOCK_TYPE_UVD;
-			for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-				if (adev->uvd.harvest_config & (1 << i))
-					continue;
-				ring_mask |= adev->uvd.inst[i].ring.ready;
-			}
-			ib_start_alignment = 64;
-			ib_size_alignment = 64;
-			break;
-		case AMDGPU_HW_IP_VCE:
-			type = AMD_IP_BLOCK_TYPE_VCE;
-			for (i = 0; i < adev->vce.num_rings; i++)
-				ring_mask |= adev->vce.ring[i].ready << i;
-			ib_start_alignment = 4;
-			ib_size_alignment = 1;
-			break;
-		case AMDGPU_HW_IP_UVD_ENC:
-			type = AMD_IP_BLOCK_TYPE_UVD;
-			for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-				if (adev->uvd.harvest_config & (1 << i))
-					continue;
-				for (j = 0; j < adev->uvd.num_enc_rings; j++)
-					ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j;
-			}
-			ib_start_alignment = 64;
-			ib_size_alignment = 64;
-			break;
-		case AMDGPU_HW_IP_VCN_DEC:
-			type = AMD_IP_BLOCK_TYPE_VCN;
-			ring_mask = adev->vcn.ring_dec.ready;
-			ib_start_alignment = 16;
-			ib_size_alignment = 16;
-			break;
-		case AMDGPU_HW_IP_VCN_ENC:
-			type = AMD_IP_BLOCK_TYPE_VCN;
-			for (i = 0; i < adev->vcn.num_enc_rings; i++)
-				ring_mask |= adev->vcn.ring_enc[i].ready << i;
-			ib_start_alignment = 64;
-			ib_size_alignment = 1;
-			break;
-		case AMDGPU_HW_IP_VCN_JPEG:
-			type = AMD_IP_BLOCK_TYPE_VCN;
-			ring_mask = adev->vcn.ring_jpeg.ready;
-			ib_start_alignment = 16;
-			ib_size_alignment = 16;
-			break;
-		default:
-			return -EINVAL;
-		}
+		ret = amdgpu_hw_ip_info(adev, info, &ip);
+		if (ret)
+			return ret;
 
-		for (i = 0; i < adev->num_ip_blocks; i++) {
-			if (adev->ip_blocks[i].version->type == type &&
-			    adev->ip_blocks[i].status.valid) {
-				ip.hw_ip_version_major = adev->ip_blocks[i].version->major;
-				ip.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
-				ip.capabilities_flags = 0;
-				ip.available_rings = ring_mask;
-				ip.ib_start_alignment = ib_start_alignment;
-				ip.ib_size_alignment = ib_size_alignment;
-				break;
-			}
-		}
-		return copy_to_user(out, &ip,
-				    min((size_t)size, sizeof(ip))) ? -EFAULT : 0;
+		ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip)));
+		return ret ? -EFAULT : 0;
 	}
 	case AMDGPU_INFO_HW_IP_COUNT: {
 		enum amd_ip_block_type type;
@@ -492,13 +580,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		struct drm_amdgpu_info_gds gds_info;
 
 		memset(&gds_info, 0, sizeof(gds_info));
-		gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
-		gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
-		gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT;
-		gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
-		gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
-		gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
-		gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
+		gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
+		gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
+		gds_info.gds_total_size = adev->gds.mem.total_size;
+		gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
+		gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
+		gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
+		gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
 		return copy_to_user(out, &gds_info,
 				    min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
 	}
@@ -617,16 +705,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		vm_size -= AMDGPU_VA_RESERVED_SIZE;
 
 		/* Older VCE FW versions are buggy and can handle only 40bits */
-		if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
+		if (adev->vce.fw_version &&
+		    adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
 			vm_size = min(vm_size, 1ULL << 40);
 
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max =
-			min(vm_size, AMDGPU_VA_HOLE_START);
+			min(vm_size, AMDGPU_GMC_HOLE_START);
 
-		if (vm_size > AMDGPU_VA_HOLE_START) {
-			dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
-			dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
+		if (vm_size > AMDGPU_GMC_HOLE_START) {
+			dev_info.high_va_offset = AMDGPU_GMC_HOLE_END;
+			dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
 		}
 		dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
 		dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
@@ -941,10 +1030,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 	pm_runtime_get_sync(dev->dev);
 
-	if (adev->asic_type != CHIP_RAVEN) {
+	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
 		amdgpu_uvd_free_handles(adev, file_priv);
+	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
 		amdgpu_vce_free_handles(adev, file_priv);
-	}
 
 	amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
 
@@ -1262,6 +1351,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 	seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
 		   fw_info.feature, fw_info.ver);
 
+	/* DMCU */
+	query_fw.fw_type = AMDGPU_INFO_FW_DMCU;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
 
 	seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
 

+ 35 - 61
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

@@ -51,18 +51,6 @@
  *
  */
 
-static bool amdgpu_bo_need_backup(struct amdgpu_device *adev)
-{
-	if (adev->flags & AMD_IS_APU)
-		return false;
-
-	if (amdgpu_gpu_recovery == 0 ||
-	    (amdgpu_gpu_recovery == -1  && !amdgpu_sriov_vf(adev)))
-		return false;
-
-	return true;
-}
-
 /**
  * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
  *
@@ -163,10 +151,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		places[c].fpfn = 0;
-		if (flags & AMDGPU_GEM_CREATE_SHADOW)
-			places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
-		else
-			places[c].lpfn = 0;
+		places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_TT;
 		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 			places[c].flags |= TTM_PL_FLAG_WC |
@@ -253,6 +238,11 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 	bool free = false;
 	int r;
 
+	if (!size) {
+		amdgpu_bo_unref(bo_ptr);
+		return 0;
+	}
+
 	memset(&bp, 0, sizeof(bp));
 	bp.size = size;
 	bp.byte_align = align;
@@ -346,7 +336,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	amdgpu_bo_unreserve(*bo_ptr);
+	if (*bo_ptr)
+		amdgpu_bo_unreserve(*bo_ptr);
 
 	return 0;
 }
@@ -436,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	int r;
 
 	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
-	size = ALIGN(size, PAGE_SIZE);
+	if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
+			  AMDGPU_GEM_DOMAIN_OA))
+		size <<= PAGE_SHIFT;
+	else
+		size = ALIGN(size, PAGE_SIZE);
 
 	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
 		return -ENOMEM;
@@ -451,7 +446,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 		return -ENOMEM;
 	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
 	INIT_LIST_HEAD(&bo->shadow_list);
-	INIT_LIST_HEAD(&bo->va);
+	bo->vm_bo = NULL;
 	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
 		bp->domain;
 	bo->allowed_domains = bo->preferred_domains;
@@ -541,7 +536,7 @@ fail_unreserve:
 }
 
 static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-				   unsigned long size, int byte_align,
+				   unsigned long size,
 				   struct amdgpu_bo *bo)
 {
 	struct amdgpu_bo_param bp;
@@ -552,7 +547,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 
 	memset(&bp, 0, sizeof(bp));
 	bp.size = size;
-	bp.byte_align = byte_align;
 	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
 	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 		AMDGPU_GEM_CREATE_SHADOW;
@@ -563,7 +557,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
-		list_add_tail(&bo->shadow_list, &adev->shadow_list);
+		list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
 		mutex_unlock(&adev->shadow_list_lock);
 	}
 
@@ -596,12 +590,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) {
+	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
 		if (!bp->resv)
 			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
 							NULL));
 
-		r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
+		r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
 
 		if (!bp->resv)
 			reservation_object_unlock((*bo_ptr)->tbo.resv);
@@ -695,13 +689,10 @@ retry:
 }
 
 /**
- * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
- * @adev: amdgpu device object
- * @ring: amdgpu_ring for the engine handling the buffer operations
- * @bo: &amdgpu_bo buffer to be restored
- * @resv: reservation object with embedded fence
+ * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
+ *
+ * @shadow: &amdgpu_bo shadow to be restored
  * @fence: dma_fence associated with the operation
- * @direct: whether to submit the job directly
  *
  * Copies a buffer object's shadow content back to the object.
  * This is used for recovering a buffer from its shadow in case of a gpu
@@ -710,36 +701,19 @@ retry:
  * Returns:
  * 0 for success or a negative error code on failure.
  */
-int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring,
-				  struct amdgpu_bo *bo,
-				  struct reservation_object *resv,
-				  struct dma_fence **fence,
-				  bool direct)
+int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
 {
-	struct amdgpu_bo *shadow = bo->shadow;
-	uint64_t bo_addr, shadow_addr;
-	int r;
-
-	if (!shadow)
-		return -EINVAL;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	uint64_t shadow_addr, parent_addr;
 
-	bo_addr = amdgpu_bo_gpu_offset(bo);
-	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
+	shadow_addr = amdgpu_bo_gpu_offset(shadow);
+	parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
 
-	r = reservation_object_reserve_shared(bo->tbo.resv);
-	if (r)
-		goto err;
-
-	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
-			       amdgpu_bo_size(bo), resv, fence,
-			       direct, false);
-	if (!r)
-		amdgpu_bo_fence(bo, *fence, true);
-
-err:
-	return r;
+	return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
+				  amdgpu_bo_size(shadow), NULL, fence,
+				  true, false);
 }
 
 /**
@@ -1019,10 +993,12 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 {
 	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
-	if (0 && (adev->flags & AMD_IS_APU)) {
+#ifndef CONFIG_HIBERNATION
+	if (adev->flags & AMD_IS_APU) {
 		/* Useless to evict on IGP chips */
 		return 0;
 	}
+#endif
 	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
 }
 
@@ -1360,15 +1336,13 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 {
 	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
-	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
-		     !amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem));
 	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
-		     !bo->pin_count);
+		     !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
 	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
 	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
 		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
 
-	return bo->tbo.offset;
+	return amdgpu_gmc_sign_extend(bo->tbo.offset);
 }
 
 /**

+ 4 - 21
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

@@ -89,8 +89,8 @@ struct amdgpu_bo {
 	void				*metadata;
 	u32				metadata_size;
 	unsigned			prime_shared_count;
-	/* list of all virtual address to which this bo is associated to */
-	struct list_head		va;
+	/* per VM structure for page tables and with virtual addresses */
+	struct amdgpu_vm_bo_base	*vm_bo;
 	/* Constant after initialization */
 	struct drm_gem_object		gem_base;
 	struct amdgpu_bo		*parent;
@@ -193,19 +193,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
 	return drm_vma_node_offset_addr(&bo->tbo.vma_node);
 }
 
-/**
- * amdgpu_bo_gpu_accessible - return whether the bo is currently in memory that
- * is accessible to the GPU.
- */
-static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
-{
-	switch (bo->tbo.mem.mem_type) {
-	case TTM_PL_TT: return amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem);
-	case TTM_PL_VRAM: return true;
-	default: return false;
-	}
-}
-
 /**
  * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
  */
@@ -286,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 			       struct reservation_object *resv,
 			       struct dma_fence **fence, bool direct);
 int amdgpu_bo_validate(struct amdgpu_bo *bo);
-int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring,
-				  struct amdgpu_bo *bo,
-				  struct reservation_object *resv,
-				  struct dma_fence **fence,
-				  bool direct);
+int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
+			     struct dma_fence **fence);
 uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
 					    uint32_t domain);
 

+ 235 - 38
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

@@ -27,6 +27,7 @@
 #include "amdgpu_drv.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_dpm.h"
+#include "amdgpu_display.h"
 #include "atom.h"
 #include <linux/power_supply.h>
 #include <linux/hwmon.h>
@@ -473,6 +474,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * in each power level within a power state.  The pp_od_clk_voltage is used for
  * this.
  *
+ * < For Vega10 and previous ASICs >
+ *
  * Reading the file will display:
  *
  * - a list of engine clock levels and voltages labeled OD_SCLK
@@ -490,6 +493,44 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * "c" (commit) to the file to commit your changes.  If you want to reset to the
  * default power levels, write "r" (reset) to the file to reset them.
  *
+ *
+ * < For Vega20 >
+ *
+ * Reading the file will display:
+ *
+ * - minimum and maximum engine clock labeled OD_SCLK
+ *
+ * - maximum memory clock labeled OD_MCLK
+ *
+ * - three <frequency, voltage> points labeled OD_VDDC_CURVE.
+ *   They can be used to calibrate the sclk voltage curve.
+ *
+ * - a list of valid ranges for sclk, mclk, and voltage curve points
+ *   labeled OD_RANGE
+ *
+ * To manually adjust these settings:
+ *
+ * - First select manual using power_dpm_force_performance_level
+ *
+ * - For clock frequency setting, enter a new value by writing a
+ *   string that contains "s/m index clock" to the file. The index
+ *   should be 0 to set the minimum clock and 1 to set the maximum
+ *   clock. E.g., "s 0 500" will update the minimum sclk to 500 MHz
+ *   and "m 1 800" will update the maximum mclk to 800 MHz.
+ *
+ *   For the sclk voltage curve, enter the new values by writing a
+ *   string that contains "vc point clock voltage" to the file. The
+ *   points are indexed 0, 1 and 2. E.g., "vc 0 300 600" will update
+ *   the first point with a clock of 300 MHz and a voltage of 600 mV,
+ *   and "vc 2 1000 1000" will update the third point with a clock of
+ *   1000 MHz and a voltage of 1000 mV.
+ *
+ * - When you have edited all of the states as needed, write "c" (commit)
+ *   to the file to commit your changes.
+ *
+ * - If you want to reset to the default power levels, write "r" (reset)
+ *   to the file to reset them.
+ *
  */
 
 static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
@@ -519,6 +560,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 		type = PP_OD_RESTORE_DEFAULT_TABLE;
 	else if (*buf == 'c')
 		type = PP_OD_COMMIT_DPM_TABLE;
+	else if (!strncmp(buf, "vc", 2))
+		type = PP_OD_EDIT_VDDC_CURVE;
 	else
 		return -EINVAL;
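
For context, a hedged userspace sketch of the new "vc" command path (not part of the patch): the pp_od_clk_voltage node name comes from the patch, while the card0 path, and having already selected manual via power_dpm_force_performance_level, are assumptions.

/* Sketch: edit voltage-curve point 0 to 300 MHz / 600 mV, then commit. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/drm/card0/device/pp_od_clk_voltage";
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return 1;
	/* "vc point clock voltage": point 0, 300 MHz, 600 mV */
	write(fd, "vc 0 300 600", strlen("vc 0 300 600"));
	write(fd, "c", 1);	/* commit the edited table */
	close(fd);
	return 0;
}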
 
@@ -526,6 +569,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 
 	tmp_str = buf_cpy;
 
+	if (type == PP_OD_EDIT_VDDC_CURVE)
+		tmp_str++;
 	while (isspace(*++tmp_str));
 
 	while (tmp_str[0]) {
@@ -569,6 +614,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
 	if (adev->powerplay.pp_funcs->print_clock_levels) {
 		size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+		size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
 		return size;
 	} else {
@@ -1074,12 +1120,19 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	int err;
 	u32 value;
+	u32 pwm_mode;
 
 	/* Can't adjust fan when the card is off */
 	if  ((adev->flags & AMD_IS_PX) &&
 	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
 		return -EINVAL;
 
+	pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+	if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
+		pr_info("manual fan speed control should be enabled first\n");
+		return -EINVAL;
+	}
+
 	err = kstrtou32(buf, 10, &value);
 	if (err)
 		return err;
@@ -1141,6 +1194,148 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	return sprintf(buf, "%i\n", speed);
 }
 
+static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	u32 min_rpm = 0;
+	u32 size = sizeof(min_rpm);
+	int r;
+
+	if (!adev->powerplay.pp_funcs->read_sensor)
+		return -EINVAL;
+
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
+				   (void *)&min_rpm, &size);
+	if (r)
+		return r;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", min_rpm);
+}
+
+static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	u32 max_rpm = 0;
+	u32 size = sizeof(max_rpm);
+	int r;
+
+	if (!adev->powerplay.pp_funcs->read_sensor)
+		return -EINVAL;
+
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
+				   (void *)&max_rpm, &size);
+	if (r)
+		return r;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", max_rpm);
+}
+
+static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int err;
+	u32 rpm = 0;
+
+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+		err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm);
+		if (err)
+			return err;
+	}
+
+	return sprintf(buf, "%i\n", rpm);
+}
+
+static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int err;
+	u32 value;
+	u32 pwm_mode;
+
+	pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+	if (pwm_mode != AMD_FAN_CTRL_MANUAL)
+		return -ENODATA;
+
+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	err = kstrtou32(buf, 10, &value);
+	if (err)
+		return err;
+
+	if (adev->powerplay.pp_funcs->set_fan_speed_rpm) {
+		err = amdgpu_dpm_set_fan_speed_rpm(adev, value);
+		if (err)
+			return err;
+	}
+
+	return count;
+}
+
+static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	u32 pwm_mode = 0;
+
+	if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+		return -EINVAL;
+
+	pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+
+	return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1);
+}
+
+static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf,
+					    size_t count)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int err;
+	int value;
+	u32 pwm_mode;
+
+	/* Can't adjust fan when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
+	if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+		return -EINVAL;
+
+	err = kstrtoint(buf, 10, &value);
+	if (err)
+		return err;
+
+	if (value == 0)
+		pwm_mode = AMD_FAN_CTRL_AUTO;
+	else if (value == 1)
+		pwm_mode = AMD_FAN_CTRL_MANUAL;
+	else
+		return -EINVAL;
+
+	amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
+
+	return count;
+}
+
 static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
 					struct device_attribute *attr,
 					char *buf)
@@ -1360,8 +1555,16 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
  *
  * - pwm1_max: pulse width modulation fan control maximum level (255)
  *
+ * - fan1_min: minimum fan speed, unit: revolutions/min (RPM)
+ *
+ * - fan1_max: maximum fan speed, unit: revolutions/min (RPM)
+ *
  * - fan1_input: fan speed in RPM
  *
+ * - fan[1-*]_target: desired fan speed, unit: revolutions/min (RPM)
+ *
+ * - fan[1-*]_enable: enable or disable the sensors. 1: enable, 0: disable
+ *
  * You can use hwmon tools like sensors to view this information on your system.
  *
  */
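
A hedged userspace sketch of the new RPM interface (not part of the patch): fan1_enable and fan1_target match the attributes added above, while the hwmon0 path is an assumption that varies per system.

/* Sketch: switch fan control to manual and request a target RPM. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd >= 0) {
		write(fd, val, strlen(val));
		close(fd);
	}
}

int main(void)
{
	const char *base = "/sys/class/hwmon/hwmon0";	/* assumed index */
	char path[128];

	snprintf(path, sizeof(path), "%s/fan1_enable", base);
	write_str(path, "1");		/* 1: manual, 0: automatic */
	snprintf(path, sizeof(path), "%s/fan1_target", base);
	write_str(path, "1500");	/* desired speed in RPM */
	return 0;
}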
@@ -1374,6 +1577,10 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_
 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
 static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
 static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_min, S_IRUGO, amdgpu_hwmon_get_fan1_min, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_max, S_IRUGO, amdgpu_hwmon_get_fan1_max, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_target, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_target, amdgpu_hwmon_set_fan1_target, 0);
+static SENSOR_DEVICE_ATTR(fan1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_enable, amdgpu_hwmon_set_fan1_enable, 0);
 static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0);
 static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0);
 static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0);
@@ -1392,6 +1599,10 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
 	&sensor_dev_attr_pwm1_max.dev_attr.attr,
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
+	&sensor_dev_attr_fan1_min.dev_attr.attr,
+	&sensor_dev_attr_fan1_max.dev_attr.attr,
+	&sensor_dev_attr_fan1_target.dev_attr.attr,
+	&sensor_dev_attr_fan1_enable.dev_attr.attr,
 	&sensor_dev_attr_in0_input.dev_attr.attr,
 	&sensor_dev_attr_in0_label.dev_attr.attr,
 	&sensor_dev_attr_in1_input.dev_attr.attr,
@@ -1410,13 +1621,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	umode_t effective_mode = attr->mode;
 
-
 	/* Skip fan attributes if fan is not present */
 	if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
 	    attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
 	    attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
 	    attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
-	    attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
+	    attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+	    attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+	    attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+	    attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+	    attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
 		return 0;
 
 	/* Skip limit attributes if DPM is not enabled */
@@ -1426,7 +1640,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
 	     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
 	     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
-	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
+	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
 		return 0;
 
 	/* mask fan attributes if we have no bindings for this asic to expose */
@@ -1451,10 +1670,18 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	/* hide max/min values if we can't both query and manage the fan */
 	if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
 	     !adev->powerplay.pp_funcs->get_fan_speed_percent) &&
+	     (!adev->powerplay.pp_funcs->set_fan_speed_rpm &&
+	     !adev->powerplay.pp_funcs->get_fan_speed_rpm) &&
 	    (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;
 
+	if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm &&
+	     !adev->powerplay.pp_funcs->get_fan_speed_rpm) &&
+	    (attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_min.dev_attr.attr))
+		return 0;
+
 	/* only APUs have vddnb */
 	if (!(adev->flags & AMD_IS_APU) &&
 	    (attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
@@ -1719,18 +1946,6 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
 		mutex_lock(&adev->pm.mutex);
 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
 		mutex_unlock(&adev->pm.mutex);
-	} else {
-		if (enable) {
-			mutex_lock(&adev->pm.mutex);
-			adev->pm.dpm.uvd_active = true;
-			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
-			mutex_unlock(&adev->pm.mutex);
-		} else {
-			mutex_lock(&adev->pm.mutex);
-			adev->pm.dpm.uvd_active = false;
-			mutex_unlock(&adev->pm.mutex);
-		}
-		amdgpu_pm_compute_clocks(adev);
 	}
 }
 
@@ -1741,29 +1956,6 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 		mutex_lock(&adev->pm.mutex);
 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
 		mutex_unlock(&adev->pm.mutex);
-	} else {
-		if (enable) {
-			mutex_lock(&adev->pm.mutex);
-			adev->pm.dpm.vce_active = true;
-			/* XXX select vce level based on ring/task */
-			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
-			mutex_unlock(&adev->pm.mutex);
-			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							       AMD_CG_STATE_UNGATE);
-			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							       AMD_PG_STATE_UNGATE);
-			amdgpu_pm_compute_clocks(adev);
-		} else {
-			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							       AMD_PG_STATE_GATE);
-			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							       AMD_CG_STATE_GATE);
-			mutex_lock(&adev->pm.mutex);
-			adev->pm.dpm.vce_active = false;
-			mutex_unlock(&adev->pm.mutex);
-			amdgpu_pm_compute_clocks(adev);
-		}
-
 	}
 }
 
@@ -1965,6 +2157,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
 {
 	uint32_t value;
+	uint64_t value64;
 	uint32_t query = 0;
 	int size;
 
@@ -2003,6 +2196,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		seq_printf(m, "GPU Load: %u %%\n", value);
 	seq_printf(m, "\n");
 
+	/* SMC feature mask */
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size))
+		seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64);
+
 	/* UVD clocks */
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
 		if (!value) {

+ 27 - 26
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c

@@ -35,6 +35,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_display.h"
+#include "amdgpu_gem.h"
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 
@@ -43,10 +44,10 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops;
 /**
  * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
  * implementation
- * @obj: GEM buffer object
+ * @obj: GEM buffer object (BO)
  *
  * Returns:
- * A scatter/gather table for the pinned pages of the buffer object's memory.
+ * A scatter/gather table for the pinned pages of the BO's memory.
  */
 struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
 {
@@ -58,9 +59,9 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 /**
  * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
- * @obj: GEM buffer object
+ * @obj: GEM BO
  *
- * Sets up an in-kernel virtual mapping of the buffer object's memory.
+ * Sets up an in-kernel virtual mapping of the BO's memory.
  *
  * Returns:
  * The virtual address of the mapping or an error pointer.
@@ -80,10 +81,10 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
 
 /**
  * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
- * @obj: GEM buffer object
- * @vaddr: virtual address (unused)
+ * @obj: GEM BO
+ * @vaddr: Virtual address (unused)
  *
- * Tears down the in-kernel virtual mapping of the buffer object's memory.
+ * Tears down the in-kernel virtual mapping of the BO's memory.
  */
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
 {
@@ -94,14 +95,14 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
 
 /**
  * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
- * @obj: GEM buffer object
- * @vma: virtual memory area
+ * @obj: GEM BO
+ * @vma: Virtual memory area
  *
- * Sets up a userspace mapping of the buffer object's memory in the given
+ * Sets up a userspace mapping of the BO's memory in the given
  * virtual memory area.
  *
  * Returns:
- * 0 on success or negative error code.
+ * 0 on success or a negative error code on failure.
  */
 int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 {
@@ -144,10 +145,10 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
  * @attach: DMA-buf attachment
  * @sg: Scatter/gather table
  *
- * Import shared DMA buffer memory exported by another device.
+ * Imports shared DMA buffer memory exported by another device.
  *
  * Returns:
- * A new GEM buffer object of the given DRM device, representing the memory
+ * A new GEM BO of the given DRM device, representing the memory
  * described by the given DMA-buf attachment and scatter/gather table.
  */
 struct drm_gem_object *
@@ -190,7 +191,7 @@ error:
 
 /**
  * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: shared DMA buffer
+ * @dma_buf: Shared DMA buffer
  * @attach: DMA-buf attachment
  *
  * Makes sure that the shared DMA buffer can be accessed by the target device.
@@ -198,7 +199,7 @@ error:
  * all DMA devices.
  *
  * Returns:
- * 0 on success or negative error code.
+ * 0 on success or a negative error code on failure.
  */
 static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
 				 struct dma_buf_attachment *attach)
@@ -250,11 +251,11 @@ error_detach:
 
 /**
  * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
- * @dma_buf: shared DMA buffer
+ * @dma_buf: Shared DMA buffer
  * @attach: DMA-buf attachment
  *
  * This is called when a shared DMA buffer no longer needs to be accessible by
- * the other device. For now, simply unpins the buffer from GTT.
+ * another device. For now, simply unpins the buffer from GTT.
  */
 static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
 				  struct dma_buf_attachment *attach)
@@ -279,10 +280,10 @@ error:
 
 /**
  * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
- * @obj: GEM buffer object
+ * @obj: GEM BO
  *
  * Returns:
- * The buffer object's reservation object.
+ * The BO's reservation object.
  */
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
 {
@@ -293,15 +294,15 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
 
 /**
  * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
- * @dma_buf: shared DMA buffer
- * @direction: direction of DMA transfer
+ * @dma_buf: Shared DMA buffer
+ * @direction: Direction of DMA transfer
  *
  * This is called before CPU access to the shared DMA buffer's memory. If it's
  * a read access, the buffer is moved to the GTT domain if possible, for optimal
  * CPU read performance.
  *
  * Returns:
- * 0 on success or negative error code.
+ * 0 on success or a negative error code on failure.
  */
 static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
 				       enum dma_data_direction direction)
@@ -348,14 +349,14 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {
 /**
  * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
  * @dev: DRM device
- * @gobj: GEM buffer object
- * @flags: flags like DRM_CLOEXEC and DRM_RDWR
+ * @gobj: GEM BO
+ * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
  *
  * The main work is done by the &drm_gem_prime_export helper, which in turn
  * uses &amdgpu_gem_prime_res_obj.
  *
  * Returns:
- * Shared DMA buffer representing the GEM buffer object from the given device.
+ * Shared DMA buffer representing the GEM BO from the given device.
  */
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
@@ -386,7 +387,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
  * uses &amdgpu_gem_prime_import_sg_table.
  *
  * Returns:
- * GEM buffer object representing the shared DMA buffer for the given device.
+ * GEM BO representing the shared DMA buffer for the given device.
  */
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 					    struct dma_buf *dma_buf)

+ 24 - 5
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -31,6 +31,7 @@
 #include "soc15_common.h"
 #include "psp_v3_1.h"
 #include "psp_v10_0.h"
+#include "psp_v11_0.h"
 
 static void psp_set_funcs(struct amdgpu_device *adev);
 
@@ -52,12 +53,14 @@ static int psp_sw_init(void *handle)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
-	case CHIP_VEGA20:
 		psp_v3_1_set_psp_funcs(psp);
 		break;
 	case CHIP_RAVEN:
 		psp_v10_0_set_psp_funcs(psp);
 		break;
+	case CHIP_VEGA20:
+		psp_v11_0_set_psp_funcs(psp);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -131,6 +134,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
 		msleep(1);
 	}
 
+	/* the status field must be 0 after FW is loaded */
+	if (ucode && psp->cmd_buf_mem->resp.status) {
+		DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
+			  psp->cmd_buf_mem->resp.status, ucode->ucode_id);
+		return -EINVAL;
+	}
+
 	if (ucode) {
 		ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
 		ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
@@ -160,7 +170,7 @@ static int psp_tmr_init(struct psp_context *psp)
 	 * Note: this memory needs to be reserved until the driver
 	 * is uninitialized.
 	 */
-	ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000,
+	ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x100000,
 				      AMDGPU_GEM_DOMAIN_VRAM,
 				      &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
 
@@ -176,7 +186,9 @@ static int psp_tmr_load(struct psp_context *psp)
 	if (!cmd)
 		return -ENOMEM;
 
-	psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000);
+	psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
+	DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
+			PSP_TMR_SIZE, psp->tmr_mc_addr);
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr, 1);
@@ -440,8 +452,6 @@ static int psp_hw_fini(void *handle)
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		return 0;
 
-	amdgpu_ucode_fini_bo(adev);
-
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
 	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@@ -594,3 +604,12 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
 	.rev = 0,
 	.funcs = &psp_ip_funcs,
 };
+
+const struct amdgpu_ip_block_version psp_v11_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_PSP,
+	.major = 11,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &psp_ip_funcs,
+};
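
The new IP block version is what the SoC setup code registers for Vega20; a hedged sketch of the registration (the exact call site lives in the soc15 init path):

	/* sketch: register the PSP v11 block during early init */
	if (adev->asic_type == CHIP_VEGA20)
		amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);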

+ 53 - 12
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h

@@ -32,8 +32,10 @@
 #define PSP_CMD_BUFFER_SIZE	0x1000
 #define PSP_ASD_SHARED_MEM_SIZE	0x4000
 #define PSP_1_MEG		0x100000
+#define PSP_TMR_SIZE	0x400000
 
 struct psp_context;
+struct psp_xgmi_topology_info;
 
 enum psp_ring_type
 {
@@ -63,18 +65,27 @@ struct psp_funcs
 	int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
 			    struct psp_gfx_cmd_resp *cmd);
 	int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
-	int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type);
+	int (*ring_create)(struct psp_context *psp,
+			   enum psp_ring_type ring_type);
 	int (*ring_stop)(struct psp_context *psp,
 			    enum psp_ring_type ring_type);
 	int (*ring_destroy)(struct psp_context *psp,
 			    enum psp_ring_type ring_type);
-	int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
-			  uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
+	int (*cmd_submit)(struct psp_context *psp,
+			  struct amdgpu_firmware_info *ucode,
+			  uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
+			  int index);
 	bool (*compare_sram_data)(struct psp_context *psp,
 				  struct amdgpu_firmware_info *ucode,
 				  enum AMDGPU_UCODE_ID ucode_type);
 	bool (*smu_reload_quirk)(struct psp_context *psp);
 	int (*mode1_reset)(struct psp_context *psp);
+	uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
+	uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
+	int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
+			struct psp_xgmi_topology_info *topology);
+	int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
+			struct psp_xgmi_topology_info *topology);
 };
 
 struct psp_context
@@ -83,11 +94,11 @@ struct psp_context
 	struct psp_ring                 km_ring;
 	struct psp_gfx_cmd_resp		*cmd;
 
-	const struct psp_funcs 		*funcs;
+	const struct psp_funcs		*funcs;
 
 	/* fence buffer */
-	struct amdgpu_bo 		*fw_pri_bo;
-	uint64_t 			fw_pri_mc_addr;
+	struct amdgpu_bo		*fw_pri_bo;
+	uint64_t			fw_pri_mc_addr;
 	void				*fw_pri_buf;
 
 	/* sos firmware */
@@ -100,8 +111,8 @@ struct psp_context
 	uint8_t				*sos_start_addr;
 
 	/* tmr buffer */
-	struct amdgpu_bo 		*tmr_bo;
-	uint64_t 			tmr_mc_addr;
+	struct amdgpu_bo		*tmr_bo;
+	uint64_t			tmr_mc_addr;
 	void				*tmr_buf;
 
 	/* asd firmware and buffer */
@@ -110,13 +121,13 @@ struct psp_context
 	uint32_t			asd_feature_version;
 	uint32_t			asd_ucode_size;
 	uint8_t				*asd_start_addr;
-	struct amdgpu_bo 		*asd_shared_bo;
-	uint64_t 			asd_shared_mc_addr;
+	struct amdgpu_bo		*asd_shared_bo;
+	uint64_t			asd_shared_mc_addr;
 	void				*asd_shared_buf;
 
 	/* fence buffer */
-	struct amdgpu_bo 		*fence_buf_bo;
-	uint64_t 			fence_buf_mc_addr;
+	struct amdgpu_bo		*fence_buf_bo;
+	uint64_t			fence_buf_mc_addr;
 	void				*fence_buf;
 
 	/* cmd buffer */
@@ -130,6 +141,23 @@ struct amdgpu_psp_funcs {
 					enum AMDGPU_UCODE_ID);
 };
 
+struct psp_xgmi_topology_info {
+	/* Generated by PSP to identify the GPU instance within the xGMI connection */
+	uint64_t			device_id;
+	/*
+	 * If all bits are set to 0, the driver indicates that it wants to
+	 * retrieve the xGMI connection vector topology, but not to
+	 * access-enable the connections.
+	 * If some or all bits are set to 1, the driver indicates that it
+	 * wants to retrieve the current xGMI topology and to access-enable
+	 * the link to GPU[i] associated with the bit position in the vector.
+	 * On return, the bits indicate which xGMI links are present/active,
+	 * depending on the value passed in. The relative bit offset for the
+	 * relative GPU index within the hive is always marked active.
+	 */
+	uint32_t			connection_mask;
+	uint32_t			reserved; /* must be 0 */
+};
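
A short sketch of the connection_mask semantics described above (the device count n and the mask bits are hypothetical; the psp_xgmi_*_topology_info wrappers are the macros defined just below):

	struct psp_xgmi_topology_info top = {0};

	/* all bits 0: only report the current topology */
	top.connection_mask = 0;
	psp_xgmi_get_topology_info(psp, n, &top);

	/* bits set: also access-enable the links to GPU1 and GPU2 */
	top.connection_mask = BIT(1) | BIT(2);
	psp_xgmi_set_topology_info(psp, n, &top);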
+
 #define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
 #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@@ -149,6 +177,18 @@ struct amdgpu_psp_funcs {
 		((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
 #define psp_mode1_reset(psp) \
 		((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
+#define psp_xgmi_get_device_id(psp) \
+		((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
+#define psp_xgmi_get_hive_id(psp) \
+		((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
+#define psp_xgmi_get_topology_info(psp, num_device, topology) \
+		((psp)->funcs->xgmi_get_topology_info ? \
+		(psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL)
+#define psp_xgmi_set_topology_info(psp, num_device, topology) \
+		((psp)->funcs->xgmi_set_topology_info ?	 \
+		(psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
+
+#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
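
Like the other wrappers here, the xGMI macros guard against ASICs that do not implement the callback, so a caller can probe support without an explicit NULL check; a hypothetical caller:

	/* returns 0 when the ASIC has no xGMI support */
	uint64_t hive_id = psp_xgmi_get_hive_id(psp);

	if (!hive_id)
		return 0;	/* skip xGMI setup */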
 
 extern const struct amd_ip_funcs psp_ip_funcs;
 
@@ -159,5 +199,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
 extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
 
 int psp_gpu_reset(struct amdgpu_device *adev);
+extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
 
 #endif

+ 0 - 316
drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

@@ -1,316 +0,0 @@
-/*
- * Copyright 2017 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Andres Rodriguez
- */
-
-#include "amdgpu.h"
-#include "amdgpu_ring.h"
-
-static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
-				    int hw_ip)
-{
-	if (!mapper)
-		return -EINVAL;
-
-	if (hw_ip > AMDGPU_MAX_IP_NUM)
-		return -EINVAL;
-
-	mapper->hw_ip = hw_ip;
-	mutex_init(&mapper->lock);
-
-	memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
-
-	return 0;
-}
-
-static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
-					  int ring)
-{
-	return mapper->queue_map[ring];
-}
-
-static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
-			     int ring, struct amdgpu_ring *pring)
-{
-	if (WARN_ON(mapper->queue_map[ring])) {
-		DRM_ERROR("Un-expected ring re-map\n");
-		return -EINVAL;
-	}
-
-	mapper->queue_map[ring] = pring;
-
-	return 0;
-}
-
-static int amdgpu_identity_map(struct amdgpu_device *adev,
-			       struct amdgpu_queue_mapper *mapper,
-			       u32 ring,
-			       struct amdgpu_ring **out_ring)
-{
-	switch (mapper->hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-		*out_ring = &adev->gfx.gfx_ring[ring];
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		*out_ring = &adev->gfx.compute_ring[ring];
-		break;
-	case AMDGPU_HW_IP_DMA:
-		*out_ring = &adev->sdma.instance[ring].ring;
-		break;
-	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.inst[0].ring;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		*out_ring = &adev->vce.ring[ring];
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		*out_ring = &adev->uvd.inst[0].ring_enc[ring];
-		break;
-	case AMDGPU_HW_IP_VCN_DEC:
-		*out_ring = &adev->vcn.ring_dec;
-		break;
-	case AMDGPU_HW_IP_VCN_ENC:
-		*out_ring = &adev->vcn.ring_enc[ring];
-		break;
-	case AMDGPU_HW_IP_VCN_JPEG:
-		*out_ring = &adev->vcn.ring_jpeg;
-		break;
-	default:
-		*out_ring = NULL;
-		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
-		return -EINVAL;
-	}
-
-	return amdgpu_update_cached_map(mapper, ring, *out_ring);
-}
-
-static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
-{
-	switch (hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-		return AMDGPU_RING_TYPE_GFX;
-	case AMDGPU_HW_IP_COMPUTE:
-		return AMDGPU_RING_TYPE_COMPUTE;
-	case AMDGPU_HW_IP_DMA:
-		return AMDGPU_RING_TYPE_SDMA;
-	case AMDGPU_HW_IP_UVD:
-		return AMDGPU_RING_TYPE_UVD;
-	case AMDGPU_HW_IP_VCE:
-		return AMDGPU_RING_TYPE_VCE;
-	default:
-		DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
-		return -1;
-	}
-}
-
-static int amdgpu_lru_map(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mapper *mapper,
-			  u32 user_ring, bool lru_pipe_order,
-			  struct amdgpu_ring **out_ring)
-{
-	int r, i, j;
-	int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
-	int ring_blacklist[AMDGPU_MAX_RINGS];
-	struct amdgpu_ring *ring;
-
-	/* 0 is a valid ring index, so initialize to -1 */
-	memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
-
-	for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
-		ring = mapper->queue_map[i];
-		if (ring)
-			ring_blacklist[j++] = ring->idx;
-	}
-
-	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
-				j, lru_pipe_order, out_ring);
-	if (r)
-		return r;
-
-	return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
-}
-
-/**
- * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- *
- * Initialize the the selected @mgr (all asics).
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr)
-{
-	int i, r;
-
-	if (!adev || !mgr)
-		return -EINVAL;
-
-	memset(mgr, 0, sizeof(*mgr));
-
-	for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
-		r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-/**
- * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- *
- * De-initialize the the selected @mgr (all asics).
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr)
-{
-	return 0;
-}
-
-/**
- * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- * @hw_ip: HW IP enum
- * @instance: HW instance
- * @ring: user ring id
- * @our_ring: pointer to mapped amdgpu_ring
- *
- * Map a userspace ring id to an appropriate kernel ring. Different
- * policies are configurable at a HW IP level.
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
-			 struct amdgpu_queue_mgr *mgr,
-			 u32 hw_ip, u32 instance, u32 ring,
-			 struct amdgpu_ring **out_ring)
-{
-	int i, r, ip_num_rings = 0;
-	struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
-
-	if (!adev || !mgr || !out_ring)
-		return -EINVAL;
-
-	if (hw_ip >= AMDGPU_MAX_IP_NUM)
-		return -EINVAL;
-
-	if (ring >= AMDGPU_MAX_RINGS)
-		return -EINVAL;
-
-	/* Right now all IPs have only one instance - multiple rings. */
-	if (instance != 0) {
-		DRM_DEBUG("invalid ip instance: %d\n", instance);
-		return -EINVAL;
-	}
-
-	switch (hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-		ip_num_rings = adev->gfx.num_gfx_rings;
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		ip_num_rings = adev->gfx.num_compute_rings;
-		break;
-	case AMDGPU_HW_IP_DMA:
-		ip_num_rings = adev->sdma.num_instances;
-		break;
-	case AMDGPU_HW_IP_UVD:
-		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-			if (!(adev->uvd.harvest_config & (1 << i)))
-				ip_num_rings++;
-		}
-		break;
-	case AMDGPU_HW_IP_VCE:
-		ip_num_rings = adev->vce.num_rings;
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-			if (!(adev->uvd.harvest_config & (1 << i)))
-				ip_num_rings++;
-		}
-		ip_num_rings =
-			adev->uvd.num_enc_rings * ip_num_rings;
-		break;
-	case AMDGPU_HW_IP_VCN_DEC:
-		ip_num_rings = 1;
-		break;
-	case AMDGPU_HW_IP_VCN_ENC:
-		ip_num_rings = adev->vcn.num_enc_rings;
-		break;
-	case AMDGPU_HW_IP_VCN_JPEG:
-		ip_num_rings = 1;
-		break;
-	default:
-		DRM_DEBUG("unknown ip type: %d\n", hw_ip);
-		return -EINVAL;
-	}
-
-	if (ring >= ip_num_rings) {
-		DRM_DEBUG("Ring index:%d exceeds maximum:%d for ip:%d\n",
-			  ring, ip_num_rings, hw_ip);
-		return -EINVAL;
-	}
-
-	mutex_lock(&mapper->lock);
-
-	*out_ring = amdgpu_get_cached_map(mapper, ring);
-	if (*out_ring) {
-		/* cache hit */
-		r = 0;
-		goto out_unlock;
-	}
-
-	switch (mapper->hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-	case AMDGPU_HW_IP_UVD:
-	case AMDGPU_HW_IP_VCE:
-	case AMDGPU_HW_IP_UVD_ENC:
-	case AMDGPU_HW_IP_VCN_DEC:
-	case AMDGPU_HW_IP_VCN_ENC:
-	case AMDGPU_HW_IP_VCN_JPEG:
-		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
-		break;
-	case AMDGPU_HW_IP_DMA:
-		r = amdgpu_lru_map(adev, mapper, ring, false, out_ring);
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		r = amdgpu_lru_map(adev, mapper, ring, true, out_ring);
-		break;
-	default:
-		*out_ring = NULL;
-		r = -EINVAL;
-		DRM_DEBUG("unknown HW IP type: %d\n", mapper->hw_ip);
-	}
-
-out_unlock:
-	mutex_unlock(&mapper->lock);
-	return r;
-}

+ 25 - 98
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

@@ -135,9 +135,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 
 	if (ring->funcs->end_use)
 		ring->funcs->end_use(ring);
-
-	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)
-		amdgpu_ring_lru_touch(ring->adev, ring);
 }
 
 /**
@@ -320,8 +317,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	ring->max_dw = max_dw;
 	ring->priority = DRM_SCHED_PRIORITY_NORMAL;
 	mutex_init(&ring->priority_mutex);
-	INIT_LIST_HEAD(&ring->lru_list);
-	amdgpu_ring_lru_touch(adev, ring);
 
 	for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
 		atomic_set(&ring->num_jobs[i], 0);
@@ -368,99 +363,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	ring->adev->rings[ring->idx] = NULL;
 }
 
-static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
-					 struct amdgpu_ring *ring)
-{
-	/* list_move_tail handles the case where ring isn't part of the list */
-	list_move_tail(&ring->lru_list, &adev->ring_lru_list);
-}
-
-static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
-				       int *blacklist, int num_blacklist)
-{
-	int i;
-
-	for (i = 0; i < num_blacklist; i++) {
-		if (ring->idx == blacklist[i])
-			return true;
-	}
-
-	return false;
-}
-
-/**
- * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
- *
- * @adev: amdgpu_device pointer
- * @type: amdgpu_ring_type enum
- * @blacklist: blacklisted ring ids array
- * @num_blacklist: number of entries in @blacklist
- * @lru_pipe_order: find a ring from the least recently used pipe
- * @ring: output ring
- *
- * Retrieve the amdgpu_ring structure for the least recently used ring of
- * a specific IP block (all asics).
- * Returns 0 on success, error on failure.
- */
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
-			int *blacklist,	int num_blacklist,
-			bool lru_pipe_order, struct amdgpu_ring **ring)
-{
-	struct amdgpu_ring *entry;
-
-	/* List is sorted in LRU order, find first entry corresponding
-	 * to the desired HW IP */
-	*ring = NULL;
-	spin_lock(&adev->ring_lru_list_lock);
-	list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
-		if (entry->funcs->type != type)
-			continue;
-
-		if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
-			continue;
-
-		if (!*ring) {
-			*ring = entry;
-
-			/* We are done for ring LRU */
-			if (!lru_pipe_order)
-				break;
-		}
-
-		/* Move all rings on the same pipe to the end of the list */
-		if (entry->pipe == (*ring)->pipe)
-			amdgpu_ring_lru_touch_locked(adev, entry);
-	}
-
-	/* Move the ring we found to the end of the list */
-	if (*ring)
-		amdgpu_ring_lru_touch_locked(adev, *ring);
-
-	spin_unlock(&adev->ring_lru_list_lock);
-
-	if (!*ring) {
-		DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * amdgpu_ring_lru_touch - mark a ring as recently being used
- *
- * @adev: amdgpu_device pointer
- * @ring: ring to touch
- *
- * Move @ring to the tail of the lru list
- */
-void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
-{
-	spin_lock(&adev->ring_lru_list_lock);
-	amdgpu_ring_lru_touch_locked(adev, ring);
-	spin_unlock(&adev->ring_lru_list_lock);
-}
-
 /**
  * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
  *
@@ -481,6 +383,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
 }
 
+/**
+ * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
+ *
+ * @ring: ring to try the recovery on
+ * @vmid: VMID we try to get going again
+ * @fence: timedout fence
+ *
+ * Tries to get a ring proceeding again when it is stuck.
+ */
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+			       struct dma_fence *fence)
+{
+	ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+
+	if (!ring->funcs->soft_recovery)
+		return false;
+
+	atomic_inc(&ring->adev->gpu_reset_counter);
+	while (!dma_fence_is_signaled(fence) &&
+	       ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
+		ring->funcs->soft_recovery(ring, vmid);
+
+	return dma_fence_is_signaled(fence);
+}
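
A hedged sketch of how a timeout path can use this before escalating to a full reset (in the driver the caller is the job timeout handler; job and fence here stand for the timed-out job and its hardware fence):

	if (amdgpu_ring_soft_recovery(ring, job->vmid, fence)) {
		DRM_ERROR("ring %s timeout, but soft recovered\n",
			  ring->name);
		return;
	}
	/* otherwise fall back to a full GPU recovery */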
+
 /*
  * Debugfs info
  */

+ 29 - 6
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

@@ -97,7 +97,7 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
 int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
 		      unsigned flags);
 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
-void amdgpu_fence_process(struct amdgpu_ring *ring);
+bool amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
 				      uint32_t wait_seq,
@@ -168,6 +168,8 @@ struct amdgpu_ring_funcs {
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
 			      enum drm_sched_priority priority);
+	/* Try to soft recover the ring to make the fence signal */
+	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
 };
 
 struct amdgpu_ring {
@@ -175,7 +177,6 @@ struct amdgpu_ring {
 	const struct amdgpu_ring_funcs	*funcs;
 	struct amdgpu_fence_driver	fence_drv;
 	struct drm_gpu_scheduler	sched;
-	struct list_head		lru_list;
 
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
@@ -221,6 +222,30 @@ struct amdgpu_ring {
 #endif
 };
 
+#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
+#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
+#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
+#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
+#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
+#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
+#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
+#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
+#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
+#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
+#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
+#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
+#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
+#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
+#define amdgpu_ring_patch_cond_exec(r, o) (r)->funcs->patch_cond_exec((r), (o))
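
These wrappers simply dispatch into &amdgpu_ring_funcs; a minimal sketch of emitting through them (mmSOME_REG and value are placeholders):

	r = amdgpu_ring_alloc(ring, 16);	/* reserve 16 dwords */
	if (r)
		return r;
	amdgpu_ring_emit_wreg(ring, mmSOME_REG, value);	/* -> funcs->emit_wreg() */
	amdgpu_ring_commit(ring);	/* ring the doorbell */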
+
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
@@ -234,13 +259,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
-			int *blacklist, int num_blacklist,
-			bool lru_pipe_order, struct amdgpu_ring **ring);
-void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
 						uint32_t reg0, uint32_t val0,
 						uint32_t reg1, uint32_t val1);
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+			       struct dma_fence *fence);
 
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {

+ 3 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c

@@ -226,6 +226,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
 		struct amdgpu_sa_bo *sa_bo;
 
+		fences[i] = NULL;
+
 		if (list_empty(&sa_manager->flist[i]))
 			continue;
 
@@ -296,10 +298,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 
 	spin_lock(&sa_manager->wq.lock);
 	do {
-		for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
-			fences[i] = NULL;
+		for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 			tries[i] = 0;
-		}
 
 		do {
 			amdgpu_sa_bo_try_free(sa_manager);

+ 44 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c

@@ -0,0 +1,44 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_sdma.h"
+
+/*
+ * GPU SDMA IP block helper functions.
+ */
+
+struct amdgpu_sdma_instance *amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int i;
+
+	for (i = 0; i < adev->sdma.num_instances; i++)
+		if (&adev->sdma.instance[i].ring == ring)
+			break;
+
+	if (i < adev->sdma.num_instances)
+		return &adev->sdma.instance[i];
+	else
+		return NULL;
+}
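
A sketch of the intended use, mapping a generic ring back to its owning SDMA instance (the caller and use_burst_nop are hypothetical):

	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);

	/* e.g. decide how to pad an IB for this instance */
	if (sdma && sdma->burst_nop)
		use_burst_nop = true;	/* pad with one burst NOP */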

+ 97 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h

@@ -0,0 +1,97 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SDMA_H__
+#define __AMDGPU_SDMA_H__
+
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES		2
+
+enum amdgpu_sdma_irq {
+	AMDGPU_SDMA_IRQ_TRAP0 = 0,
+	AMDGPU_SDMA_IRQ_TRAP1,
+
+	AMDGPU_SDMA_IRQ_LAST
+};
+
+struct amdgpu_sdma_instance {
+	/* SDMA firmware */
+	const struct firmware	*fw;
+	uint32_t		fw_version;
+	uint32_t		feature_version;
+
+	struct amdgpu_ring	ring;
+	bool			burst_nop;
+};
+
+struct amdgpu_sdma {
+	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+	struct amdgpu_irq_src	trap_irq;
+	struct amdgpu_irq_src	illegal_inst_irq;
+	int			num_instances;
+	uint32_t		srbm_soft_reset;
+};
+
+/*
+ * Provided by HW blocks that can move/clear data, e.g. GFX or SDMA.
+ * Currently we use SDMA to move data.
+ */
+struct amdgpu_buffer_funcs {
+	/* maximum bytes in a single operation */
+	uint32_t	copy_max_bytes;
+
+	/* number of dw to reserve per operation */
+	unsigned	copy_num_dw;
+
+	/* used for buffer migration */
+	void (*emit_copy_buffer)(struct amdgpu_ib *ib,
+				 /* src addr in bytes */
+				 uint64_t src_offset,
+				 /* dst addr in bytes */
+				 uint64_t dst_offset,
+				 /* number of byte to transfer */
+				 uint32_t byte_count);
+
+	/* maximum bytes in a single operation */
+	uint32_t	fill_max_bytes;
+
+	/* number of dw to reserve per operation */
+	unsigned	fill_num_dw;
+
+	/* used for buffer clearing */
+	void (*emit_fill_buffer)(struct amdgpu_ib *ib,
+				 /* value to write to memory */
+				 uint32_t src_data,
+				 /* dst addr in bytes */
+				 uint64_t dst_offset,
+				 /* number of byte to fill */
+				 uint32_t byte_count);
+};
+
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
+
+struct amdgpu_sdma_instance *
+amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
+
+#endif
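
The two limits in &amdgpu_buffer_funcs drive how a request is split; a sketch of the sizing math, roughly as amdgpu_copy_buffer() performs it (byte_count is the caller's request):

	unsigned num_loops, num_dw;
	uint32_t max_bytes = adev->mman.buffer_funcs->copy_max_bytes;

	num_loops = DIV_ROUND_UP(byte_count, max_bytes);	/* chunks */
	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;	/* IB space */
	/* then one amdgpu_emit_copy_buffer() per chunk */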

+ 27 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

@@ -103,7 +103,7 @@ TRACE_EVENT(amdgpu_iv,
 			   __entry->src_data[2] = iv->src_data[2];
 			   __entry->src_data[3] = iv->src_data[3];
 			   ),
-	    TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
+	    TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
 		      __entry->client_id, __entry->src_id,
 		      __entry->ring_id, __entry->vmid,
 		      __entry->timestamp, __entry->pasid,
@@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,
 
 	    TP_fast_assign(
 			   __entry->bo_list = p->bo_list;
-			   __entry->ring = p->ring->idx;
+			   __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
 			   __entry->dw = p->job->ibs[i].length_dw;
 			   __entry->fences = amdgpu_fence_count_emitted(
-				p->ring);
+				to_amdgpu_ring(p->entity->rq->sched));
 			   ),
 	    TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
 		      __entry->bo_list, __entry->ring, __entry->dw,
@@ -462,6 +462,30 @@ TRACE_EVENT(amdgpu_bo_move,
 			__entry->new_placement, __entry->bo_size)
 );
 
+TRACE_EVENT(amdgpu_ib_pipe_sync,
+	    TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
+	    TP_ARGS(sched_job, fence),
+	    TP_STRUCT__entry(
+			     __field(const char *, name)
+			     __field(uint64_t, id)
+			     __field(struct dma_fence *, fence)
+			     __field(uint64_t, ctx)
+			     __field(unsigned, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->name = sched_job->base.sched->name;
+			   __entry->id = sched_job->base.id;
+			   __entry->fence = fence;
+			   __entry->ctx = fence->context;
+			   __entry->seqno = fence->seqno;
+			   ),
+	    TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
+		      __entry->name, __entry->id,
+		      __entry->fence, __entry->ctx,
+		      __entry->seqno)
+);
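
TRACE_EVENT() generates a trace_amdgpu_ib_pipe_sync() helper; a sketch of the emitting side (need_pipe_sync is hypothetical, in the driver this sits in the IB scheduling path):

	/* record that this job had to wait for a pipeline sync */
	if (need_pipe_sync)
		trace_amdgpu_ib_pipe_sync(job, fence);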
+
 #undef AMDGPU_JOB_GET_TIMELINE_NAME
 #endif
 

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c

@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: MIT
 /* Copyright Red Hat Inc 2010.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a

+ 133 - 91
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

@@ -47,6 +47,7 @@
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_sdma.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -255,6 +256,13 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 
 	abo = ttm_to_amdgpu_bo(bo);
 	switch (bo->mem.mem_type) {
+	case AMDGPU_PL_GDS:
+	case AMDGPU_PL_GWS:
+	case AMDGPU_PL_OA:
+		placement->num_placement = 0;
+		placement->num_busy_placement = 0;
+		return;
+
 	case TTM_PL_VRAM:
 		if (!adev->mman.buffer_funcs_enabled) {
 			/* Move to system memory */
@@ -282,6 +290,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	case TTM_PL_TT:
 	default:
 		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+		break;
 	}
 	*placement = abo->placement;
 }
@@ -344,7 +353,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 {
 	uint64_t addr = 0;
 
-	if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
+	if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
 		addr = mm_node->start << PAGE_SHIFT;
 		addr += bo->bdev->man[mem->mem_type].gpu_offset;
 	}
@@ -432,8 +441,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		/* Map only what needs to be accessed. Map src to window 0 and
 		 * dst to window 1
 		 */
-		if (src->mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
+		if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) {
 			r = amdgpu_map_buffer(src->bo, src->mem,
 					PFN_UP(cur_size + src_page_offset),
 					src_node_start, 0, ring,
@@ -446,8 +454,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			from += src_page_offset;
 		}
 
-		if (dst->mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
+		if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) {
 			r = amdgpu_map_buffer(dst->bo, dst->mem,
 					PFN_UP(cur_size + dst_page_offset),
 					dst_node_start, 1, ring,
@@ -525,7 +532,11 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	if (r)
 		goto error;
 
-	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
+	/* Always block for VM page tables before committing the new location */
+	if (bo->type == ttm_bo_type_kernel)
+		r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
+	else
+		r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
@@ -676,6 +687,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 		amdgpu_move_null(bo, new_mem);
 		return 0;
 	}
+	if (old_mem->mem_type == AMDGPU_PL_GDS ||
+	    old_mem->mem_type == AMDGPU_PL_GWS ||
+	    old_mem->mem_type == AMDGPU_PL_OA ||
+	    new_mem->mem_type == AMDGPU_PL_GDS ||
+	    new_mem->mem_type == AMDGPU_PL_GWS ||
+	    new_mem->mem_type == AMDGPU_PL_OA) {
+		/* Nothing to save here */
+		amdgpu_move_null(bo, new_mem);
+		return 0;
+	}
 
 	if (!adev->mman.buffer_funcs_enabled)
 		goto memcpy;
@@ -1082,42 +1103,48 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 	struct ttm_mem_reg tmp;
 	struct ttm_placement placement;
 	struct ttm_place placements;
-	uint64_t flags;
+	uint64_t addr, flags;
 	int r;
 
-	if (bo->mem.mem_type != TTM_PL_TT ||
-	    amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
+	if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
 		return 0;
 
-	/* allocate GTT space */
-	tmp = bo->mem;
-	tmp.mm_node = NULL;
-	placement.num_placement = 1;
-	placement.placement = &placements;
-	placement.num_busy_placement = 1;
-	placement.busy_placement = &placements;
-	placements.fpfn = 0;
-	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
-	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
-		TTM_PL_FLAG_TT;
+	addr = amdgpu_gmc_agp_addr(bo);
+	if (addr != AMDGPU_BO_INVALID_OFFSET) {
+		bo->mem.start = addr >> PAGE_SHIFT;
+	} else {
 
-	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
-	if (unlikely(r))
-		return r;
+		/* allocate GART space */
+		tmp = bo->mem;
+		tmp.mm_node = NULL;
+		placement.num_placement = 1;
+		placement.placement = &placements;
+		placement.num_busy_placement = 1;
+		placement.busy_placement = &placements;
+		placements.fpfn = 0;
+		placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
+		placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
+			TTM_PL_FLAG_TT;
+
+		r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
+		if (unlikely(r))
+			return r;
 
-	/* compute PTE flags for this buffer object */
-	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+		/* compute PTE flags for this buffer object */
+		flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
 
-	/* Bind pages */
-	gtt->offset = (u64)tmp.start << PAGE_SHIFT;
-	r = amdgpu_ttm_gart_bind(adev, bo, flags);
-	if (unlikely(r)) {
-		ttm_bo_mem_put(bo, &tmp);
-		return r;
+		/* Bind pages */
+		gtt->offset = (u64)tmp.start << PAGE_SHIFT;
+		r = amdgpu_ttm_gart_bind(adev, bo, flags);
+		if (unlikely(r)) {
+			ttm_bo_mem_put(bo, &tmp);
+			return r;
+		}
+
+		ttm_bo_mem_put(bo, &bo->mem);
+		bo->mem = tmp;
 	}
 
-	ttm_bo_mem_put(bo, &bo->mem);
-	bo->mem = tmp;
 	bo->offset = (bo->mem.start << PAGE_SHIFT) +
 		bo->bdev->man[bo->mem.mem_type].gpu_offset;
 
@@ -1427,13 +1454,14 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 }
 
 /**
- * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
  *
  * @ttm: The ttm_tt object to compute the flags for
  * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PDE (Page Directory Entry).
  */
-uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
-				 struct ttm_mem_reg *mem)
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 {
 	uint64_t flags = 0;
 
@@ -1447,6 +1475,22 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 			flags |= AMDGPU_PTE_SNOOPED;
 	}
 
+	return flags;
+}
+
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @adev: amdgpu_device pointer
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PTE (Page Table Entry).
+ */
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+				 struct ttm_mem_reg *mem)
+{
+	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
+
 	flags |= adev->gart.gart_pte_flags;
 	flags |= AMDGPU_PTE_READABLE;
 
@@ -1769,14 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	 * This is used for VGA emulation and pre-OS scanout buffers to
 	 * avoid display artifacts while transitioning between pre-OS
 	 * and driver.  */
-	if (adev->gmc.stolen_size) {
-		r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
-					    AMDGPU_GEM_DOMAIN_VRAM,
-					    &adev->stolen_vga_memory,
-					    NULL, NULL);
-		if (r)
-			return r;
-	}
+	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM,
+				    &adev->stolen_vga_memory,
+				    NULL, NULL);
+	if (r)
+		return r;
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
@@ -1803,45 +1845,45 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		 (unsigned)(gtt_size / (1024 * 1024)));
 
 	/* Initialize various on-chip memory pools */
-	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
-	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
-	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
-	adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
-	adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
-	adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
-	adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
-	adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
-	adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
-	/* GDS Memory */
-	if (adev->gds.mem.total_size) {
-		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
-				   adev->gds.mem.total_size >> PAGE_SHIFT);
-		if (r) {
-			DRM_ERROR("Failed initializing GDS heap.\n");
-			return r;
-		}
+	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
+			   adev->gds.mem.total_size);
+	if (r) {
+		DRM_ERROR("Failed initializing GDS heap.\n");
+		return r;
 	}
 
-	/* GWS */
-	if (adev->gds.gws.total_size) {
-		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
-				   adev->gds.gws.total_size >> PAGE_SHIFT);
-		if (r) {
-			DRM_ERROR("Failed initializing gws heap.\n");
-			return r;
-		}
+	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
+				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
+				    &adev->gds.gds_gfx_bo, NULL, NULL);
+	if (r)
+		return r;
+
+	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
+			   adev->gds.gws.total_size);
+	if (r) {
+		DRM_ERROR("Failed initializing gws heap.\n");
+		return r;
 	}
 
-	/* OA */
-	if (adev->gds.oa.total_size) {
-		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
-				   adev->gds.oa.total_size >> PAGE_SHIFT);
-		if (r) {
-			DRM_ERROR("Failed initializing oa heap.\n");
-			return r;
-		}
+	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
+				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
+				    &adev->gds.gws_gfx_bo, NULL, NULL);
+	if (r)
+		return r;
+
+	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
+			   adev->gds.oa.total_size);
+	if (r) {
+		DRM_ERROR("Failed initializing oa heap.\n");
+		return r;
 	}
 
+	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
+				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
+				    &adev->gds.oa_gfx_bo, NULL, NULL);
+	if (r)
+		return r;
+
 	/* Register debugfs entries for amdgpu_ttm */
 	r = amdgpu_ttm_debugfs_init(adev);
 	if (r) {
@@ -1876,12 +1918,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
-	if (adev->gds.mem.total_size)
-		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
-	if (adev->gds.gws.total_size)
-		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
-	if (adev->gds.oa.total_size)
-		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
+	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
+	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
+	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
 	ttm_bo_device_release(&adev->mman.bdev);
 	amdgpu_ttm_global_fini(adev);
 	adev->mman.initialized = false;
@@ -1987,7 +2026,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
 	src_addr = num_dw * 4;
 	src_addr += job->ibs[0].gpu_addr;
 
-	dst_addr = adev->gart.table_addr;
+	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
 	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
 				dst_addr, num_bytes);
@@ -2047,7 +2086,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	if (r)
 		return r;
 
-	job->vm_needs_flush = vm_needs_flush;
+	if (vm_needs_flush) {
+		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+		job->vm_needs_flush = true;
+	}
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
 				     AMDGPU_FENCE_OWNER_UNDEFINED,
@@ -2183,7 +2225,7 @@ error_free:
 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	unsigned ttm_pl = *(int *)node->info_ent->data;
+	unsigned ttm_pl = (uintptr_t)node->info_ent->data;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
@@ -2193,12 +2235,12 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 	return 0;
 }
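
The debugfs rework in the next hunk drops the static int indirection and stores the placement id directly in the info_ent data pointer; the round trip is well defined for integers that fit in a pointer:

	/* stash a small integer in the cookie ... */
	info_ent->data = (void *)AMDGPU_PL_GDS;
	/* ... and recover it with an integer cast, not a dereference */
	unsigned ttm_pl = (uintptr_t)node->info_ent->data;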
 
-static int ttm_pl_vram = TTM_PL_VRAM;
-static int ttm_pl_tt = TTM_PL_TT;
-
 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
-	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
-	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
+	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
+	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
+	{"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
+	{"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
+	{"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
 	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
 #ifdef CONFIG_SWIOTLB
 	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

@@ -116,6 +116,7 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
 bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
 

+ 51 - 58
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c

@@ -277,6 +277,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 	case CHIP_PITCAIRN:
 	case CHIP_VERDE:
 	case CHIP_OLAND:
+	case CHIP_HAINAN:
 		return AMDGPU_FW_LOAD_DIRECT;
 #endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -296,19 +297,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_VEGAM:
-		if (!load_type)
-			return AMDGPU_FW_LOAD_DIRECT;
-		else
-			return AMDGPU_FW_LOAD_SMU;
+		return AMDGPU_FW_LOAD_SMU;
 	case CHIP_VEGA10:
 	case CHIP_RAVEN:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		if (!load_type)
 			return AMDGPU_FW_LOAD_DIRECT;
 		else
 			return AMDGPU_FW_LOAD_PSP;
-	case CHIP_VEGA20:
-		return AMDGPU_FW_LOAD_DIRECT;
 	default:
 		DRM_ERROR("Unknown firmware load type\n");
 	}
@@ -322,6 +319,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 {
 	const struct common_firmware_header *header = NULL;
 	const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
+	const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
 
 	if (NULL == ucode->fw)
 		return 0;
@@ -333,8 +331,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 		return 0;
 
 	header = (const struct common_firmware_header *)ucode->fw->data;
-
 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+	dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
 
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
 	    (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
@@ -343,7 +341,9 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
-	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
+		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
+		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) {
 		ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
 
 		memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -365,6 +365,20 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 					      le32_to_cpu(header->ucode_array_offset_bytes) +
 					      le32_to_cpu(cp_hdr->jt_offset) * 4),
 		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_ERAM) {
+		ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+				le32_to_cpu(dmcu_hdr->intv_size_bytes);
+
+		memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
+					      le32_to_cpu(header->ucode_array_offset_bytes)),
+		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_INTV) {
+		ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
+
+		memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
+					      le32_to_cpu(header->ucode_array_offset_bytes) +
+					      le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
+		       ucode->ucode_size);
 	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
 		ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
 		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
@@ -406,32 +420,41 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
 	return 0;
 }
 
-int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
-	uint64_t fw_offset = 0;
-	int i, err;
-	struct amdgpu_firmware_info *ucode = NULL;
-	const struct common_firmware_header *header = NULL;
-
-	if (!adev->firmware.fw_size) {
-		dev_warn(adev->dev, "No ip firmware need to load\n");
-		return 0;
-	}
-
-	if (!adev->in_gpu_reset) {
-		err = amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-					amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
-					&adev->firmware.fw_buf,
-					&adev->firmware.fw_buf_mc,
-					&adev->firmware.fw_buf_ptr);
-		if (err) {
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
+		amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
+			amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+			&adev->firmware.fw_buf,
+			&adev->firmware.fw_buf_mc,
+			&adev->firmware.fw_buf_ptr);
+		if (!adev->firmware.fw_buf) {
 			dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n");
-			goto failed;
+			return -ENOMEM;
+		} else if (amdgpu_sriov_vf(adev)) {
+			memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
 		}
 	}
+	return 0;
+}
 
-	memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
+{
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
+		amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
+				      &adev->firmware.fw_buf_mc,
+				      &adev->firmware.fw_buf_ptr);
+}
+
+int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
+{
+	uint64_t fw_offset = 0;
+	int i;
+	struct amdgpu_firmware_info *ucode = NULL;
 
+	/* For bare metal, the ucode is allocated in GTT, so there is no need to refill the BO on reset/suspend */
+	if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend))
+		return 0;
 	/*
 	 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
 	 * ucode info here
@@ -448,7 +471,6 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 	for (i = 0; i < adev->firmware.max_ucodes; i++) {
 		ucode = &adev->firmware.ucode[i];
 		if (ucode->fw) {
-			header = (const struct common_firmware_header *)ucode->fw->data;
 			amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
 						    adev->firmware.fw_buf_ptr + fw_offset);
 			if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
@@ -463,33 +485,4 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 		}
 	}
 	return 0;
-
-failed:
-	if (err)
-		adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
-
-	return err;
-}
-
-int amdgpu_ucode_fini_bo(struct amdgpu_device *adev)
-{
-	int i;
-	struct amdgpu_firmware_info *ucode = NULL;
-
-	if (!adev->firmware.fw_size)
-		return 0;
-
-	for (i = 0; i < adev->firmware.max_ucodes; i++) {
-		ucode = &adev->firmware.ucode[i];
-		if (ucode->fw) {
-			ucode->mc_addr = 0;
-			ucode->kaddr = NULL;
-		}
-	}
-
-	amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
-				&adev->firmware.fw_buf_mc,
-				&adev->firmware.fw_buf_ptr);
-
-	return 0;
 }
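
With the create/free split above, allocation no longer happens inside amdgpu_ucode_init_bo(); a hedged sketch of the resulting lifecycle (the actual call sites are in the device init/teardown paths):

	/* sw_init: allocate the firmware BO once */
	r = amdgpu_ucode_create_bo(adev);
	/* hw_init and resume: (re)fill it with ucode */
	r = amdgpu_ucode_init_bo(adev);
	/* sw_fini: release it */
	amdgpu_ucode_free_bo(adev);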

+ 38 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h

@@ -157,6 +157,13 @@ struct gpu_info_firmware_header_v1_0 {
 	uint16_t version_minor; /* version */
 };
 
+/* version_major=1, version_minor=0 */
+struct dmcu_firmware_header_v1_0 {
+	struct common_firmware_header header;
+	uint32_t intv_offset_bytes; /* interrupt vectors offset from end of header, in bytes */
+	uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
+};
+
 /* header is fixed size */
 union amdgpu_firmware_header {
 	struct common_firmware_header common;
@@ -170,6 +177,7 @@ union amdgpu_firmware_header {
 	struct sdma_firmware_header_v1_0 sdma;
 	struct sdma_firmware_header_v1_1 sdma_v1_1;
 	struct gpu_info_firmware_header_v1_0 gpu_info;
+	struct dmcu_firmware_header_v1_0 dmcu;
 	uint8_t raw[0x100];
 };
 
@@ -193,8 +201,11 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_STORAGE,
 	AMDGPU_UCODE_ID_SMC,
 	AMDGPU_UCODE_ID_UVD,
+	AMDGPU_UCODE_ID_UVD1,
 	AMDGPU_UCODE_ID_VCE,
 	AMDGPU_UCODE_ID_VCN,
+	AMDGPU_UCODE_ID_DMCU_ERAM,
+	AMDGPU_UCODE_ID_DMCU_INTV,
 	AMDGPU_UCODE_ID_MAXIMUM,
 };
 
@@ -205,6 +216,12 @@ enum AMDGPU_UCODE_STATUS {
 	AMDGPU_UCODE_STATUS_LOADED,
 };
 
+enum amdgpu_firmware_load_type {
+	AMDGPU_FW_LOAD_DIRECT = 0,
+	AMDGPU_FW_LOAD_SMU,
+	AMDGPU_FW_LOAD_PSP,
+};
+
 /* conform to smu_ucode_xfer_cz.h */
 #define AMDGPU_SDMA0_UCODE_LOADED	0x00000001
 #define AMDGPU_SDMA1_UCODE_LOADED	0x00000002
@@ -232,6 +249,24 @@ struct amdgpu_firmware_info {
 	uint32_t tmr_mc_addr_hi;
 };
 
+struct amdgpu_firmware {
+	struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
+	enum amdgpu_firmware_load_type load_type;
+	struct amdgpu_bo *fw_buf;
+	unsigned int fw_size;
+	unsigned int max_ucodes;
+	/* firmware is loaded by the PSP instead of the SMU from Vega10 onward */
+	const struct amdgpu_psp_funcs *funcs;
+	struct amdgpu_bo *rbuf;
+	struct mutex mutex;
+
+	/* gpu info firmware data pointer */
+	const struct firmware *gpu_info_fw;
+
+	void *fw_buf_ptr;
+	uint64_t fw_buf_mc;
+};
+
 void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
@@ -241,8 +276,10 @@ void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
 int amdgpu_ucode_validate(const struct firmware *fw);
 bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
 				uint16_t hdr_major, uint16_t hdr_minor);
+
 int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
-int amdgpu_ucode_fini_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
 
 enum amdgpu_firmware_load_type
 amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);

+ 183 - 13
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

@@ -36,14 +36,19 @@
 #include "soc15_common.h"
 
 #include "vcn/vcn_1_0_offset.h"
+#include "vcn/vcn_1_0_sh_mask.h"
 
 /* 1 second timeout */
 #define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)
 
 /* Firmware Names */
 #define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
 
 MODULE_FIRMWARE(FIRMWARE_RAVEN);
+MODULE_FIRMWARE(FIRMWARE_PICASSO);
+MODULE_FIRMWARE(FIRMWARE_RAVEN2);
 
 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
 
@@ -59,7 +64,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
-		fw_name = FIRMWARE_RAVEN;
+		if (adev->rev_id >= 8)
+			fw_name = FIRMWARE_RAVEN2;
+		else if (adev->pdev->device == 0x15d8)
+			fw_name = FIRMWARE_PICASSO;
+		else
+			fw_name = FIRMWARE_RAVEN;
 		break;
 	default:
 		return -EINVAL;
@@ -111,8 +121,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 			version_major, version_minor, family_id);
 	}
 
-	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
-		  +  AMDGPU_VCN_SESSION_SIZE * 40;
+	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
 	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
@@ -203,20 +212,164 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
+				     struct dpg_pause_state *new_state)
+{
+	int ret_code;
+	uint32_t reg_data = 0;
+	uint32_t reg_data2 = 0;
+	struct amdgpu_ring *ring;
+
+	/* pause/unpause if state is changed */
+	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+			new_state->fw_based, new_state->jpeg);
+
+		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+			ret_code = 0;
+
+			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+			if (!ret_code) {
+				/* pause DPG non-jpeg */
+				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+				/* Restore */
+				ring = &adev->vcn.ring_enc[0];
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+				ring = &adev->vcn.ring_enc[1];
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+				ring = &adev->vcn.ring_dec;
+				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+			}
+		} else {
+			/* unpause dpg non-jpeg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.pause_state.fw_based = new_state->fw_based;
+	}
+
+	/* pause/unpause if state is changed */
+	if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
+		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+			new_state->fw_based, new_state->jpeg);
+
+		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
+			ret_code = 0;
+
+			if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+			if (!ret_code) {
+				/* Make sure JPEG (JRBC) snoop is disabled before sending the pause */
+				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
+				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
+
+				/* pause DPG jpeg */
+				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
+							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
+
+				/* Restore */
+				ring = &adev->vcn.ring_jpeg;
+				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+							lower_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+							upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+
+				ring = &adev->vcn.ring_dec;
+				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+			}
+		} else {
+			/* unpause dpg jpeg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.pause_state.jpeg = new_state->jpeg;
+	}
+
+	return 0;
+}
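
Each branch above performs the same request/ack handshake once per engine.
Condensed to its core as an illustration (not driver code;
SOC15_WAIT_ON_RREG polls until (reg & mask) == expected or it times out,
leaving the status in ret_code):

reg = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE);
reg |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;		/* raise the request */
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,		/* wait for the ack */
		   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
		   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);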
+
 static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
 		container_of(work, struct amdgpu_device, vcn.idle_work.work);
-	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
-	unsigned i;
+	unsigned int fences = 0;
+	unsigned int i;
 
 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
 	}
 
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+		struct dpg_pause_state new_state;
+
+		if (fences)
+			new_state.fw_based = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
+			new_state.jpeg = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+	}
+
 	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
+	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
 
 	if (fences == 0) {
+		amdgpu_gfx_off_ctrl(adev, true);
 		if (adev->pm.dpm_enabled)
 			amdgpu_dpm_enable_uvd(adev, false);
 		else
@@ -233,12 +386,29 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
 
 	if (set_clocks) {
+		amdgpu_gfx_off_ctrl(adev, false);
 		if (adev->pm.dpm_enabled)
 			amdgpu_dpm_enable_uvd(adev, true);
 		else
 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
 							       AMD_PG_STATE_UNGATE);
 	}
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+		struct dpg_pause_state new_state;
+
+		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+			new_state.fw_based = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.fw_based = adev->vcn.pause_state.fw_based;
+
+		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+			new_state.jpeg = VCN_DPG_STATE__PAUSE;
+		else
+			new_state.jpeg = adev->vcn.pause_state.jpeg;
+
+		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+	}
 }
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
@@ -253,7 +423,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
 	unsigned i;
 	int r;
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
+	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@@ -261,11 +431,11 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
 		return r;
 	}
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
+		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);
@@ -605,7 +775,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
 	unsigned i;
 	int r;
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
+	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
 
 	if (r) {
@@ -615,12 +785,12 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
 	}
 
 	amdgpu_ring_write(ring,
-		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0));
+		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
+		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);
@@ -654,7 +824,7 @@ static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
 
 	ib = &job->ibs[0];
 
-	ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0);
+	ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0);
 	ib->ptr[1] = 0xDEADBEEF;
 	for (i = 2; i < 16; i += 2) {
 		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@@ -703,7 +873,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		r = 0;
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH));
+		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);

+ 15 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

@@ -24,9 +24,9 @@
 #ifndef __AMDGPU_VCN_H__
 #define __AMDGPU_VCN_H__
 
-#define AMDGPU_VCN_STACK_SIZE		(200*1024)
-#define AMDGPU_VCN_HEAP_SIZE		(256*1024)
-#define AMDGPU_VCN_SESSION_SIZE		(50*1024)
+#define AMDGPU_VCN_STACK_SIZE		(128*1024)
+#define AMDGPU_VCN_CONTEXT_SIZE	(512*1024)
+
 #define AMDGPU_VCN_FIRMWARE_OFFSET	256
 #define AMDGPU_VCN_MAX_ENC_RINGS	3
 
@@ -56,6 +56,16 @@ enum engine_status_constants {
 	UVD_STATUS__RBC_BUSY = 0x1,
 };
 
+enum internal_dpg_state {
+	VCN_DPG_STATE__UNPAUSE = 0,
+	VCN_DPG_STATE__PAUSE,
+};
+
+struct dpg_pause_state {
+	enum internal_dpg_state fw_based;
+	enum internal_dpg_state jpeg;
+};
+
 struct amdgpu_vcn {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
@@ -69,6 +79,8 @@ struct amdgpu_vcn {
 	struct amdgpu_ring	ring_jpeg;
 	struct amdgpu_irq_src	irq;
 	unsigned		num_enc_rings;
+	enum amd_powergating_state cur_state;
+	struct dpg_pause_state pause_state;
 };
 
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev);

+ 2 - 7
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

@@ -22,18 +22,13 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
 {
 	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
 
 	addr -= AMDGPU_VA_RESERVED_SIZE;
-
-	if (addr >= AMDGPU_VA_HOLE_START)
-		addr |= AMDGPU_VA_HOLE_END;
+	addr = amdgpu_gmc_sign_extend(addr);
 
 	return addr;
 }
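
amdgpu_gmc_sign_extend() replaces the open-coded hole check with the usual
canonical-address rule. A hedged illustration, assuming the 48-bit hole the
old AMDGPU_VA_HOLE_* constants encoded:

/* low half passes through, high half gets its upper bits set:
 *   0x00007ffffffff000 -> 0x00007ffffffff000
 *   0x0000800000000000 -> 0xffff800000000000
 */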
@@ -76,7 +71,7 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			  struct amdgpu_bo_va **bo_va)
 {
-	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
+	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
 	struct ww_acquire_ctx ticket;
 	struct list_head list;
 	struct amdgpu_bo_list_entry pd;

+ 970 - 506
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -133,48 +133,6 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
-/**
- * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
- *
- * @base: base structure for tracking BO usage in a VM
- * @vm: vm to which bo is to be added
- * @bo: amdgpu buffer object
- *
- * Initialize a bo_va_base structure and add it to the appropriate lists
- *
- */
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
-				   struct amdgpu_vm *vm,
-				   struct amdgpu_bo *bo)
-{
-	base->vm = vm;
-	base->bo = bo;
-	INIT_LIST_HEAD(&base->bo_list);
-	INIT_LIST_HEAD(&base->vm_status);
-
-	if (!bo)
-		return;
-	list_add_tail(&base->bo_list, &bo->va);
-
-	if (bo->tbo.type == ttm_bo_type_kernel)
-		list_move(&base->vm_status, &vm->relocated);
-
-	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
-		return;
-
-	if (bo->preferred_domains &
-	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
-		return;
-
-	/*
-	 * we checked all the prerequisites, but it looks like this per vm bo
-	 * is currently evicted. add the bo to the evicted list to make sure it
-	 * is validated on next vm use to avoid fault.
-	 * */
-	list_move_tail(&base->vm_status, &vm->evicted);
-	base->moved = true;
-}
-
 /**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
@@ -232,6 +190,26 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 		return AMDGPU_VM_PTE_COUNT(adev);
 }
 
+/**
+ * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The mask to extract the entry number of a PD/PT from an address.
+ */
+static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
+				       unsigned int level)
+{
+	if (level <= adev->vm_manager.root_level)
+		return 0xffffffff;
+	else if (level != AMDGPU_VM_PTB)
+		return 0x1ff;
+	else
+		return AMDGPU_VM_PTE_COUNT(adev) - 1;
+}
+
 /**
  * amdgpu_vm_bo_size - returns the size of the BOs in bytes
  *
@@ -246,6 +224,382 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
 	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 }
 
+/**
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for PDs/PTs and per VM BOs which are not at the location they should
+ * be.
+ */
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+{
+	struct amdgpu_vm *vm = vm_bo->vm;
+	struct amdgpu_bo *bo = vm_bo->bo;
+
+	vm_bo->moved = true;
+	if (bo->tbo.type == ttm_bo_type_kernel)
+		list_move(&vm_bo->vm_status, &vm->evicted);
+	else
+		list_move_tail(&vm_bo->vm_status, &vm->evicted);
+}
+
+/**
+ * amdgpu_vm_bo_relocated - vm_bo is relocated
+ *
+ * @vm_bo: vm_bo which is relocated
+ *
+ * State for PDs/PTs which needs to update their parent PD.
+ */
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
+{
+	list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+}
+
+/**
+ * amdgpu_vm_bo_moved - vm_bo is moved
+ *
+ * @vm_bo: vm_bo which is moved
+ *
+ * State for per VM BOs which are moved, but that change is not yet reflected
+ * in the page tables.
+ */
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
+{
+	list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+}
+
+/**
+ * amdgpu_vm_bo_idle - vm_bo is idle
+ *
+ * @vm_bo: vm_bo which is now idle
+ *
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
+ * and are now idle.
+ */
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
+{
+	list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+	vm_bo->moved = false;
+}
+
+/**
+ * amdgpu_vm_bo_invalidated - vm_bo is invalidated
+ *
+ * @vm_bo: vm_bo which is now invalidated
+ *
+ * State for normal BOs which are invalidated and whose change is not yet
+ * reflected in the PTs.
+ */
+static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
+{
+	spin_lock(&vm_bo->vm->invalidated_lock);
+	list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
+	spin_unlock(&vm_bo->vm->invalidated_lock);
+}
+
+/**
+ * amdgpu_vm_bo_done - vm_bo is done
+ *
+ * @vm_bo: vm_bo which is now done
+ *
+ * State for normal BOs which are invalidated and that change has been updated
+ * in the PTs.
+ */
+static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
+{
+	spin_lock(&vm_bo->vm->invalidated_lock);
+	list_del_init(&vm_bo->vm_status);
+	spin_unlock(&vm_bo->vm->invalidated_lock);
+}
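
Taken together these six helpers encode a small per-BO state machine. A
hedged summary of the transitions, inferred from the call sites later in
this patch rather than stated by it:

/*
 *   evicted --(validate)--> relocated (PDs/PTs)    --> idle
 *   evicted --(validate)--> moved     (per-VM BOs) --> idle
 *   invalidated (shared BOs) --(bo_update)---------> done
 */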
+
+/**
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
+ *
+ */
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+				   struct amdgpu_vm *vm,
+				   struct amdgpu_bo *bo)
+{
+	base->vm = vm;
+	base->bo = bo;
+	base->next = NULL;
+	INIT_LIST_HEAD(&base->vm_status);
+
+	if (!bo)
+		return;
+	base->next = bo->vm_bo;
+	bo->vm_bo = base;
+
+	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return;
+
+	vm->bulk_moveable = false;
+	if (bo->tbo.type == ttm_bo_type_kernel)
+		amdgpu_vm_bo_relocated(base);
+	else
+		amdgpu_vm_bo_idle(base);
+
+	if (bo->preferred_domains &
+	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+		return;
+
+	/*
+	 * we checked all the prerequisites, but it looks like this per vm bo
+	 * is currently evicted. add the bo to the evicted list to make sure it
+	 * is validated on next vm use to avoid fault.
+	 */
+	amdgpu_vm_bo_evicted(base);
+}
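
The per-BO list of VMs is now a singly linked chain rooted at bo->vm_bo
instead of a list_head. A short sketch of the two idioms this enables,
mirroring amdgpu_vm_bo_find() and amdgpu_vm_bo_rmv() further down (victim
is a placeholder name):

struct amdgpu_vm_bo_base *base, **p;

for (base = bo->vm_bo; base; base = base->next)
	;	/* visit every VM this BO belongs to */

for (p = &bo->vm_bo; *p; p = &(*p)->next) {
	if (*p == victim) {
		*p = victim->next;	/* unlink without a prev pointer */
		break;
	}
}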
+
+/**
+ * amdgpu_vm_pt_parent - get the parent page directory
+ *
+ * @pt: child page table
+ *
+ * Helper to get the parent entry for the child page table. NULL if we are at
+ * the root page directory.
+ */
+static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
+{
+	struct amdgpu_bo *parent = pt->base.bo->parent;
+
+	if (!parent)
+		return NULL;
+
+	return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
+}
+
+/**
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
+ */
+struct amdgpu_vm_pt_cursor {
+	uint64_t pfn;
+	struct amdgpu_vm_pt *parent;
+	struct amdgpu_vm_pt *entry;
+	unsigned level;
+};
+
+/**
+ * amdgpu_vm_pt_start - start PD/PT walk
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start address of the walk
+ * @cursor: state to initialize
+ *
+ * Initialize an amdgpu_vm_pt_cursor to start a walk.
+ */
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm, uint64_t start,
+			       struct amdgpu_vm_pt_cursor *cursor)
+{
+	cursor->pfn = start;
+	cursor->parent = NULL;
+	cursor->entry = &vm->root;
+	cursor->level = adev->vm_manager.root_level;
+}
+
+/**
+ * amdgpu_vm_pt_descendant - go to child node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the child node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
+				    struct amdgpu_vm_pt_cursor *cursor)
+{
+	unsigned mask, shift, idx;
+
+	if (!cursor->entry->entries)
+		return false;
+
+	BUG_ON(!cursor->entry->base.bo);
+	mask = amdgpu_vm_entries_mask(adev, cursor->level);
+	shift = amdgpu_vm_level_shift(adev, cursor->level);
+
+	++cursor->level;
+	idx = (cursor->pfn >> shift) & mask;
+	cursor->parent = cursor->entry;
+	cursor->entry = &cursor->entry->entries[idx];
+	return true;
+}
+
+/**
+ * amdgpu_vm_pt_sibling - go to sibling node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the sibling node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
+				 struct amdgpu_vm_pt_cursor *cursor)
+{
+	unsigned shift, num_entries;
+
+	/* Root doesn't have a sibling */
+	if (!cursor->parent)
+		return false;
+
+	/* Go to our parents and see if we got a sibling */
+	shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
+	num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);
+
+	if (cursor->entry == &cursor->parent->entries[num_entries - 1])
+		return false;
+
+	cursor->pfn += 1ULL << shift;
+	cursor->pfn &= ~((1ULL << shift) - 1);
+	++cursor->entry;
+	return true;
+}
+
+/**
+ * amdgpu_vm_pt_ancestor - go to parent node
+ *
+ * @cursor: current state
+ *
+ * Walk to the parent node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+{
+	if (!cursor->parent)
+		return false;
+
+	--cursor->level;
+	cursor->entry = cursor->parent;
+	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
+	return true;
+}
+
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in hierarchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next node.
+ */
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
+			      struct amdgpu_vm_pt_cursor *cursor)
+{
+	/* First try a newborn child */
+	if (amdgpu_vm_pt_descendant(adev, cursor))
+		return;
+
+	/* If that didn't work, try to find a sibling */
+	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
+		/* No sibling, go to our parents and grandparents */
+		if (!amdgpu_vm_pt_ancestor(cursor)) {
+			cursor->pfn = ~0ll;
+			return;
+		}
+	}
+}
+
+/**
+ * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start addr of the walk
+ * @cursor: state to initialize
+ *
+ * Start a walk and go directly to the leaf node.
+ */
+static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm, uint64_t start,
+				    struct amdgpu_vm_pt_cursor *cursor)
+{
+	amdgpu_vm_pt_start(adev, vm, start, cursor);
+	while (amdgpu_vm_pt_descendant(adev, cursor));
+}
+
+/**
+ * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next leaf node.
+ */
+static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
+				   struct amdgpu_vm_pt_cursor *cursor)
+{
+	amdgpu_vm_pt_next(adev, cursor);
+	while (amdgpu_vm_pt_descendant(adev, cursor));
+}
+
+/**
+ * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
+ */
+#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\
+	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\
+	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
+
+/**
+ * amdgpu_vm_pt_first_dfs - start a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @cursor: state to initialize
+ *
+ * Starts a depth-first traversal of the PD/PT tree.
+ */
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
+				   struct amdgpu_vm *vm,
+				   struct amdgpu_vm_pt_cursor *cursor)
+{
+	amdgpu_vm_pt_start(adev, vm, 0, cursor);
+	while (amdgpu_vm_pt_descendant(adev, cursor));
+}
+
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move the cursor to the next node in a depth-first search.
+ */
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
+				  struct amdgpu_vm_pt_cursor *cursor)
+{
+	if (!cursor->entry)
+		return;
+
+	if (!cursor->parent)
+		cursor->entry = NULL;
+	else if (amdgpu_vm_pt_sibling(adev, cursor))
+		while (amdgpu_vm_pt_descendant(adev, cursor));
+	else
+		amdgpu_vm_pt_ancestor(cursor);
+}
+
+/**
+ * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
+ */
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)			\
+	for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)),			\
+	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
+	     (entry); (entry) = (cursor).entry,					\
+	     amdgpu_vm_pt_next_dfs((adev), &(cursor)))
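
A hedged usage sketch for the new walkers (loop bodies are placeholders):
the leaf walk visits the lowest allocated level covering [start, end], and
the DFS variant yields children before their parents, so an entry may be
freed from inside the loop:

struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_pt *entry;

for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
	/* cursor.entry is the leaf PD/PT for this range */;

for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)
	/* post-order: safe to free 'entry' here */;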
+
 /**
  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
  *
@@ -260,14 +614,54 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry)
 {
-	entry->robj = vm->root.base.bo;
 	entry->priority = 0;
-	entry->tv.bo = &entry->robj->tbo;
+	entry->tv.bo = &vm->root.base.bo->tbo;
 	entry->tv.shared = true;
 	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
+/**
+ * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
+ *
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
+ *
+ * Move all BOs to the end of the LRU and remember their positions so that
+ * they can later be moved together in a single bulk operation.
+ */
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm)
+{
+	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	struct amdgpu_vm_bo_base *bo_base;
+
+	if (vm->bulk_moveable) {
+		spin_lock(&glob->lru_lock);
+		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
+		spin_unlock(&glob->lru_lock);
+		return;
+	}
+
+	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
+
+	spin_lock(&glob->lru_lock);
+	list_for_each_entry(bo_base, &vm->idle, vm_status) {
+		struct amdgpu_bo *bo = bo_base->bo;
+
+		if (!bo->parent)
+			continue;
+
+		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
+		if (bo->shadow)
+			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
+						&vm->lru_bulk_move);
+	}
+	spin_unlock(&glob->lru_lock);
+
+	vm->bulk_moveable = true;
+}
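
This is the consumer of the new list.h bulk-move helper called out in the
merge summary. The bulk_moveable flag suggests the intended caller pattern,
sketched here as an assumption rather than a quote from the patch
(validate_fn and param are placeholders):

r = amdgpu_vm_validate_pt_bos(adev, vm, validate_fn, param);
if (!r)
	amdgpu_vm_move_to_lru_tail(adev, vm);	/* one bulk LRU move, not N */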
+
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -285,47 +679,36 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*validate)(void *p, struct amdgpu_bo *bo),
 			      void *param)
 {
-	struct ttm_bo_global *glob = adev->mman.bdev.glob;
 	struct amdgpu_vm_bo_base *bo_base, *tmp;
 	int r = 0;
 
+	vm->bulk_moveable &= list_empty(&vm->evicted);
+
 	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 
-		if (bo->parent) {
-			r = validate(param, bo);
-			if (r)
-				break;
-
-			spin_lock(&glob->lru_lock);
-			ttm_bo_move_to_lru_tail(&bo->tbo);
-			if (bo->shadow)
-				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
-			spin_unlock(&glob->lru_lock);
-		}
+		r = validate(param, bo);
+		if (r)
+			break;
 
 		if (bo->tbo.type != ttm_bo_type_kernel) {
-			spin_lock(&vm->moved_lock);
-			list_move(&bo_base->vm_status, &vm->moved);
-			spin_unlock(&vm->moved_lock);
+			amdgpu_vm_bo_moved(bo_base);
 		} else {
-			list_move(&bo_base->vm_status, &vm->relocated);
+			if (vm->use_cpu_for_update)
+				r = amdgpu_bo_kmap(bo, NULL);
+			else
+				r = amdgpu_ttm_alloc_gart(&bo->tbo);
+			if (r)
+				break;
+			if (bo->shadow) {
+				r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo);
+				if (r)
+					break;
+			}
+			amdgpu_vm_bo_relocated(bo_base);
 		}
 	}
 
-	spin_lock(&glob->lru_lock);
-	list_for_each_entry(bo_base, &vm->idle, vm_status) {
-		struct amdgpu_bo *bo = bo_base->bo;
-
-		if (!bo->parent)
-			continue;
-
-		ttm_bo_move_to_lru_tail(&bo->tbo);
-		if (bo->shadow)
-			ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
-	}
-	spin_unlock(&glob->lru_lock);
-
 	return r;
 }
 
@@ -376,7 +759,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 		if (level == adev->vm_manager.root_level) {
 			ats_entries = amdgpu_vm_level_shift(adev, level);
 			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
-			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+			ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
 			ats_entries = min(ats_entries, entries);
 			entries -= ats_entries;
 		} else {
@@ -397,6 +780,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
+	r = amdgpu_ttm_alloc_gart(&bo->tbo);
+	if (r)
+		return r;
+
 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	if (r)
 		goto error;
@@ -448,159 +835,154 @@ error:
 }
 
 /**
- * amdgpu_vm_alloc_levels - allocate the PD/PT levels
+ * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
  *
  * @adev: amdgpu_device pointer
- * @vm: requested vm
- * @parent: parent PT
- * @saddr: start of the address range
- * @eaddr: end of the address range
- * @level: VMPT level
- * @ats: indicate ATS support from PTE
+ * @vm: requesting vm
+ * @level: VMPT level the parameters are for
+ * @bp: resulting BO allocation parameters
+ */
+static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			       int level, struct amdgpu_bo_param *bp)
+{
+	memset(bp, 0, sizeof(*bp));
+
+	bp->size = amdgpu_vm_bo_size(adev, level);
+	bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
+	bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
+	if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 &&
+	    adev->flags & AMD_IS_APU)
+		bp->domain |= AMDGPU_GEM_DOMAIN_GTT;
+	bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
+	bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+	if (vm->use_cpu_for_update)
+		bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	else if (!vm->root.base.bo || vm->root.base.bo->shadow)
+		bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
+	bp->type = ttm_bo_type_kernel;
+	if (vm->root.base.bo)
+		bp->resv = vm->root.base.bo->tbo.resv;
+}
+
+/**
+ * amdgpu_vm_alloc_pts - Allocate page tables.
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @saddr: Start address which needs to be allocated
+ * @size: Size from start address we need.
  *
  * Make sure the page directories and page tables are allocated
  *
  * Returns:
  * 0 on success, errno otherwise.
  */
-static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
-				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent,
-				  uint64_t saddr, uint64_t eaddr,
-				  unsigned level, bool ats)
+int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
+			struct amdgpu_vm *vm,
+			uint64_t saddr, uint64_t size)
 {
-	unsigned shift = amdgpu_vm_level_shift(adev, level);
-	unsigned pt_idx, from, to;
-	u64 flags;
+	struct amdgpu_vm_pt_cursor cursor;
+	struct amdgpu_bo *pt;
+	bool ats = false;
+	uint64_t eaddr;
 	int r;
 
-	if (!parent->entries) {
-		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
+	/* validate the parameters */
+	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
+		return -EINVAL;
+
+	eaddr = saddr + size - 1;
 
-		parent->entries = kvmalloc_array(num_entries,
-						   sizeof(struct amdgpu_vm_pt),
-						   GFP_KERNEL | __GFP_ZERO);
-		if (!parent->entries)
-			return -ENOMEM;
-		memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
-	}
+	if (vm->pte_support_ats)
+		ats = saddr < AMDGPU_GMC_HOLE_START;
 
-	from = saddr >> shift;
-	to = eaddr >> shift;
-	if (from >= amdgpu_vm_num_entries(adev, level) ||
-	    to >= amdgpu_vm_num_entries(adev, level))
+	saddr /= AMDGPU_GPU_PAGE_SIZE;
+	eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+	if (eaddr >= adev->vm_manager.max_pfn) {
+		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+			eaddr, adev->vm_manager.max_pfn);
 		return -EINVAL;
+	}
 
-	++level;
-	saddr = saddr & ((1 << shift) - 1);
-	eaddr = eaddr & ((1 << shift) - 1);
+	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
+		struct amdgpu_vm_pt *entry = cursor.entry;
+		struct amdgpu_bo_param bp;
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-	if (vm->root.base.bo->shadow)
-		flags |= AMDGPU_GEM_CREATE_SHADOW;
-	if (vm->use_cpu_for_update)
-		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-	else
-		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-
-	/* walk over the address space and allocate the page tables */
-	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
-		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-		struct amdgpu_bo *pt;
-
-		if (!entry->base.bo) {
-			struct amdgpu_bo_param bp;
-
-			memset(&bp, 0, sizeof(bp));
-			bp.size = amdgpu_vm_bo_size(adev, level);
-			bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
-			bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
-			bp.flags = flags;
-			bp.type = ttm_bo_type_kernel;
-			bp.resv = resv;
-			r = amdgpu_bo_create(adev, &bp, &pt);
-			if (r)
-				return r;
+		if (cursor.level < AMDGPU_VM_PTB) {
+			unsigned num_entries;
 
-			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
-			if (r) {
-				amdgpu_bo_unref(&pt->shadow);
-				amdgpu_bo_unref(&pt);
-				return r;
-			}
+			num_entries = amdgpu_vm_num_entries(adev, cursor.level);
+			entry->entries = kvmalloc_array(num_entries,
+							sizeof(*entry->entries),
+							GFP_KERNEL |
+							__GFP_ZERO);
+			if (!entry->entries)
+				return -ENOMEM;
+		}
 
-			if (vm->use_cpu_for_update) {
-				r = amdgpu_bo_kmap(pt, NULL);
-				if (r) {
-					amdgpu_bo_unref(&pt->shadow);
-					amdgpu_bo_unref(&pt);
-					return r;
-				}
-			}
 
-			/* Keep a reference to the root directory to avoid
-			* freeing them up in the wrong order.
-			*/
-			pt->parent = amdgpu_bo_ref(parent->base.bo);
+		if (entry->base.bo)
+			continue;
 
-			amdgpu_vm_bo_base_init(&entry->base, vm, pt);
-		}
+		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
 
-		if (level < AMDGPU_VM_PTB) {
-			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
-			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
-				((1 << shift) - 1);
-			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
-						   sub_eaddr, level, ats);
+		r = amdgpu_bo_create(adev, &bp, &pt);
+		if (r)
+			return r;
+
+		r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
+		if (r)
+			goto error_free_pt;
+
+		if (vm->use_cpu_for_update) {
+			r = amdgpu_bo_kmap(pt, NULL);
 			if (r)
-				return r;
+				goto error_free_pt;
 		}
+
+		/* Keep a reference to the root directory to avoid
+		* freeing them up in the wrong order.
+		*/
+		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
+
+		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 	}
 
 	return 0;
-}
 
-/**
- * amdgpu_vm_alloc_pts - Allocate page tables.
- *
- * @adev: amdgpu_device pointer
- * @vm: VM to allocate page tables for
- * @saddr: Start address which needs to be allocated
- * @size: Size from start address we need.
+error_free_pt:
+	amdgpu_bo_unref(&pt->shadow);
+	amdgpu_bo_unref(&pt);
+	return r;
+}
+
+/**
+ * amdgpu_vm_free_pts - free PD/PT levels
  *
- * Make sure the page tables are allocated.
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
  *
- * Returns:
- * 0 on success, errno otherwise.
+ * Free the page directory or page table level and all sub levels.
  */
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-			struct amdgpu_vm *vm,
-			uint64_t saddr, uint64_t size)
+static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm)
 {
-	uint64_t eaddr;
-	bool ats = false;
-
-	/* validate the parameters */
-	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
-		return -EINVAL;
-
-	eaddr = saddr + size - 1;
-
-	if (vm->pte_support_ats)
-		ats = saddr < AMDGPU_VA_HOLE_START;
+	struct amdgpu_vm_pt_cursor cursor;
+	struct amdgpu_vm_pt *entry;
 
-	saddr /= AMDGPU_GPU_PAGE_SIZE;
-	eaddr /= AMDGPU_GPU_PAGE_SIZE;
+	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
 
-	if (eaddr >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-			eaddr, adev->vm_manager.max_pfn);
-		return -EINVAL;
+		if (entry->base.bo) {
+			entry->base.bo->vm_bo = NULL;
+			list_del(&entry->base.vm_status);
+			amdgpu_bo_unref(&entry->base.bo->shadow);
+			amdgpu_bo_unref(&entry->base.bo);
+		}
+		kvfree(entry->entries);
 	}
 
-	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
-				      adev->vm_manager.root_level, ats);
+	BUG_ON(vm->root.base.bo);
 }
 
 /**
@@ -714,7 +1096,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	}
 
 	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
-	vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+	vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
+			job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
 	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
 		ring->funcs->emit_wreg;
 
@@ -799,12 +1182,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 				       struct amdgpu_bo *bo)
 {
-	struct amdgpu_bo_va *bo_va;
+	struct amdgpu_vm_bo_base *base;
 
-	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
-		if (bo_va->base.vm == vm) {
-			return bo_va;
-		}
+	for (base = bo->vm_bo; base; base = base->next) {
+		if (base->vm != vm)
+			continue;
+
+		return container_of(base, struct amdgpu_bo_va, base);
 	}
 	return NULL;
 }
@@ -957,6 +1341,22 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	return r;
 }
 
+/**
+ * amdgpu_vm_update_func - helper to call update function
+ *
+ * Calls the update function for both the given BO as well as its shadow.
+ */
+static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params,
+				  struct amdgpu_bo *bo,
+				  uint64_t pe, uint64_t addr,
+				  unsigned count, uint32_t incr,
+				  uint64_t flags)
+{
+	if (bo->shadow)
+		params->func(params, bo->shadow, pe, addr, count, incr, flags);
+	params->func(params, bo, pe, addr, count, incr, flags);
+}
+
 /*
  * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
@@ -984,47 +1384,28 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 		pbo = pbo->parent;
 
 	level += params->adev->vm_manager.root_level;
-	pt = amdgpu_bo_gpu_offset(entry->base.bo);
-	flags = AMDGPU_PTE_VALID;
-	amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+	amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags);
 	pde = (entry - parent->entries) * 8;
-	if (bo->shadow)
-		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
-	params->func(params, bo, pde, pt, 1, 0, flags);
+	amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags);
 }
 
 /*
- * amdgpu_vm_invalidate_level - mark all PD levels as invalid
+ * amdgpu_vm_invalidate_pds - mark all PDs as invalid
  *
  * @adev: amdgpu_device pointer
  * @vm: related vm
- * @parent: parent PD
- * @level: VMPT level
  *
 * Mark all PD levels as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
-				       struct amdgpu_vm *vm,
-				       struct amdgpu_vm_pt *parent,
-				       unsigned level)
+static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
+				     struct amdgpu_vm *vm)
 {
-	unsigned pt_idx, num_entries;
-
-	/*
-	 * Recurse into the subdirectories. This recursion is harmless because
-	 * we only have a maximum of 5 layers.
-	 */
-	num_entries = amdgpu_vm_num_entries(adev, level);
-	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-		if (!entry->base.bo)
-			continue;
+	struct amdgpu_vm_pt_cursor cursor;
+	struct amdgpu_vm_pt *entry;
 
-		if (!entry->base.moved)
-			list_move(&entry->base.vm_status, &vm->relocated);
-		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
-	}
+	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)
+		if (entry->base.bo && !entry->base.moved)
+			amdgpu_vm_bo_relocated(&entry->base);
 }
 
 /*
@@ -1054,14 +1435,6 @@ restart:
 	params.adev = adev;
 
 	if (vm->use_cpu_for_update) {
-		struct amdgpu_vm_bo_base *bo_base;
-
-		list_for_each_entry(bo_base, &vm->relocated, vm_status) {
-			r = amdgpu_bo_kmap(bo_base->bo, NULL);
-			if (unlikely(r))
-				return r;
-		}
-
 		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 		if (unlikely(r))
 			return r;
@@ -1078,25 +1451,16 @@ restart:
 	}
 
 	while (!list_empty(&vm->relocated)) {
-		struct amdgpu_vm_bo_base *bo_base, *parent;
 		struct amdgpu_vm_pt *pt, *entry;
-		struct amdgpu_bo *bo;
 
-		bo_base = list_first_entry(&vm->relocated,
-					   struct amdgpu_vm_bo_base,
-					   vm_status);
-		bo_base->moved = false;
-		list_del_init(&bo_base->vm_status);
+		entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt,
+					 base.vm_status);
+		amdgpu_vm_bo_idle(&entry->base);
 
-		bo = bo_base->bo->parent;
-		if (!bo)
+		pt = amdgpu_vm_pt_parent(entry);
+		if (!pt)
 			continue;
 
-		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
-					  bo_list);
-		pt = container_of(parent, struct amdgpu_vm_pt, base);
-		entry = container_of(bo_base, struct amdgpu_vm_pt, base);
-
 		amdgpu_vm_update_pde(&params, vm, pt, entry);
 
 		if (!vm->use_cpu_for_update &&
@@ -1138,85 +1502,90 @@ restart:
 	return 0;
 
 error:
-	amdgpu_vm_invalidate_level(adev, vm, &vm->root,
-				   adev->vm_manager.root_level);
+	amdgpu_vm_invalidate_pds(adev, vm);
 	amdgpu_job_free(job);
 	return r;
 }
 
 /**
- * amdgpu_vm_find_entry - find the entry for an address
+ * amdgpu_vm_update_huge - figure out parameters for PTE updates
  *
- * @p: see amdgpu_pte_update_params definition
- * @addr: virtual address in question
- * @entry: resulting entry or NULL
- * @parent: parent entry
- *
- * Find the vm_pt entry and it's parent for the given address.
+ * Make sure to set the right flags for the PTEs at the desired level.
  */
-void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
-			 struct amdgpu_vm_pt **entry,
-			 struct amdgpu_vm_pt **parent)
-{
-	unsigned level = p->adev->vm_manager.root_level;
-
-	*parent = NULL;
-	*entry = &p->vm->root;
-	while ((*entry)->entries) {
-		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
+static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
+				  struct amdgpu_bo *bo, unsigned level,
+				  uint64_t pe, uint64_t addr,
+				  unsigned count, uint32_t incr,
+				  uint64_t flags)
 
-		*parent = *entry;
-		*entry = &(*entry)->entries[addr >> shift];
-		addr &= (1ULL << shift) - 1;
+{
+	if (level != AMDGPU_VM_PTB) {
+		flags |= AMDGPU_PDE_PTE;
+		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
 	}
 
-	if (level != AMDGPU_VM_PTB)
-		*entry = NULL;
+	amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
 }
 
 /**
- * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
+ * amdgpu_vm_fragment - get fragment for PTEs
  *
- * @p: see amdgpu_pte_update_params definition
- * @entry: vm_pt entry to check
- * @parent: parent entry
- * @nptes: number of PTEs updated with this operation
- * @dst: destination address where the PTEs should point to
- * @flags: access flags fro the PTEs
+ * @params: see amdgpu_pte_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
  *
- * Check if we can update the PD with a huge page.
+ * Returns the first possible fragment for the start and end address.
  */
-static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
-					struct amdgpu_vm_pt *entry,
-					struct amdgpu_vm_pt *parent,
-					unsigned nptes, uint64_t dst,
-					uint64_t flags)
+static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params,
+			       uint64_t start, uint64_t end, uint64_t flags,
+			       unsigned int *frag, uint64_t *frag_end)
 {
-	uint64_t pde;
+	/**
+	 * The MC L1 TLB supports variable sized pages, based on a fragment
+	 * field in the PTE. When this field is set to a non-zero value, page
+	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+	 * flags are considered valid for all PTEs within the fragment range
+	 * and corresponding mappings are assumed to be physically contiguous.
+	 *
+	 * The L1 TLB can store a single PTE for the whole fragment,
+	 * significantly increasing the space available for translation
+	 * caching. This leads to large improvements in throughput when the
+	 * TLB is under pressure.
+	 *
+	 * The L2 TLB distributes small and large fragments into two
+	 * asymmetric partitions. The large fragment cache is significantly
+	 * larger. Thus, we try to use large fragments wherever possible.
+	 * Userspace can support this by aligning virtual base address and
+	 * allocation size to the fragment size.
+	 *
+	 * Starting with Vega10 the fragment size only controls the L1. The L2
+	 * is now directly fed with small/huge/giant pages from the walker.
+	 */
+	unsigned max_frag;
 
-	/* In the case of a mixed PT the PDE must point to it*/
-	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
-	    nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
-		/* Set the huge page flag to stop scanning at this PDE */
-		flags |= AMDGPU_PDE_PTE;
-	}
+	if (params->adev->asic_type < CHIP_VEGA10)
+		max_frag = params->adev->vm_manager.fragment_size;
+	else
+		max_frag = 31;
 
-	if (!(flags & AMDGPU_PDE_PTE)) {
-		if (entry->huge) {
-			/* Add the entry to the relocated list to update it. */
-			entry->huge = false;
-			list_move(&entry->base.vm_status, &p->vm->relocated);
-		}
+	/* system pages are non-contiguous */
+	if (params->src) {
+		*frag = 0;
+		*frag_end = end;
 		return;
 	}
 
-	entry->huge = true;
-	amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
-
-	pde = (entry - parent->entries) * 8;
-	if (parent->base.bo->shadow)
-		p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
-	p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
+	/* This intentionally wraps around if no bit is set */
+	*frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
+	if (*frag >= max_frag) {
+		*frag = max_frag;
+		*frag_end = end & ~((1ULL << max_frag) - 1);
+	} else {
+		*frag_end = start + (1 << *frag);
+	}
 }
 
 /**
@@ -1234,112 +1603,105 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
  * 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
-				  uint64_t start, uint64_t end,
-				  uint64_t dst, uint64_t flags)
+				 uint64_t start, uint64_t end,
+				 uint64_t dst, uint64_t flags)
 {
 	struct amdgpu_device *adev = params->adev;
-	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
+	struct amdgpu_vm_pt_cursor cursor;
+	uint64_t frag_start = start, frag_end;
+	unsigned int frag;
 
-	uint64_t addr, pe_start;
-	struct amdgpu_bo *pt;
-	unsigned nptes;
+	/* figure out the initial fragment */
+	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
 
-	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; addr += nptes,
-	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
-		struct amdgpu_vm_pt *entry, *parent;
+	/* walk over the address space and update the PTs */
+	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+	while (cursor.pfn < end) {
+		struct amdgpu_bo *pt = cursor.entry->base.bo;
+		unsigned shift, parent_shift, mask;
+		uint64_t incr, entry_end, pe_start;
 
-		amdgpu_vm_get_entry(params, addr, &entry, &parent);
-		if (!entry)
+		if (!pt)
 			return -ENOENT;
 
-		if ((addr & ~mask) == (end & ~mask))
-			nptes = end - addr;
-		else
-			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
-		amdgpu_vm_handle_huge_pages(params, entry, parent,
-					    nptes, dst, flags);
-		/* We don't need to update PTEs for huge pages */
-		if (entry->huge)
+		/* The root level can't be a huge page */
+		if (cursor.level == adev->vm_manager.root_level) {
+			if (!amdgpu_vm_pt_descendant(adev, &cursor))
+				return -ENOENT;
 			continue;
+		}
 
-		pt = entry->base.bo;
-		pe_start = (addr & mask) * 8;
-		if (pt->shadow)
-			params->func(params, pt->shadow, pe_start, dst, nptes,
-				     AMDGPU_GPU_PAGE_SIZE, flags);
-		params->func(params, pt, pe_start, dst, nptes,
-			     AMDGPU_GPU_PAGE_SIZE, flags);
-	}
-
-	return 0;
-}
+		/* First check if the entry is already handled */
+		if (cursor.pfn < frag_start) {
+			cursor.entry->huge = true;
+			amdgpu_vm_pt_next(adev, &cursor);
+			continue;
+		}
 
-/*
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
- *
- * @params: see amdgpu_pte_update_params definition
- * @vm: requested vm
- * @start: first PTE to handle
- * @end: last PTE to handle
- * @dst: addr those PTEs should point to
- * @flags: hw mapping flags
- *
- * Returns:
- * 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
-				uint64_t start, uint64_t end,
-				uint64_t dst, uint64_t flags)
-{
-	/**
-	 * The MC L1 TLB supports variable sized pages, based on a fragment
-	 * field in the PTE. When this field is set to a non-zero value, page
-	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
-	 * flags are considered valid for all PTEs within the fragment range
-	 * and corresponding mappings are assumed to be physically contiguous.
-	 *
-	 * The L1 TLB can store a single PTE for the whole fragment,
-	 * significantly increasing the space available for translation
-	 * caching. This leads to large improvements in throughput when the
-	 * TLB is under pressure.
-	 *
-	 * The L2 TLB distributes small and large fragments into two
-	 * asymmetric partitions. The large fragment cache is significantly
-	 * larger. Thus, we try to use large fragments wherever possible.
-	 * Userspace can support this by aligning virtual base address and
-	 * allocation size to the fragment size.
-	 */
-	unsigned max_frag = params->adev->vm_manager.fragment_size;
-	int r;
+		/* If it isn't already handled it can't be a huge page */
+		if (cursor.entry->huge) {
+			/* Add the entry to the relocated list to update it. */
+			cursor.entry->huge = false;
+			amdgpu_vm_bo_relocated(&cursor.entry->base);
+		}
 
-	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID))
-		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
-
-	while (start != end) {
-		uint64_t frag_flags, frag_end;
-		unsigned frag;
-
-		/* This intentionally wraps around if no bit is set */
-		frag = min((unsigned)ffs(start) - 1,
-			   (unsigned)fls64(end - start) - 1);
-		if (frag >= max_frag) {
-			frag_flags = AMDGPU_PTE_FRAG(max_frag);
-			frag_end = end & ~((1ULL << max_frag) - 1);
-		} else {
-			frag_flags = AMDGPU_PTE_FRAG(frag);
-			frag_end = start + (1 << frag);
+		shift = amdgpu_vm_level_shift(adev, cursor.level);
+		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
+		if (adev->asic_type < CHIP_VEGA10) {
+			/* No huge page support before GMC v9 */
+			if (cursor.level != AMDGPU_VM_PTB) {
+				if (!amdgpu_vm_pt_descendant(adev, &cursor))
+					return -ENOENT;
+				continue;
+			}
+		} else if (frag < shift) {
+			/* We can't use this level when the fragment size is
+			 * smaller than the address shift. Go to the next
+			 * child entry and try again.
+			 */
+			if (!amdgpu_vm_pt_descendant(adev, &cursor))
+				return -ENOENT;
+			continue;
+		} else if (frag >= parent_shift) {
+			/* If the fragment size is even larger than the parent
+			 * shift we should go up one level and check it again.
+			 */
+			if (!amdgpu_vm_pt_ancestor(&cursor))
+				return -ENOENT;
+			continue;
 		}
 
-		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
-					  flags | frag_flags);
-		if (r)
-			return r;
+		/* Looks good so far, calculate parameters for the update */
+		incr = AMDGPU_GPU_PAGE_SIZE << shift;
+		mask = amdgpu_vm_entries_mask(adev, cursor.level);
+		pe_start = ((cursor.pfn >> shift) & mask) * 8;
+		entry_end = (mask + 1) << shift;
+		entry_end += cursor.pfn & ~(entry_end - 1);
+		entry_end = min(entry_end, end);
+
+		do {
+			uint64_t upd_end = min(entry_end, frag_end);
+			unsigned nptes = (upd_end - frag_start) >> shift;
+
+			amdgpu_vm_update_huge(params, pt, cursor.level,
+					      pe_start, dst, nptes, incr,
+					      flags | AMDGPU_PTE_FRAG(frag));
+
+			pe_start += nptes * 8;
+			dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
+
+			frag_start = upd_end;
+			if (frag_start >= frag_end) {
+				/* figure out the next fragment */
+				amdgpu_vm_fragment(params, frag_start, end,
+						   flags, &frag, &frag_end);
+				if (frag < shift)
+					break;
+			}
+		} while (frag_start < entry_end);
 
-		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
-		start = frag_end;
+		if (frag >= shift)
+			amdgpu_vm_pt_next(adev, &cursor);
 	}
 
 	return 0;
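
How the descend/stay/ascend rules above play out, as a hedged trace with
GMC v9 level shifts (PDB1 = 18, PDB0 = 9, PTB = 0) for a 2 MiB aligned
mapping, i.e. frag = 9:

/*
 *   PDB1: frag(9) < shift(18)              -> descend
 *   PDB0: shift(9) <= frag(9) < parent(18) -> stay
 * one PDE is written via amdgpu_vm_update_huge() with AMDGPU_PDE_PTE set,
 * instead of 512 individual PTEs; the ascend branch only fires when a
 * later fragment outgrows the current parent shift.
 */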
@@ -1401,8 +1763,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 		params.func = amdgpu_vm_cpu_set_ptes;
 		params.pages_addr = pages_addr;
-		return amdgpu_vm_frag_ptes(&params, start, last + 1,
-					   addr, flags);
+		return amdgpu_vm_update_ptes(&params, start, last + 1,
+					     addr, flags);
 	}
 
 	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
@@ -1481,7 +1843,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
 	if (r)
 		goto error_free;
 
@@ -1696,10 +2058,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		amdgpu_asic_flush_hdp(adev, NULL);
 	}
 
-	spin_lock(&vm->moved_lock);
-	list_del_init(&bo_va->base.vm_status);
-	spin_unlock(&vm->moved_lock);
-
 	/* If the BO is not in its preferred location add it back to
 	 * the evicted list so that it gets validated again on the
 	 * next command submission.
@@ -1708,9 +2066,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		uint32_t mem_type = bo->tbo.mem.mem_type;
 
 		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
-			list_add_tail(&bo_va->base.vm_status, &vm->evicted);
+			amdgpu_vm_bo_evicted(&bo_va->base);
 		else
-			list_add(&bo_va->base.vm_status, &vm->idle);
+			amdgpu_vm_bo_idle(&bo_va->base);
+	} else {
+		amdgpu_vm_bo_done(&bo_va->base);
 	}
 
 	list_splice_init(&bo_va->invalids, &bo_va->valids);
@@ -1895,7 +2255,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
+		if (vm->pte_support_ats &&
+		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -1936,40 +2297,40 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va *bo_va, *tmp;
-	struct list_head moved;
+	struct reservation_object *resv;
 	bool clear;
 	int r;
 
-	INIT_LIST_HEAD(&moved);
-	spin_lock(&vm->moved_lock);
-	list_splice_init(&vm->moved, &moved);
-	spin_unlock(&vm->moved_lock);
+	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+		/* Per VM BOs never need to be cleared in the page tables */
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+	}
 
-	list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
-		struct reservation_object *resv = bo_va->base.bo->tbo.resv;
+	spin_lock(&vm->invalidated_lock);
+	while (!list_empty(&vm->invalidated)) {
+		bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
+					 base.vm_status);
+		resv = bo_va->base.bo->tbo.resv;
+		spin_unlock(&vm->invalidated_lock);
 
-		/* Per VM BOs never need to bo cleared in the page tables */
-		if (resv == vm->root.base.bo->tbo.resv)
-			clear = false;
 		/* Try to reserve the BO to avoid clearing its ptes */
-		else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
+		if (!amdgpu_vm_debug && reservation_object_trylock(resv))
 			clear = false;
 		/* Somebody else is using the BO right now */
 		else
 			clear = true;
 
 		r = amdgpu_vm_bo_update(adev, bo_va, clear);
-		if (r) {
-			spin_lock(&vm->moved_lock);
-			list_splice(&moved, &vm->moved);
-			spin_unlock(&vm->moved_lock);
+		if (r)
 			return r;
-		}
 
-		if (!clear && resv != vm->root.base.bo->tbo.resv)
+		if (!clear)
 			reservation_object_unlock(resv);
-
+		spin_lock(&vm->invalidated_lock);
 	}
+	spin_unlock(&vm->invalidated_lock);
 
 	return 0;
 }
@@ -2034,9 +2395,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 
 	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
 	    !bo_va->base.moved) {
-		spin_lock(&vm->moved_lock);
 		list_move(&bo_va->base.vm_status, &vm->moved);
-		spin_unlock(&vm->moved_lock);
 	}
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
@@ -2388,13 +2747,27 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va)
 {
 	struct amdgpu_bo_va_mapping *mapping, *next;
+	struct amdgpu_bo *bo = bo_va->base.bo;
 	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdgpu_vm_bo_base **base;
+
+	if (bo) {
+		if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
+			vm->bulk_moveable = false;
+
+		for (base = &bo_va->base.bo->vm_bo; *base;
+		     base = &(*base)->next) {
+			if (*base != &bo_va->base)
+				continue;
 
-	list_del(&bo_va->base.bo_list);
+			*base = bo_va->base.next;
+			break;
+		}
+	}
 
-	spin_lock(&vm->moved_lock);
+	spin_lock(&vm->invalidated_lock);
 	list_del(&bo_va->base.vm_status);
-	spin_unlock(&vm->moved_lock);
+	spin_unlock(&vm->invalidated_lock);
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
@@ -2432,30 +2805,24 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	if (bo->parent && bo->parent->shadow == bo)
 		bo = bo->parent;
 
-	list_for_each_entry(bo_base, &bo->va, bo_list) {
+	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
 		struct amdgpu_vm *vm = bo_base->vm;
-		bool was_moved = bo_base->moved;
 
-		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-			if (bo->tbo.type == ttm_bo_type_kernel)
-				list_move(&bo_base->vm_status, &vm->evicted);
-			else
-				list_move_tail(&bo_base->vm_status,
-					       &vm->evicted);
+			amdgpu_vm_bo_evicted(bo_base);
 			continue;
 		}
 
-		if (was_moved)
+		if (bo_base->moved)
 			continue;
+		bo_base->moved = true;
 
-		if (bo->tbo.type == ttm_bo_type_kernel) {
-			list_move(&bo_base->vm_status, &vm->relocated);
-		} else {
-			spin_lock(&bo_base->vm->moved_lock);
-			list_move(&bo_base->vm_status, &vm->moved);
-			spin_unlock(&bo_base->vm->moved_lock);
-		}
+		if (bo->tbo.type == ttm_bo_type_kernel)
+			amdgpu_vm_bo_relocated(bo_base);
+		else if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
+			amdgpu_vm_bo_moved(bo_base);
+		else
+			amdgpu_vm_bo_invalidated(bo_base);
 	}
 }
 
@@ -2574,6 +2941,22 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
 		 adev->vm_manager.fragment_size);
 }
 
+static struct amdgpu_retryfault_hashtable *init_fault_hash(void)
+{
+	struct amdgpu_retryfault_hashtable *fault_hash;
+
+	fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL);
+	if (!fault_hash)
+		return NULL;
+
+	INIT_CHASH_TABLE(fault_hash->hash,
+			AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+	spin_lock_init(&fault_hash->lock);
+	fault_hash->count = 0;
+
+	return fault_hash;
+}
+
 /**
  * amdgpu_vm_init - initialize a vm instance
  *
@@ -2592,13 +2975,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
 	struct amdgpu_bo_param bp;
 	struct amdgpu_bo *root;
-	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
-		AMDGPU_VM_PTE_COUNT(adev) * 8);
-	unsigned ring_instance;
-	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
-	unsigned long size;
-	uint64_t flags;
 	int r, i;
 
 	vm->va = RB_ROOT_CACHED;
@@ -2606,18 +2982,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->relocated);
-	spin_lock_init(&vm->moved_lock);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->idle);
+	INIT_LIST_HEAD(&vm->invalidated);
+	spin_lock_init(&vm->invalidated_lock);
 	INIT_LIST_HEAD(&vm->freed);
 
 	/* create scheduler entity for page table updates */
-
-	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
-	ring_instance %= adev->vm_manager.vm_pte_num_rings;
-	ring = adev->vm_manager.vm_pte_rings[ring_instance];
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
-	r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
+	r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs,
+				  adev->vm_manager.vm_pte_num_rqs, NULL);
 	if (r)
 		return r;
 
@@ -2639,20 +3012,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_update = NULL;
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-	if (vm->use_cpu_for_update)
-		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-	else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
-		flags |= AMDGPU_GEM_CREATE_SHADOW;
-
-	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-	memset(&bp, 0, sizeof(bp));
-	bp.size = size;
-	bp.byte_align = align;
-	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
-	bp.flags = flags;
-	bp.type = ttm_bo_type_kernel;
-	bp.resv = NULL;
+	amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp);
+	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
+		bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
 	r = amdgpu_bo_create(adev, &bp, &root);
 	if (r)
 		goto error_free_sched_entity;
@@ -2683,6 +3045,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->pasid = pasid;
 	}
 
+	vm->fault_hash = init_fault_hash();
+	if (!vm->fault_hash) {
+		r = -ENOMEM;
+		goto error_free_root;
+	}
+
 	INIT_KFIFO(vm->faults);
 	vm->fault_credit = 16;
 
@@ -2722,7 +3090,7 @@ error_free_sched_entity:
  * Returns:
  * 0 for success, -errno for errors.
  */
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			   unsigned int pasid)
 {
 	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
 	int r;
@@ -2734,7 +3102,20 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	/* Sanity checks */
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
 		r = -EINVAL;
-		goto error;
+		goto unreserve_bo;
+	}
+
+	if (pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
+			      GFP_ATOMIC);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+		if (r == -ENOSPC)
+			goto unreserve_bo;
+		r = 0;
 	}
 
 	/* Check if PD needs to be reinitialized and do it before
@@ -2745,7 +3126,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 			       adev->vm_manager.root_level,
 			       pte_support_ats);
 		if (r)
-			goto error;
+			goto free_idr;
 	}
 
 	/* Update VM state */
@@ -2764,45 +3145,52 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 
+		/* Free the original amdgpu-allocated pasid;
+		 * it will be replaced with a kfd-allocated pasid.
+		 */
+		amdgpu_pasid_free(vm->pasid);
 		vm->pasid = 0;
 	}
 
 	/* Free the shadow bo for compute VM */
 	amdgpu_bo_unref(&vm->root.base.bo->shadow);
 
-error:
+	if (pasid)
+		vm->pasid = pasid;
+
+	goto unreserve_bo;
+
+free_idr:
+	if (pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+	}
+unreserve_bo:
 	amdgpu_bo_unreserve(vm->root.base.bo);
 	return r;
 }
 
 /**
- * amdgpu_vm_free_levels - free PD/PT levels
- *
- * @adev: amdgpu device structure
- * @parent: PD/PT starting level to free
- * @level: level of parent structure
+ * amdgpu_vm_release_compute - release a compute vm
+ * @adev: amdgpu_device pointer
+ * @vm: a vm turned into a compute vm by calling amdgpu_vm_make_compute
  *
- * Free the page directory or page table level and all sub levels.
+ * This is the counterpart of amdgpu_vm_make_compute. It decouples the
+ * compute pasid from the vm. Compute should stop using the vm after this call.
  */
-static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
-				  struct amdgpu_vm_pt *parent,
-				  unsigned level)
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
+	if (vm->pasid) {
+		unsigned long flags;
 
-	if (parent->base.bo) {
-		list_del(&parent->base.bo_list);
-		list_del(&parent->base.vm_status);
-		amdgpu_bo_unref(&parent->base.bo->shadow);
-		amdgpu_bo_unref(&parent->base.bo);
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
-
-	if (parent->entries)
-		for (i = 0; i < num_entries; i++)
-			amdgpu_vm_free_levels(adev, &parent->entries[i],
-					      level + 1);
-
-	kvfree(parent->entries);
+	vm->pasid = 0;
 }
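
Taken together, amdgpu_vm_make_compute() and amdgpu_vm_release_compute() bracket a VM's compute lifetime. An assumed call sequence, for illustration only (kfd_pasid stands in for whatever PASID KFD allocated; the ordering is inferred from these two functions, not spelled out in the patch):

	r = amdgpu_vm_init(adev, vm, AMDGPU_VM_CONTEXT_GFX, 0);
	...
	r = amdgpu_vm_make_compute(adev, vm, kfd_pasid);	/* adopt KFD pasid */
	...
	amdgpu_vm_release_compute(adev, vm);	/* detach pasid; stop compute use */
	amdgpu_vm_fini(adev, vm);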
 
 /**
@@ -2826,7 +3214,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
-		amdgpu_ih_clear_fault(adev, fault);
+		amdgpu_vm_clear_fault(vm->fault_hash, fault);
 
 	if (vm->pasid) {
 		unsigned long flags;
@@ -2836,6 +3224,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
 
+	kfree(vm->fault_hash);
+	vm->fault_hash = NULL;
+
 	drm_sched_entity_destroy(&vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
@@ -2862,8 +3253,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r) {
 		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
 	} else {
-		amdgpu_vm_free_levels(adev, &vm->root,
-				      adev->vm_manager.root_level);
+		amdgpu_vm_free_pts(adev, vm);
 		amdgpu_bo_unreserve(root);
 	}
 	amdgpu_bo_unref(&root);
@@ -2926,7 +3316,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;
 
-	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
 
@@ -3002,7 +3391,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 /**
  * amdgpu_vm_get_task_info - Extracts task info for a PASID.
  *
- * @dev: drm device pointer
+ * @adev: amdgpu device pointer
  * @pasid: PASID identifier for VM
  * @task_info: task_info to fill.
  */
@@ -3037,3 +3426,78 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
 		}
 	}
 }
+
+/**
+ * amdgpu_vm_add_fault - Add a page fault record to fault hash table
+ *
+ * @fault_hash: fault hash table
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a retry page fault interrupt is
+ * received. If this is a new page fault, it will be added to a hash
+ * table. The return value indicates whether this is a new fault, or
+ * a fault that was already known and is already being handled.
+ *
+ * If there are too many pending page faults, this will fail. Retry
+ * interrupts should be ignored in this case until there is enough
+ * free space.
+ *
+ * Returns 0 if the fault was added, 1 if the fault was already known,
+ * -ENOSPC if there are too many pending faults.
+ */
+int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
+{
+	unsigned long flags;
+	int r = -ENOSPC;
+
+	if (WARN_ON_ONCE(!fault_hash))
+		/* Should be allocated in amdgpu_vm_init */
+		return r;
+
+	spin_lock_irqsave(&fault_hash->lock, flags);
+
+	/* Only let the hash table fill up to 50% for best performance */
+	if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS - 1)))
+		goto unlock_out;
+
+	r = chash_table_copy_in(&fault_hash->hash, key, NULL);
+	if (!r)
+		fault_hash->count++;
+
+	/* chash_table_copy_in should never fail unless we're losing count */
+	WARN_ON_ONCE(r < 0);
+
+unlock_out:
+	spin_unlock_irqrestore(&fault_hash->lock, flags);
+	return r;
+}
+
+/**
+ * amdgpu_vm_clear_fault - Remove a page fault record
+ *
+ * @fault_hash: fault hash table
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a page fault has been handled. Any
+ * future interrupt with this key will be processed as a new
+ * page fault.
+ */
+void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
+{
+	unsigned long flags;
+	int r;
+
+	if (!fault_hash)
+		return;
+
+	spin_lock_irqsave(&fault_hash->lock, flags);
+
+	r = chash_table_remove(&fault_hash->hash, key, NULL);
+	if (!WARN_ON_ONCE(r < 0)) {
+		fault_hash->count--;
+		WARN_ON_ONCE(fault_hash->count < 0);
+	}
+
+	spin_unlock_irqrestore(&fault_hash->lock, flags);
+}
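
For reference, a minimal sketch of how an interrupt handler is expected to drive this pair, following the return values documented above (the function name and the deferred-work step are illustrative assumptions, not part of this patch):

	static void example_handle_retry_fault(struct amdgpu_vm *vm, u64 key)
	{
		int r = amdgpu_vm_add_fault(vm->fault_hash, key);

		if (r == 1)
			return;	/* duplicate: fault already being handled */
		if (r == -ENOSPC)
			return;	/* too many pending faults: drop this IRQ */

		/* ... hand the fault to a worker to actually service it ... */

		/* afterwards, interrupts with this key count as new faults */
		amdgpu_vm_clear_fault(vm->fault_hash, key);
	}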

+ 59 - 25
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

@@ -29,6 +29,8 @@
 #include <linux/rbtree.h>
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <linux/chash.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -48,9 +50,6 @@ struct amdgpu_bo_list_entry;
 /* number of entries in page table */
 #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
 
-/* PTBs (Page Table Blocks) need to be aligned to 32K */
-#define AMDGPU_VM_PTB_ALIGN_SIZE   32768
-
 #define AMDGPU_PTE_VALID	(1ULL << 0)
 #define AMDGPU_PTE_SYSTEM	(1ULL << 1)
 #define AMDGPU_PTE_SNOOPED	(1ULL << 2)
@@ -103,19 +102,6 @@ struct amdgpu_bo_list_entry;
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE			(1ULL << 20)
 
-/* VA hole for 48bit addresses on Vega10 */
-#define AMDGPU_VA_HOLE_START			0x0000800000000000ULL
-#define AMDGPU_VA_HOLE_END			0xffff800000000000ULL
-
-/*
- * Hardware is programmed as if the hole doesn't exists with start and end
- * address values.
- *
- * This mask is used to remove the upper 16bits of the VA and so come up with
- * the linear addr value.
- */
-#define AMDGPU_VA_HOLE_MASK			0x0000ffffffffffffULL
-
 /* max vmids dedicated for process */
 #define AMDGPU_VM_MAX_RESERVED_VMID	1
 
@@ -143,7 +129,7 @@ struct amdgpu_vm_bo_base {
 	struct amdgpu_bo		*bo;
 
 	/* protected by bo being reserved */
-	struct list_head		bo_list;
+	struct amdgpu_vm_bo_base	*next;
 
 	/* protected by spinlock */
 	struct list_head		vm_status;
@@ -160,6 +146,27 @@ struct amdgpu_vm_pt {
 	struct amdgpu_vm_pt		*entries;
 };
 
+/* provided by hw blocks that can write ptes, e.g., sdma */
+struct amdgpu_vm_pte_funcs {
+	/* number of dw to reserve per operation */
+	unsigned	copy_pte_num_dw;
+
+	/* copy pte entries from GART */
+	void (*copy_pte)(struct amdgpu_ib *ib,
+			 uint64_t pe, uint64_t src,
+			 unsigned count);
+
+	/* write pte one entry at a time with addr mapping */
+	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
+			  uint64_t value, unsigned count,
+			  uint32_t incr);
+	/* for linear pte/pde updates without addr mapping */
+	void (*set_pte_pde)(struct amdgpu_ib *ib,
+			    uint64_t pe,
+			    uint64_t addr, unsigned count,
+			    uint32_t incr, uint64_t flags);
+};
+
 #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
 #define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
 #define AMDGPU_VM_FAULT_ADDR(fault)  ((u64)(fault) & 0xfffffffff000ULL)
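
A worked example of the packing these macros perform (values invented for illustration): the PASID lands in the top 16 bits, the page-aligned fault address in the low 48.

	u64 key = AMDGPU_VM_FAULT(0x1234, 0xabcde000);
	/* key == 0x12340000abcde000 */
	/* AMDGPU_VM_FAULT_PASID(key) == 0x1234 */
	/* AMDGPU_VM_FAULT_ADDR(key)  == 0xabcde000 (bits 12..47 kept) */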
@@ -172,6 +179,13 @@ struct amdgpu_task_info {
 	pid_t	tgid;
 };
 
+#define AMDGPU_PAGEFAULT_HASH_BITS 8
+struct amdgpu_retryfault_hashtable {
+	DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+	spinlock_t	lock;
+	int		count;
+};
+
 struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
@@ -182,13 +196,16 @@ struct amdgpu_vm {
 	/* PT BOs which relocated and their parent need an update */
 	struct list_head	relocated;
 
-	/* BOs moved, but not yet updated in the PT */
+	/* per VM BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
-	spinlock_t		moved_lock;
 
 	/* All BOs of this VM not currently in the state machine */
 	struct list_head	idle;
 
+	/* regular invalidated BOs, but not yet updated in the PT */
+	struct list_head	invalidated;
+	spinlock_t		invalidated_lock;
+
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
 
@@ -226,6 +243,12 @@ struct amdgpu_vm {
 
 	/* Some basic info about the task */
 	struct amdgpu_task_info task_info;
+
+	/* Stores the positions of a group of BOs */
+	struct ttm_lru_bulk_move lru_bulk_move;
+	/* mark whether can do the bulk move */
+	bool			bulk_moveable;
+	struct amdgpu_retryfault_hashtable *fault_hash;
 };
 
 struct amdgpu_vm_manager {
@@ -244,10 +267,9 @@ struct amdgpu_vm_manager {
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
 	/* vm pte handling */
-	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
-	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
-	unsigned				vm_pte_num_rings;
-	atomic_t				vm_pte_next_ring;
+	const struct amdgpu_vm_pte_funcs	*vm_pte_funcs;
+	struct drm_sched_rq			*vm_pte_rqs[AMDGPU_MAX_RINGS];
+	unsigned				vm_pte_num_rqs;
 
 	/* partial resident texture handling */
 	spinlock_t				prt_lock;
@@ -266,11 +288,16 @@ struct amdgpu_vm_manager {
 	spinlock_t				pasid_lock;
 };
 
+#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
+#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
+#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			   unsigned int pasid);
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 				  unsigned int pasid);
@@ -330,8 +357,15 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
-			 struct amdgpu_task_info *task_info);
+			     struct amdgpu_task_info *task_info);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm);
+
+int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
+
+void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
+
 #endif

+ 40 - 12
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

@@ -124,6 +124,28 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	return usage;
 }
 
+/**
+ * amdgpu_vram_mgr_virt_start - update virtual start address
+ *
+ * @mem: ttm_mem_reg to update
+ * @node: just allocated node
+ *
+ * Calculate a virtual BO start address to easily check if everything is CPU
+ * accessible.
+ */
+static void amdgpu_vram_mgr_virt_start(struct ttm_mem_reg *mem,
+				       struct drm_mm_node *node)
+{
+	unsigned long start;
+
+	start = node->start + node->size;
+	if (start > mem->num_pages)
+		start -= mem->num_pages;
+	else
+		start = 0;
+	mem->start = max(mem->start, start);
+}
+
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *
@@ -176,10 +198,25 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	pages_left = mem->num_pages;
 
 	spin_lock(&mgr->lock);
-	for (i = 0; i < num_nodes; ++i) {
+	for (i = 0; pages_left >= pages_per_node; ++i) {
+		unsigned long pages = rounddown_pow_of_two(pages_left);
+
+		r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
+						pages_per_node, 0,
+						place->fpfn, lpfn,
+						mode);
+		if (unlikely(r))
+			break;
+
+		usage += nodes[i].size << PAGE_SHIFT;
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
+		amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
+		pages_left -= pages;
+	}
+
+	for (; pages_left; ++i) {
 		unsigned long pages = min(pages_left, pages_per_node);
 		uint32_t alignment = mem->page_alignment;
-		unsigned long start;
 
 		if (pages == pages_per_node)
 			alignment = pages_per_node;
@@ -193,16 +230,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 
 		usage += nodes[i].size << PAGE_SHIFT;
 		vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
-
-		/* Calculate a virtual BO start address to easily check if
-		 * everything is CPU accessible.
-		 */
-		start = nodes[i].start + nodes[i].size;
-		if (start > mem->num_pages)
-			start -= mem->num_pages;
-		else
-			start = 0;
-		mem->start = max(mem->start, start);
+		amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
 		pages_left -= pages;
 	}
 	spin_unlock(&mgr->lock);
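
The rewritten allocator above works in two phases; a simplified sketch of the control flow (alloc_node is a hypothetical stand-in for drm_mm_insert_node_in_range() plus the usage accounting; locking and error handling omitted):

	/* phase 1: big power-of-two chunks while a full node still fits */
	while (pages_left >= pages_per_node)
		pages_left -= alloc_node(rounddown_pow_of_two(pages_left));

	/* phase 2: fill the tail with at-most pages_per_node pieces */
	while (pages_left)
		pages_left -= alloc_node(min(pages_left, pages_per_node));

After each node, amdgpu_vram_mgr_virt_start() raises mem->start to max(0, node_end - num_pages), so a single comparison against the CPU-visible page count bounds the whole scattered BO.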

+ 119 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

@@ -0,0 +1,119 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+
+
+static DEFINE_MUTEX(xgmi_mutex);
+
+#define AMDGPU_MAX_XGMI_HIVE			8
+#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE		4
+
+struct amdgpu_hive_info {
+	uint64_t		hive_id;
+	struct list_head	device_list;
+};
+
+static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
+static unsigned hive_count;
+
+static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
+{
+	int i;
+	struct amdgpu_hive_info *tmp;
+
+	if (!adev->gmc.xgmi.hive_id)
+		return NULL;
+	for (i = 0; i < hive_count; ++i) {
+		tmp = &xgmi_hives[i];
+		if (tmp->hive_id == adev->gmc.xgmi.hive_id)
+			return tmp;
+	}
+	if (i >= AMDGPU_MAX_XGMI_HIVE)
+		return NULL;
+
+	/* initialize a new hive if it doesn't exist yet */
+	tmp = &xgmi_hives[hive_count++];
+	tmp->hive_id = adev->gmc.xgmi.hive_id;
+	INIT_LIST_HEAD(&tmp->device_list);
+	return tmp;
+}
+
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
+{
+	struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
+	struct amdgpu_hive_info *hive;
+	struct amdgpu_xgmi	*entry;
+	struct amdgpu_device	*tmp_adev;
+
+	int count = 0, ret = -EINVAL;
+
+	if ((adev->asic_type < CHIP_VEGA20) ||
+	    (adev->flags & AMD_IS_APU))
+		return 0;
+	adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
+	adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
+
+	memset(&tmp_topology[0], 0, sizeof(tmp_topology));
+	mutex_lock(&xgmi_mutex);
+	hive = amdgpu_get_xgmi_hive(adev);
+	if (!hive)
+		goto exit;
+
+	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
+	list_for_each_entry(entry, &hive->device_list, head)
+		tmp_topology[count++].device_id = entry->device_id;
+
+	ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
+	if (ret) {
+		dev_err(adev->dev,
+			"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+			adev->gmc.xgmi.device_id,
+			adev->gmc.xgmi.hive_id, ret);
+		goto exit;
+	}
+	/* Each psp needs to set the latest topology */
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+		ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
+		if (ret) {
+			dev_err(tmp_adev->dev,
+				"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
+				tmp_adev->gmc.xgmi.device_id,
+				tmp_adev->gmc.xgmi.hive_id, ret);
+			/* TODO: continue when some nodes fail, or disable the whole hive */
+			break;
+		}
+	}
+	if (!ret)
+		dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
+			adev->gmc.xgmi.physical_node_id,
+			adev->gmc.xgmi.hive_id);
+
+exit:
+	mutex_unlock(&xgmi_mutex);
+	return ret;
+}
+
+
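
A usage sketch for the hive lookup (modeled on amdgpu_xgmi_add_device() above): amdgpu_get_xgmi_hive() may lazily create a hive entry and bump hive_count without any locking of its own, so callers must hold xgmi_mutex across the lookup and any device-list manipulation:

	mutex_lock(&xgmi_mutex);
	hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
	mutex_unlock(&xgmi_mutex);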

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/atombios_encoders.c

@@ -28,6 +28,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
 #include "atom.h"
 #include "atombios_encoders.h"
 #include "atombios_dp.h"

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/ci_dpm.c

@@ -6277,12 +6277,12 @@ static int ci_dpm_sw_init(void *handle)
 	int ret;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230,
+	ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230,
 				&adev->pm.dpm.thermal.irq);
 	if (ret)
 		return ret;
 
-	ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231,
+	ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231,
 				&adev->pm.dpm.thermal.irq);
 	if (ret)
 		return ret;

+ 9 - 8
drivers/gpu/drm/amd/amdgpu/cik.c

@@ -2002,6 +2002,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
 		if (amdgpu_dpm == -1)
 			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		else
@@ -2014,8 +2016,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 #endif
 		else
 			amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block);
-		amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
-		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
 		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
 		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;
@@ -2023,6 +2023,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
 		if (amdgpu_dpm == -1)
 			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		else
@@ -2035,8 +2037,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 #endif
 		else
 			amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block);
-		amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
-		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
 		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
 		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;
@@ -2044,6 +2044,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
 		amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -2053,8 +2055,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 #endif
 		else
 			amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block);
-		amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
-		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+
 		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
 		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;
@@ -2063,6 +2064,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
 		amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -2072,8 +2075,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
 #endif
 		else
 			amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block);
-		amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
-		amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
 		amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
 		amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
 		break;

+ 4 - 5
drivers/gpu/drm/amd/amdgpu/cik_ih.c

@@ -276,7 +276,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
 	dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
 	dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
 
-	entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
+	entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
 	entry->src_id = dw[0] & 0xff;
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
@@ -318,7 +318,7 @@ static int cik_ih_sw_init(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
+	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
 	if (r)
 		return r;
 
@@ -332,7 +332,7 @@ static int cik_ih_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	amdgpu_irq_fini(adev);
-	amdgpu_ih_ring_fini(adev);
+	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
 	amdgpu_irq_remove_domain(adev);
 
 	return 0;
@@ -468,8 +468,7 @@ static const struct amdgpu_ih_funcs cik_ih_funcs = {
 
 static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
 {
-	if (adev->irq.ih_funcs == NULL)
-		adev->irq.ih_funcs = &cik_ih_funcs;
+	adev->irq.ih_funcs = &cik_ih_funcs;
 }
 
 const struct amdgpu_ip_block_version cik_ih_ip_block =

+ 12 - 14
drivers/gpu/drm/amd/amdgpu/cik_sdma.c

@@ -970,19 +970,19 @@ static int cik_sdma_sw_init(void *handle)
 	}
 
 	/* SDMA trap event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
 			      &adev->sdma.trap_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
@@ -1370,10 +1370,8 @@ static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
 
 static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
 {
-	if (adev->mman.buffer_funcs == NULL) {
-		adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
-		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
-	}
+	adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
+	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }
 
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
@@ -1386,16 +1384,16 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	struct drm_gpu_scheduler *sched;
 	unsigned i;
 
-	if (adev->vm_manager.vm_pte_funcs == NULL) {
-		adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		sched = &adev->sdma.instance[i].ring.sched;
+		adev->vm_manager.vm_pte_rqs[i] =
+			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
 	}
+	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version cik_sdma_ip_block =

+ 4 - 5
drivers/gpu/drm/amd/amdgpu/cz_ih.c

@@ -255,7 +255,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
 	dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
 	dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
 
-	entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
+	entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
 	entry->src_id = dw[0] & 0xff;
 	entry->src_data[0] = dw[1] & 0xfffffff;
 	entry->ring_id = dw[2] & 0xff;
@@ -297,7 +297,7 @@ static int cz_ih_sw_init(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
+	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
 	if (r)
 		return r;
 
@@ -311,7 +311,7 @@ static int cz_ih_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	amdgpu_irq_fini(adev);
-	amdgpu_ih_ring_fini(adev);
+	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
 	amdgpu_irq_remove_domain(adev);
 
 	return 0;
@@ -449,8 +449,7 @@ static const struct amdgpu_ih_funcs cz_ih_funcs = {
 
 static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev)
 {
-	if (adev->irq.ih_funcs == NULL)
-		adev->irq.ih_funcs = &cz_ih_funcs;
+	adev->irq.ih_funcs = &cz_ih_funcs;
 }
 
 const struct amdgpu_ip_block_version cz_ih_ip_block =

+ 16 - 5
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c

@@ -31,6 +31,7 @@
 #include "atombios_encoders.h"
 #include "amdgpu_pll.h"
 #include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
 #include "dce_v10_0.h"
 
 #include "dce/dce_10_0_d.h"
@@ -1942,6 +1943,17 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 		/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
 		bypass_lut = true;
 		break;
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ABGR8888:
+		fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+		fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+		fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
+		fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
+#ifdef __BIG_ENDIAN
+		fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+					ENDIAN_8IN32);
+#endif
+		break;
 	default:
 		DRM_ERROR("Unsupported screen format %s\n",
 		          drm_get_format_name(target_fb->format->format, &format_name));
@@ -2734,19 +2746,19 @@ static int dce_v10_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
-		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+		r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
 		if (r)
 			return r;
 	}
 
 	for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
-		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+		r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
 		if (r)
 			return r;
 	}
 
 	/* HPD hotplug */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
 	if (r)
 		return r;
 
@@ -3558,8 +3570,7 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
 
 static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev)
 {
-	if (adev->mode_info.funcs == NULL)
-		adev->mode_info.funcs = &dce_v10_0_display_funcs;
+	adev->mode_info.funcs = &dce_v10_0_display_funcs;
 }
 
 static const struct amdgpu_irq_src_funcs dce_v10_0_crtc_irq_funcs = {

+ 16 - 5
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c

@@ -31,6 +31,7 @@
 #include "atombios_encoders.h"
 #include "amdgpu_pll.h"
 #include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
 #include "dce_v11_0.h"
 
 #include "dce/dce_11_0_d.h"
@@ -1984,6 +1985,17 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 		/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
 		bypass_lut = true;
 		break;
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ABGR8888:
+		fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+		fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+		fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
+		fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
+#ifdef __BIG_ENDIAN
+		fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+					ENDIAN_8IN32);
+#endif
+		break;
 	default:
 		DRM_ERROR("Unsupported screen format %s\n",
 		          drm_get_format_name(target_fb->format->format, &format_name));
@@ -2855,19 +2867,19 @@ static int dce_v11_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
-		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+		r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
 		if (r)
 			return r;
 	}
 
 	for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
-		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+		r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
 		if (r)
 			return r;
 	}
 
 	/* HPD hotplug */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
 	if (r)
 		return r;
 
@@ -3690,8 +3702,7 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
 
 static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
 {
-	if (adev->mode_info.funcs == NULL)
-		adev->mode_info.funcs = &dce_v11_0_display_funcs;
+	adev->mode_info.funcs = &dce_v11_0_display_funcs;
 }
 
 static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {

Some files were not shown because too many files changed in this diff