Эх сурвалжийг харах

Merge branch 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux into drm-next

Fixes for 4.13:
- Various fixes for Raven
- Various fixes for Vega10
- Stability fixes for KIQ
- Fix reloading the driver
- Fix S3 on vega10
- Misc other fixes

* 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux: (26 commits)
  drm/amd/powerplay: fix bug fail to remove sysfs when rmmod amdgpu.
  amdgpu: Set cik/si_support to 1 by default if radeon isn't built
  drm/amdgpu/gfx9: fix driver reload with KIQ
  drm/amdgpu/gfx8: fix driver reload with KIQ
  drm/amdgpu: Don't call amd_powerplay_destroy() if we don't have powerplay
  drm/ttm: Fix use-after-free in ttm_bo_clean_mm
  drm/amd/amdgpu: move get memory type function from early init to sw init
  drm/amdgpu/cgs: always set reference clock in mode_info
  drm/amdgpu: fix vblank_time when displays are off
  drm/amd/powerplay: power value format change for Vega10
  drm/amdgpu/gfx9: support the amdgpu.disable_cu option
  drm/amd/powerplay: change PPSMC_MSG_GetCurrPkgPwr for Vega10
  drm/amdgpu: Make amdgpu_cs_parser_init static (v2)
  drm/amdgpu/cs: fix a typo in a comment
  drm/amdgpu: Fix the exported always on CU bitmap
  drm/amdgpu/gfx9: gfx_v9_0_enable_gfx_static_mg_power_gating() can be static
  drm/amdgpu/psp: upper_32_bits/lower_32_bits for address setup
  drm/amd/powerplay/cz: print message if smc message fails
  drm/amdgpu: fix typo in amdgpu_debugfs_test_ib_init
  drm/amdgpu: enable mmhub pg on raven
  ...
Dave Airlie 8 жил өмнө
parent
commit
97eaf75338
28 өөрчлөгдсөн 687 нэмэгдсэн , 197 устгасан
  1. 5 3
      drivers/gpu/drm/amd/amdgpu/amdgpu.h
  2. 6 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
  3. 2 2
      drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
  4. 1 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
  5. 16 1
      drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
  6. 2 0
      drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
  7. 5 3
      drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
  8. 9 10
      drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
  9. 3 1
      drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
  10. 3 1
      drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
  11. 21 79
      drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
  12. 26 55
      drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
  13. 8 8
      drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
  14. 9 0
      drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
  15. 218 0
      drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
  16. 3 0
      drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
  17. 6 6
      drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
  18. 6 6
      drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
  19. 2 1
      drivers/gpu/drm/amd/amdgpu/soc15.c
  20. 1 0
      drivers/gpu/drm/amd/include/amd_shared.h
  21. 268 0
      drivers/gpu/drm/amd/include/vi_structs.h
  22. 16 7
      drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
  23. 9 0
      drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h
  24. 33 9
      drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
  25. 2 2
      drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
  26. 1 1
      drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
  27. 3 1
      drivers/gpu/drm/ttm/ttm_bo.c
  28. 3 0
      include/uapi/drm/amdgpu_drm.h

+ 5 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -1028,12 +1028,15 @@ struct amdgpu_gfx_config {
 };
 
 struct amdgpu_cu_info {
-	uint32_t number; /* total active CU number */
-	uint32_t ao_cu_mask;
 	uint32_t max_waves_per_simd;
 	uint32_t wave_front_size;
 	uint32_t max_scratch_slots_per_cu;
 	uint32_t lds_size;
+
+	/* total active CU number */
+	uint32_t number;
+	uint32_t ao_cu_mask;
+	uint32_t ao_cu_bitmap[4][4];
 	uint32_t bitmap[4][4];
 };
 
@@ -1924,7 +1927,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
-int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);

+ 6 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

@@ -838,6 +838,12 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 		return -EINVAL;
 
 	mode_info = info->mode_info;
+	if (mode_info) {
+		/* if the displays are off, vblank time is max */
+		mode_info->vblank_time_us = 0xffffffff;
+		/* always set the reference clock */
+		mode_info->ref_clock = adev->clock.spll.reference_freq;
+	}
 
 	if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
 		list_for_each_entry(crtc,

+ 2 - 2
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -64,7 +64,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
-int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -497,7 +497,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				 &e->user_invalidated) && e->user_pages) {
 
 				/* We acquired a page array, but somebody
-				 * invalidated it. Free it an try again
+				 * invalidated it. Free it and try again
 				 */
 				release_pages(e->user_pages,
 					      e->robj->tbo.ttm->num_pages,

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -3804,7 +3804,7 @@ int amdgpu_debugfs_init(struct drm_minor *minor)
 	return 0;
 }
 #else
-static int amdgpu_debugfs_test_ib_init(struct amdgpu_device *adev)
+static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
 {
 	return 0;
 }

+ 16 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -67,9 +67,10 @@
  * - 3.15.0 - Export more gpu info for gfx9
  * - 3.16.0 - Add reserved vmid support
  * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
+ * - 3.18.0 - Export gpu always on cu bitmap
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	17
+#define KMS_DRIVER_MINOR	18
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -247,14 +248,28 @@ MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 =
 module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 
 #ifdef CONFIG_DRM_AMDGPU_SI
+
+#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
 int amdgpu_si_support = 0;
 MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
+#else
+int amdgpu_si_support = 1;
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
+#endif
+
 module_param_named(si_support, amdgpu_si_support, int, 0444);
 #endif
 
 #ifdef CONFIG_DRM_AMDGPU_CIK
+
+#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
 int amdgpu_cik_support = 0;
 MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
+#else
+int amdgpu_cik_support = 1;
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
+#endif
+
 module_param_named(cik_support, amdgpu_cik_support, int, 0444);
 #endif
 

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -594,6 +594,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		dev_info.cu_active_number = adev->gfx.cu_info.number;
 		dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
 		dev_info.ce_ram_size = adev->gfx.ce_ram_size;
+		memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
+		       sizeof(adev->gfx.cu_info.ao_cu_bitmap));
 		memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
 		       sizeof(adev->gfx.cu_info.bitmap));
 		dev_info.vram_type = adev->mc.vram_type;

+ 5 - 3
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -188,6 +188,9 @@ static int amdgpu_pp_hw_fini(void *handle)
 	int ret = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->pp_enabled && adev->pm.dpm_enabled)
+		amdgpu_pm_sysfs_fini(adev);
+
 	if (adev->powerplay.ip_funcs->hw_fini)
 		ret = adev->powerplay.ip_funcs->hw_fini(
 					adev->powerplay.pp_handle);
@@ -206,10 +209,9 @@ static void amdgpu_pp_late_fini(void *handle)
 		adev->powerplay.ip_funcs->late_fini(
 			  adev->powerplay.pp_handle);
 
-	if (adev->pp_enabled && adev->pm.dpm_enabled)
-		amdgpu_pm_sysfs_fini(adev);
 
-	amd_powerplay_destroy(adev->powerplay.pp_handle);
+	if (adev->pp_enabled)
+		amd_powerplay_destroy(adev->powerplay.pp_handle);
 }
 
 static int amdgpu_pp_suspend(void *handle)

+ 9 - 10
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

@@ -152,8 +152,8 @@ static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 				 uint64_t tmr_mc, uint32_t size)
 {
 	cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
-	cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc;
-	cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32);
+	cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
+	cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
 	cmd->cmd.cmd_setup_tmr.buf_size = size;
 }
 
@@ -333,14 +333,11 @@ static int psp_load_fw(struct amdgpu_device *adev)
 {
 	int ret;
 	struct psp_context *psp = &adev->psp;
-	struct psp_gfx_cmd_resp *cmd;
 
-	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-	if (!cmd)
+	psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!psp->cmd)
 		return -ENOMEM;
 
-	psp->cmd = cmd;
-
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
 				      AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
@@ -379,8 +376,6 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	if (ret)
 		goto failed_mem;
 
-	kfree(cmd);
-
 	return 0;
 
 failed_mem:
@@ -390,7 +385,8 @@ failed_mem1:
 	amdgpu_bo_free_kernel(&psp->fw_pri_bo,
 			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
 failed:
-	kfree(cmd);
+	kfree(psp->cmd);
+	psp->cmd = NULL;
 	return ret;
 }
 
@@ -450,6 +446,9 @@ static int psp_hw_fini(void *handle)
 		amdgpu_bo_free_kernel(&psp->fence_buf_bo,
 				      &psp->fence_buf_mc_addr, &psp->fence_buf);
 
+	kfree(psp->cmd);
+	psp->cmd = NULL;
+
 	return 0;
 }
 

+ 3 - 1
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

@@ -3535,7 +3535,9 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
 				mask <<= 1;
 			}
 			active_cu_number += counter;
-			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			if (i < 2 && j < 2)
+				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 		}
 	}
 

+ 3 - 1
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

@@ -5427,7 +5427,9 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 				mask <<= 1;
 			}
 			active_cu_number += counter;
-			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			if (i < 2 && j < 2)
+				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 		}
 	}
 	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

+ 21 - 79
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -40,7 +40,6 @@
 
 #include "bif/bif_5_0_d.h"
 #include "bif/bif_5_0_sh_mask.h"
-
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
@@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle)
 		return r;
 
 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
-	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd));
+	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
 	if (r)
 		return r;
 
@@ -4637,56 +4636,6 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
 	return r;
 }
 
-static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-	uint32_t scratch, tmp = 0;
-	int r, i;
-
-	r = amdgpu_gfx_scratch_get(adev, &scratch);
-	if (r) {
-		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
-		return r;
-	}
-	WREG32(scratch, 0xCAFEDEAD);
-
-	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
-	if (r) {
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-		amdgpu_gfx_scratch_free(adev, scratch);
-		return r;
-	}
-	/* unmap queues */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
-	amdgpu_ring_write(kiq_ring,
-			  PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
-			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
-	amdgpu_ring_write(kiq_ring, 0);
-	amdgpu_ring_write(kiq_ring, 0);
-	amdgpu_ring_write(kiq_ring, 0);
-	amdgpu_ring_write(kiq_ring, 0);
-	/* write to scratch for completion */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
-	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
-	amdgpu_ring_commit(kiq_ring);
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i >= adev->usec_timeout) {
-		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
-		r = -EINVAL;
-	}
-	amdgpu_gfx_scratch_free(adev, scratch);
-
-	return r;
-}
-
 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
 {
 	int i, r = 0;
@@ -4715,9 +4664,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
 	uint32_t tmp;
 
-	/* init the mqd struct */
-	memset(mqd, 0, sizeof(struct vi_mqd));
-
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
@@ -4725,7 +4671,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
-
+	if (!(adev->flags & AMD_IS_APU)) {
+		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
+					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
+					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+	}
 	eop_base_addr = ring->eop_gpu_addr >> 8;
 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -4890,7 +4841,6 @@ int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
-	int r = 0;
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
@@ -4900,44 +4850,32 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-		r = gfx_v8_0_deactivate_hqd(adev, 1);
-		if (r) {
-			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
-			goto out_unlock;
-		}
 		gfx_v8_0_mqd_commit(adev, mqd);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
-		r = gfx_v8_0_deactivate_hqd(adev, 1);
-		if (r) {
-			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
-			goto out_unlock;
-		}
 		gfx_v8_0_mqd_commit(adev, mqd);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
 	}
 
-	return r;
-
-out_unlock:
-	vi_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-	return r;
+	return 0;
 }
 
 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
@@ -4947,6 +4885,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
 	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
@@ -4954,11 +4895,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
 	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
@@ -5138,7 +5079,6 @@ static int gfx_v8_0_hw_fini(void *handle)
 		pr_debug("For SRIOV client, shouldn't do anything.\n");
 		return 0;
 	}
-	gfx_v8_0_kiq_kcq_disable(adev);
 	gfx_v8_0_cp_enable(adev, false);
 	gfx_v8_0_rlc_stop(adev);
 
@@ -7080,7 +7020,9 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 				mask <<= 1;
 			}
 			active_cu_number += counter;
-			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			if (i < 2 && j < 2)
+				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 		}
 	}
 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

+ 26 - 55
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

@@ -1964,8 +1964,8 @@ static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
 }
 
-void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
-						bool enable)
+static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+						       bool enable)
 {
 	uint32_t data, default_data;
 
@@ -1978,7 +1978,7 @@ void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
 }
 
-void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
 						bool enable)
 {
 	uint32_t data, default_data;
@@ -2502,56 +2502,6 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
 	return r;
 }
 
-static int gfx_v9_0_kiq_kcq_disable(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-	uint32_t scratch, tmp = 0;
-	int r, i;
-
-	r = amdgpu_gfx_scratch_get(adev, &scratch);
-	if (r) {
-		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
-		return r;
-	}
-	WREG32(scratch, 0xCAFEDEAD);
-
-	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
-	if (r) {
-		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-		amdgpu_gfx_scratch_free(adev, scratch);
-		return r;
-	}
-	/* unmap queues */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
-	amdgpu_ring_write(kiq_ring,
-			  PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
-			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
-	amdgpu_ring_write(kiq_ring, 0);
-	amdgpu_ring_write(kiq_ring, 0);
-	amdgpu_ring_write(kiq_ring, 0);
-	amdgpu_ring_write(kiq_ring, 0);
-	/* write to scratch for completion */
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
-	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
-	amdgpu_ring_commit(kiq_ring);
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i >= adev->usec_timeout) {
-		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
-		r = -EINVAL;
-	}
-	amdgpu_gfx_scratch_free(adev, scratch);
-
-	return r;
-}
-
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -2996,7 +2946,6 @@ static int gfx_v9_0_hw_fini(void *handle)
 		pr_debug("For SRIOV client, shouldn't do anything.\n");
 		return 0;
 	}
-	gfx_v9_0_kiq_kcq_disable(adev);
 	gfx_v9_0_cp_enable(adev, false);
 	gfx_v9_0_rlc_stop(adev);
 
@@ -4416,6 +4365,20 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 	}
 }
 
+static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
+{
+	u32 data;
+
+	if (!bitmap)
+		return;
+
+	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -4436,10 +4399,13 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 {
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+	unsigned disable_masks[4 * 2];
 
 	if (!adev || !cu_info)
 		return -EINVAL;
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
@@ -4447,6 +4413,9 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 			ao_bitmap = 0;
 			counter = 0;
 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v9_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
@@ -4459,7 +4428,9 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 				mask <<= 1;
 			}
 			active_cu_number += counter;
-			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			if (i < 2 && j < 2)
+				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 		}
 	}
 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

+ 8 - 8
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c

@@ -794,14 +794,6 @@ static int gmc_v6_0_early_init(void *handle)
 	gmc_v6_0_set_gart_funcs(adev);
 	gmc_v6_0_set_irq_funcs(adev);
 
-	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
-		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp);
-	}
-
 	return 0;
 }
 
@@ -821,6 +813,14 @@ static int gmc_v6_0_sw_init(void *handle)
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->flags & AMD_IS_APU) {
+		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+	} else {
+		u32 tmp = RREG32(mmMC_SEQ_MISC0);
+		tmp &= MC_SEQ_MISC0__MT__MASK;
+		adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp);
+	}
+
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;

+ 9 - 0
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

@@ -695,6 +695,15 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	else
 		nbio_v6_1_hdp_flush(adev);
 
+	switch (adev->asic_type) {
+	case CHIP_RAVEN:
+		mmhub_v1_0_initialize_power_gating(adev);
+		mmhub_v1_0_update_power_gating(adev, true);
+		break;
+	default:
+		break;
+	}
+
 	r = gfxhub_v1_0_gart_enable(adev);
 	if (r)
 		return r;

+ 218 - 0
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c

@@ -244,6 +244,224 @@ static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
 	}
 }
 
+struct pctl_data {
+    uint32_t index;
+    uint32_t data;
+};
+
+const struct pctl_data pctl0_data[] = {
+    {0x0, 0x7a640},
+    {0x9, 0x2a64a},
+    {0xd, 0x2a680},
+    {0x11, 0x6a684},
+    {0x19, 0xea68e},
+    {0x29, 0xa69e},
+    {0x2b, 0x34a6c0},
+    {0x61, 0x83a707},
+    {0xe6, 0x8a7a4},
+    {0xf0, 0x1a7b8},
+    {0xf3, 0xfa7cc},
+    {0x104, 0x17a7dd},
+    {0x11d, 0xa7dc},
+    {0x11f, 0x12a7f5},
+    {0x133, 0xa808},
+    {0x135, 0x12a810},
+    {0x149, 0x7a82c}
+};
+#define PCTL0_DATA_LEN (sizeof(pctl0_data)/sizeof(pctl0_data[0]))
+
+#define PCTL0_RENG_EXEC_END_PTR 0x151
+#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE  0xa640
+#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
+
+const struct pctl_data pctl1_data[] = {
+    {0x0, 0x39a000},
+    {0x3b, 0x44a040},
+    {0x81, 0x2a08d},
+    {0x85, 0x6ba094},
+    {0xf2, 0x18a100},
+    {0x10c, 0x4a132},
+    {0x112, 0xca141},
+    {0x120, 0x2fa158},
+    {0x151, 0x17a1d0},
+    {0x16a, 0x1a1e9},
+    {0x16d, 0x13a1ec},
+    {0x182, 0x7a201},
+    {0x18b, 0x3a20a},
+    {0x190, 0x7a580},
+    {0x199, 0xa590},
+    {0x19b, 0x4a594},
+    {0x1a1, 0x1a59c},
+    {0x1a4, 0x7a82c},
+    {0x1ad, 0xfa7cc},
+    {0x1be, 0x17a7dd},
+    {0x1d7, 0x12a810}
+};
+#define PCTL1_DATA_LEN (sizeof(pctl1_data)/sizeof(pctl1_data[0]))
+
+#define PCTL1_RENG_EXEC_END_PTR 0x1ea
+#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE  0xa000
+#define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d
+#define PCTL1_STCTRL_REG_SAVE_RANGE1_BASE  0xa580
+#define PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT 0xa59d
+#define PCTL1_STCTRL_REG_SAVE_RANGE2_BASE  0xa82c
+#define PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT 0xa833
+
+static void mmhub_v1_0_power_gating_write_save_ranges(struct amdgpu_device *adev)
+{
+	uint32_t tmp = 0;
+
+	/* PCTL0_STCTRL_REGISTER_SAVE_RANGE0 */
+	tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
+			STCTRL_REGISTER_SAVE_BASE,
+			PCTL0_STCTRL_REG_SAVE_RANGE0_BASE);
+	tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
+			STCTRL_REGISTER_SAVE_LIMIT,
+			PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT);
+	WREG32_SOC15(MMHUB, 0, mmPCTL0_STCTRL_REGISTER_SAVE_RANGE0, tmp);
+
+	/* PCTL1_STCTRL_REGISTER_SAVE_RANGE0 */
+	tmp = 0;
+	tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
+			STCTRL_REGISTER_SAVE_BASE,
+			PCTL1_STCTRL_REG_SAVE_RANGE0_BASE);
+	tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
+			STCTRL_REGISTER_SAVE_LIMIT,
+			PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT);
+	WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE0, tmp);
+
+	/* PCTL1_STCTRL_REGISTER_SAVE_RANGE1 */
+	tmp = 0;
+	tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
+			STCTRL_REGISTER_SAVE_BASE,
+			PCTL1_STCTRL_REG_SAVE_RANGE1_BASE);
+	tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
+			STCTRL_REGISTER_SAVE_LIMIT,
+			PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT);
+	WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE1, tmp);
+
+	/* PCTL1_STCTRL_REGISTER_SAVE_RANGE2 */
+	tmp = 0;
+	tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
+			STCTRL_REGISTER_SAVE_BASE,
+			PCTL1_STCTRL_REG_SAVE_RANGE2_BASE);
+	tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
+			STCTRL_REGISTER_SAVE_LIMIT,
+			PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT);
+	WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE2, tmp);
+}
+
+void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
+{
+	uint32_t pctl0_misc = 0;
+	uint32_t pctl0_reng_execute = 0;
+	uint32_t pctl1_misc = 0;
+	uint32_t pctl1_reng_execute = 0;
+	int i = 0;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
+	pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
+	pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
+	pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
+
+	/* Light sleep must be disabled before writing to pctl0 registers */
+	pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
+	WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
+
+	/* Write data used to access ram of register engine */
+	for (i = 0; i < PCTL0_DATA_LEN; i++) {
+                WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_INDEX,
+			pctl0_data[i].index);
+                WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_DATA,
+			pctl0_data[i].data);
+        }
+
+	/* Set the reng execute end ptr for pctl0 */
+	pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+					PCTL0_RENG_EXECUTE,
+					RENG_EXECUTE_END_PTR,
+					PCTL0_RENG_EXEC_END_PTR);
+	WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+
+	/* Light sleep must be disabled before writing to pctl1 registers */
+	pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
+	WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
+
+	/* Write data used to access ram of register engine */
+	for (i = 0; i < PCTL1_DATA_LEN; i++) {
+                WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_INDEX,
+			pctl1_data[i].index);
+                WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_DATA,
+			pctl1_data[i].data);
+        }
+
+	/* Set the reng execute end ptr for pctl1 */
+	pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
+					PCTL1_RENG_EXECUTE,
+					RENG_EXECUTE_END_PTR,
+					PCTL1_RENG_EXEC_END_PTR);
+	WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
+
+	mmhub_v1_0_power_gating_write_save_ranges(adev);
+
+	/* Re-enable light sleep */
+	pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
+	WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
+	pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
+	WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
+}
+
+void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
+				bool enable)
+{
+	uint32_t pctl0_reng_execute = 0;
+	uint32_t pctl1_reng_execute = 0;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
+	pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
+
+	if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
+		pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+						PCTL0_RENG_EXECUTE,
+						RENG_EXECUTE_ON_PWR_UP, 1);
+		pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+						PCTL0_RENG_EXECUTE,
+						RENG_EXECUTE_ON_REG_UPDATE, 1);
+		WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+
+		pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
+						PCTL1_RENG_EXECUTE,
+						RENG_EXECUTE_ON_PWR_UP, 1);
+		pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
+						PCTL1_RENG_EXECUTE,
+						RENG_EXECUTE_ON_REG_UPDATE, 1);
+		WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
+
+	} else {
+		pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+						PCTL0_RENG_EXECUTE,
+						RENG_EXECUTE_ON_PWR_UP, 0);
+		pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+						PCTL0_RENG_EXECUTE,
+						RENG_EXECUTE_ON_REG_UPDATE, 0);
+		WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+
+		pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
+						PCTL1_RENG_EXECUTE,
+						RENG_EXECUTE_ON_PWR_UP, 0);
+		pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
+						PCTL1_RENG_EXECUTE,
+						RENG_EXECUTE_ON_REG_UPDATE, 0);
+		WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
+	}
+}
+
 int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
 {
 	if (amdgpu_sriov_vf(adev)) {

+ 3 - 0
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h

@@ -32,6 +32,9 @@ void mmhub_v1_0_init(struct amdgpu_device *adev);
 int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
 			       enum amd_clockgating_state state);
 void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev);
+void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
+                                bool enable);
 
 extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs;
 extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block;

+ 6 - 6
drivers/gpu/drm/amd/amdgpu/psp_v10_0.c

@@ -96,8 +96,8 @@ int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cm
 	header = (struct common_firmware_header *)ucode->fw;
 
 	cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
-	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr;
-	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32);
+	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
+	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
 	cmd->cmd.cmd_load_ip_fw.fw_size = le32_to_cpu(header->ucode_size_bytes);
 
 	ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
@@ -172,10 +172,10 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
 		write_frame = ring->ring_mem + (psp_write_ptr_reg / (sizeof(struct psp_gfx_rb_frame) / 4));
 
 	/* Update KM RB frame */
-	write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32);
-	write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr);
-	write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32);
-	write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr);
+	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
 	write_frame->fence_value = index;
 
 	/* Update the write Pointer in DWORDs */

+ 6 - 6
drivers/gpu/drm/amd/amdgpu/psp_v3_1.c

@@ -254,8 +254,8 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd
 	memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
 
 	cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
-	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr;
-	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32);
+	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
+	cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
 	cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
 
 	ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
@@ -375,10 +375,10 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
 	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
 
 	/* Update KM RB frame */
-	write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32);
-	write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr);
-	write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32);
-	write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr);
+	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
 	write_frame->fence_value = index;
 
 	/* Update the write Pointer in DWORDs */

+ 2 - 1
drivers/gpu/drm/amd/amdgpu/soc15.c

@@ -625,7 +625,8 @@ static int soc15_common_early_init(void *handle)
 			AMD_CG_SUPPORT_MC_LS |
 			AMD_CG_SUPPORT_SDMA_MGCG |
 			AMD_CG_SUPPORT_SDMA_LS;
-		adev->pg_flags = AMD_PG_SUPPORT_SDMA;
+		adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+				 AMD_PG_SUPPORT_MMHUB;
 		adev->external_rev_id = 0x1;
 		break;
 	default:

+ 1 - 0
drivers/gpu/drm/amd/include/amd_shared.h

@@ -184,6 +184,7 @@ enum amd_fan_ctrl_mode {
 #define AMD_PG_SUPPORT_SAMU			(1 << 10)
 #define AMD_PG_SUPPORT_GFX_QUICK_MG		(1 << 11)
 #define AMD_PG_SUPPORT_GFX_PIPELINE		(1 << 12)
+#define AMD_PG_SUPPORT_MMHUB			(1 << 13)
 
 enum amd_pm_state_type {
 	/* not used for dpm */

+ 268 - 0
drivers/gpu/drm/amd/include/vi_structs.h

@@ -156,6 +156,274 @@ struct vi_sdma_mqd {
 };
 
 struct vi_mqd {
+	uint32_t header;
+	uint32_t compute_dispatch_initiator;
+	uint32_t compute_dim_x;
+	uint32_t compute_dim_y;
+	uint32_t compute_dim_z;
+	uint32_t compute_start_x;
+	uint32_t compute_start_y;
+	uint32_t compute_start_z;
+	uint32_t compute_num_thread_x;
+	uint32_t compute_num_thread_y;
+	uint32_t compute_num_thread_z;
+	uint32_t compute_pipelinestat_enable;
+	uint32_t compute_perfcount_enable;
+	uint32_t compute_pgm_lo;
+	uint32_t compute_pgm_hi;
+	uint32_t compute_tba_lo;
+	uint32_t compute_tba_hi;
+	uint32_t compute_tma_lo;
+	uint32_t compute_tma_hi;
+	uint32_t compute_pgm_rsrc1;
+	uint32_t compute_pgm_rsrc2;
+	uint32_t compute_vmid;
+	uint32_t compute_resource_limits;
+	uint32_t compute_static_thread_mgmt_se0;
+	uint32_t compute_static_thread_mgmt_se1;
+	uint32_t compute_tmpring_size;
+	uint32_t compute_static_thread_mgmt_se2;
+	uint32_t compute_static_thread_mgmt_se3;
+	uint32_t compute_restart_x;
+	uint32_t compute_restart_y;
+	uint32_t compute_restart_z;
+	uint32_t compute_thread_trace_enable;
+	uint32_t compute_misc_reserved;
+	uint32_t compute_dispatch_id;
+	uint32_t compute_threadgroup_id;
+	uint32_t compute_relaunch;
+	uint32_t compute_wave_restore_addr_lo;
+	uint32_t compute_wave_restore_addr_hi;
+	uint32_t compute_wave_restore_control;
+	uint32_t reserved9;
+	uint32_t reserved10;
+	uint32_t reserved11;
+	uint32_t reserved12;
+	uint32_t reserved13;
+	uint32_t reserved14;
+	uint32_t reserved15;
+	uint32_t reserved16;
+	uint32_t reserved17;
+	uint32_t reserved18;
+	uint32_t reserved19;
+	uint32_t reserved20;
+	uint32_t reserved21;
+	uint32_t reserved22;
+	uint32_t reserved23;
+	uint32_t reserved24;
+	uint32_t reserved25;
+	uint32_t reserved26;
+	uint32_t reserved27;
+	uint32_t reserved28;
+	uint32_t reserved29;
+	uint32_t reserved30;
+	uint32_t reserved31;
+	uint32_t reserved32;
+	uint32_t reserved33;
+	uint32_t reserved34;
+	uint32_t compute_user_data_0;
+	uint32_t compute_user_data_1;
+	uint32_t compute_user_data_2;
+	uint32_t compute_user_data_3;
+	uint32_t compute_user_data_4;
+	uint32_t compute_user_data_5;
+	uint32_t compute_user_data_6;
+	uint32_t compute_user_data_7;
+	uint32_t compute_user_data_8;
+	uint32_t compute_user_data_9;
+	uint32_t compute_user_data_10;
+	uint32_t compute_user_data_11;
+	uint32_t compute_user_data_12;
+	uint32_t compute_user_data_13;
+	uint32_t compute_user_data_14;
+	uint32_t compute_user_data_15;
+	uint32_t cp_compute_csinvoc_count_lo;
+	uint32_t cp_compute_csinvoc_count_hi;
+	uint32_t reserved35;
+	uint32_t reserved36;
+	uint32_t reserved37;
+	uint32_t cp_mqd_query_time_lo;
+	uint32_t cp_mqd_query_time_hi;
+	uint32_t cp_mqd_connect_start_time_lo;
+	uint32_t cp_mqd_connect_start_time_hi;
+	uint32_t cp_mqd_connect_end_time_lo;
+	uint32_t cp_mqd_connect_end_time_hi;
+	uint32_t cp_mqd_connect_end_wf_count;
+	uint32_t cp_mqd_connect_end_pq_rptr;
+	uint32_t cp_mqd_connect_endvi_sdma_mqd_pq_wptr;
+	uint32_t cp_mqd_connect_end_ib_rptr;
+	uint32_t reserved38;
+	uint32_t reserved39;
+	uint32_t cp_mqd_save_start_time_lo;
+	uint32_t cp_mqd_save_start_time_hi;
+	uint32_t cp_mqd_save_end_time_lo;
+	uint32_t cp_mqd_save_end_time_hi;
+	uint32_t cp_mqd_restore_start_time_lo;
+	uint32_t cp_mqd_restore_start_time_hi;
+	uint32_t cp_mqd_restore_end_time_lo;
+	uint32_t cp_mqd_restore_end_time_hi;
+	uint32_t disable_queue;
+	uint32_t reserved41;
+	uint32_t gds_cs_ctxsw_cnt0;
+	uint32_t gds_cs_ctxsw_cnt1;
+	uint32_t gds_cs_ctxsw_cnt2;
+	uint32_t gds_cs_ctxsw_cnt3;
+	uint32_t reserved42;
+	uint32_t reserved43;
+	uint32_t cp_pq_exe_status_lo;
+	uint32_t cp_pq_exe_status_hi;
+	uint32_t cp_packet_id_lo;
+	uint32_t cp_packet_id_hi;
+	uint32_t cp_packet_exe_status_lo;
+	uint32_t cp_packet_exe_status_hi;
+	uint32_t gds_save_base_addr_lo;
+	uint32_t gds_save_base_addr_hi;
+	uint32_t gds_save_mask_lo;
+	uint32_t gds_save_mask_hi;
+	uint32_t ctx_save_base_addr_lo;
+	uint32_t ctx_save_base_addr_hi;
+	uint32_t dynamic_cu_mask_addr_lo;
+	uint32_t dynamic_cu_mask_addr_hi;
+	uint32_t cp_mqd_base_addr_lo;
+	uint32_t cp_mqd_base_addr_hi;
+	uint32_t cp_hqd_active;
+	uint32_t cp_hqd_vmid;
+	uint32_t cp_hqd_persistent_state;
+	uint32_t cp_hqd_pipe_priority;
+	uint32_t cp_hqd_queue_priority;
+	uint32_t cp_hqd_quantum;
+	uint32_t cp_hqd_pq_base_lo;
+	uint32_t cp_hqd_pq_base_hi;
+	uint32_t cp_hqd_pq_rptr;
+	uint32_t cp_hqd_pq_rptr_report_addr_lo;
+	uint32_t cp_hqd_pq_rptr_report_addr_hi;
+	uint32_t cp_hqd_pq_wptr_poll_addr_lo;
+	uint32_t cp_hqd_pq_wptr_poll_addr_hi;
+	uint32_t cp_hqd_pq_doorbell_control;
+	uint32_t cp_hqd_pq_wptr;
+	uint32_t cp_hqd_pq_control;
+	uint32_t cp_hqd_ib_base_addr_lo;
+	uint32_t cp_hqd_ib_base_addr_hi;
+	uint32_t cp_hqd_ib_rptr;
+	uint32_t cp_hqd_ib_control;
+	uint32_t cp_hqd_iq_timer;
+	uint32_t cp_hqd_iq_rptr;
+	uint32_t cp_hqd_dequeue_request;
+	uint32_t cp_hqd_dma_offload;
+	uint32_t cp_hqd_sema_cmd;
+	uint32_t cp_hqd_msg_type;
+	uint32_t cp_hqd_atomic0_preop_lo;
+	uint32_t cp_hqd_atomic0_preop_hi;
+	uint32_t cp_hqd_atomic1_preop_lo;
+	uint32_t cp_hqd_atomic1_preop_hi;
+	uint32_t cp_hqd_hq_status0;
+	uint32_t cp_hqd_hq_control0;
+	uint32_t cp_mqd_control;
+	uint32_t cp_hqd_hq_status1;
+	uint32_t cp_hqd_hq_control1;
+	uint32_t cp_hqd_eop_base_addr_lo;
+	uint32_t cp_hqd_eop_base_addr_hi;
+	uint32_t cp_hqd_eop_control;
+	uint32_t cp_hqd_eop_rptr;
+	uint32_t cp_hqd_eop_wptr;
+	uint32_t cp_hqd_eop_done_events;
+	uint32_t cp_hqd_ctx_save_base_addr_lo;
+	uint32_t cp_hqd_ctx_save_base_addr_hi;
+	uint32_t cp_hqd_ctx_save_control;
+	uint32_t cp_hqd_cntl_stack_offset;
+	uint32_t cp_hqd_cntl_stack_size;
+	uint32_t cp_hqd_wg_state_offset;
+	uint32_t cp_hqd_ctx_save_size;
+	uint32_t cp_hqd_gds_resource_state;
+	uint32_t cp_hqd_error;
+	uint32_t cp_hqd_eop_wptr_mem;
+	uint32_t cp_hqd_eop_dones;
+	uint32_t reserved46;
+	uint32_t reserved47;
+	uint32_t reserved48;
+	uint32_t reserved49;
+	uint32_t reserved50;
+	uint32_t reserved51;
+	uint32_t reserved52;
+	uint32_t reserved53;
+	uint32_t reserved54;
+	uint32_t reserved55;
+	uint32_t iqtimer_pkt_header;
+	uint32_t iqtimer_pkt_dw0;
+	uint32_t iqtimer_pkt_dw1;
+	uint32_t iqtimer_pkt_dw2;
+	uint32_t iqtimer_pkt_dw3;
+	uint32_t iqtimer_pkt_dw4;
+	uint32_t iqtimer_pkt_dw5;
+	uint32_t iqtimer_pkt_dw6;
+	uint32_t iqtimer_pkt_dw7;
+	uint32_t iqtimer_pkt_dw8;
+	uint32_t iqtimer_pkt_dw9;
+	uint32_t iqtimer_pkt_dw10;
+	uint32_t iqtimer_pkt_dw11;
+	uint32_t iqtimer_pkt_dw12;
+	uint32_t iqtimer_pkt_dw13;
+	uint32_t iqtimer_pkt_dw14;
+	uint32_t iqtimer_pkt_dw15;
+	uint32_t iqtimer_pkt_dw16;
+	uint32_t iqtimer_pkt_dw17;
+	uint32_t iqtimer_pkt_dw18;
+	uint32_t iqtimer_pkt_dw19;
+	uint32_t iqtimer_pkt_dw20;
+	uint32_t iqtimer_pkt_dw21;
+	uint32_t iqtimer_pkt_dw22;
+	uint32_t iqtimer_pkt_dw23;
+	uint32_t iqtimer_pkt_dw24;
+	uint32_t iqtimer_pkt_dw25;
+	uint32_t iqtimer_pkt_dw26;
+	uint32_t iqtimer_pkt_dw27;
+	uint32_t iqtimer_pkt_dw28;
+	uint32_t iqtimer_pkt_dw29;
+	uint32_t iqtimer_pkt_dw30;
+	uint32_t iqtimer_pkt_dw31;
+	uint32_t reserved56;
+	uint32_t reserved57;
+	uint32_t reserved58;
+	uint32_t set_resources_header;
+	uint32_t set_resources_dw1;
+	uint32_t set_resources_dw2;
+	uint32_t set_resources_dw3;
+	uint32_t set_resources_dw4;
+	uint32_t set_resources_dw5;
+	uint32_t set_resources_dw6;
+	uint32_t set_resources_dw7;
+	uint32_t reserved59;
+	uint32_t reserved60;
+	uint32_t reserved61;
+	uint32_t reserved62;
+	uint32_t reserved63;
+	uint32_t reserved64;
+	uint32_t reserved65;
+	uint32_t reserved66;
+	uint32_t reserved67;
+	uint32_t reserved68;
+	uint32_t reserved69;
+	uint32_t reserved70;
+	uint32_t reserved71;
+	uint32_t reserved72;
+	uint32_t reserved73;
+	uint32_t reserved74;
+	uint32_t reserved75;
+	uint32_t reserved76;
+	uint32_t reserved77;
+	uint32_t reserved78;
+	uint32_t reserved_t[256];
+};
+
+struct vi_mqd_allocation {
+	struct vi_mqd mqd;
+	uint32_t wptr_poll_mem;
+	uint32_t rptr_report_mem;
+	uint32_t dyamic_cu_mask;
+	uint32_t dyamic_rb_mask;
+};
+
+struct cz_mqd {
 	uint32_t header;
 	uint32_t compute_dispatch_initiator;
 	uint32_t compute_dim_x;

+ 16 - 7
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c

@@ -2865,6 +2865,7 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 		void *state, struct pp_power_state *power_state,
 		void *pp_table, uint32_t classification_flag)
 {
+	ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_V2;
 	struct vega10_power_state *vega10_power_state =
 			cast_phw_vega10_power_state(&(power_state->hardware));
 	struct vega10_performance_level *performance_level;
@@ -2941,11 +2942,16 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 
 	performance_level = &(vega10_power_state->performance_levels
 				[vega10_power_state->performance_level_count++]);
-
 	performance_level->soc_clock = socclk_dep_table->entries
-			[state_entry->ucSocClockIndexHigh].ulClk;
-	performance_level->gfx_clock = gfxclk_dep_table->entries
+				[state_entry->ucSocClockIndexHigh].ulClk;
+	if (gfxclk_dep_table->ucRevId == 0) {
+		performance_level->gfx_clock = gfxclk_dep_table->entries
 			[state_entry->ucGfxClockIndexHigh].ulClk;
+	} else if (gfxclk_dep_table->ucRevId == 1) {
+		patom_record_V2 = (ATOM_Vega10_GFXCLK_Dependency_Record_V2 *)gfxclk_dep_table->entries;
+		performance_level->gfx_clock = patom_record_V2[state_entry->ucGfxClockIndexHigh].ulClk;
+	}
+
 	performance_level->mem_clock = mclk_dep_table->entries
 			[state_entry->ucMemClockIndexHigh].ulMemClk;
 	return 0;
@@ -3349,7 +3355,6 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels(
 				dpm_table->
 				gfx_table.dpm_levels[dpm_table->gfx_table.count - 1].
 				value = sclk;
-
 				if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_OD6PlusinACSupport) ||
 					phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -3472,7 +3477,6 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels(
 					return result);
 		}
 	}
-
 	return result;
 }
 
@@ -3828,13 +3832,18 @@ static int vega10_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
 static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr,
 		struct pp_gpu_power *query)
 {
+	uint32_t value;
+
 	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr->smumgr,
 			PPSMC_MSG_GetCurrPkgPwr),
 			"Failed to get current package power!",
 			return -EINVAL);
 
-	return vega10_read_arg_from_smc(hwmgr->smumgr,
-			&query->average_gpu_power);
+	vega10_read_arg_from_smc(hwmgr->smumgr, &value);
+	/* power value is an integer */
+	query->average_gpu_power = value << 8;
+
+	return 0;
 }
 
 static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,

+ 9 - 0
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h

@@ -144,6 +144,15 @@ typedef struct _ATOM_Vega10_GFXCLK_Dependency_Record {
 	USHORT usAVFSOffset;                                        /* AVFS Voltage offset */
 } ATOM_Vega10_GFXCLK_Dependency_Record;
 
+typedef struct _ATOM_Vega10_GFXCLK_Dependency_Record_V2 {
+	ULONG  ulClk;
+	UCHAR  ucVddInd;
+	USHORT usCKSVOffsetandDisable;
+	USHORT usAVFSOffset;
+	UCHAR  ucACGEnable;
+	UCHAR  ucReserved[3];
+} ATOM_Vega10_GFXCLK_Dependency_Record_V2;
+
 typedef struct _ATOM_Vega10_MCLK_Dependency_Record {
 	ULONG  ulMemClk;                                            /* Clock Frequency */
 	UCHAR  ucVddInd;                                            /* SOC_VDD index */

+ 33 - 9
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c

@@ -585,6 +585,7 @@ static int get_gfxclk_voltage_dependency_table(
 	uint32_t table_size, i;
 	struct phm_ppt_v1_clock_voltage_dependency_table
 				*clk_table;
+	ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_v2;
 
 	PP_ASSERT_WITH_CODE((clk_dep_table->ucNumEntries != 0),
 			"Invalid PowerPlay Table!", return -1);
@@ -601,18 +602,41 @@ static int get_gfxclk_voltage_dependency_table(
 
 	clk_table->count = clk_dep_table->ucNumEntries;
 
-	for (i = 0; i < clk_table->count; i++) {
-		clk_table->entries[i].vddInd =
+	if (clk_dep_table->ucRevId == 0) {
+		for (i = 0; i < clk_table->count; i++) {
+			clk_table->entries[i].vddInd =
 				clk_dep_table->entries[i].ucVddInd;
-		clk_table->entries[i].clk =
+			clk_table->entries[i].clk =
 				le32_to_cpu(clk_dep_table->entries[i].ulClk);
-		clk_table->entries[i].cks_enable =
-				(((clk_dep_table->entries[i].usCKSVOffsetandDisable & 0x8000)
+			clk_table->entries[i].cks_enable =
+				(((le16_to_cpu(clk_dep_table->entries[i].usCKSVOffsetandDisable) & 0x8000)
 						>> 15) == 0) ? 1 : 0;
-		clk_table->entries[i].cks_voffset =
-				(clk_dep_table->entries[i].usCKSVOffsetandDisable & 0x7F);
-		clk_table->entries[i].sclk_offset =
-				clk_dep_table->entries[i].usAVFSOffset;
+			clk_table->entries[i].cks_voffset =
+				le16_to_cpu(clk_dep_table->entries[i].usCKSVOffsetandDisable) & 0x7F;
+			clk_table->entries[i].sclk_offset =
+				le16_to_cpu(clk_dep_table->entries[i].usAVFSOffset);
+		}
+	} else if (clk_dep_table->ucRevId == 1) {
+		patom_record_v2 = (ATOM_Vega10_GFXCLK_Dependency_Record_V2 *)clk_dep_table->entries;
+		for (i = 0; i < clk_table->count; i++) {
+			clk_table->entries[i].vddInd =
+					patom_record_v2->ucVddInd;
+			clk_table->entries[i].clk =
+					le32_to_cpu(patom_record_v2->ulClk);
+			clk_table->entries[i].cks_enable =
+					(((le16_to_cpu(patom_record_v2->usCKSVOffsetandDisable) & 0x8000)
+							>> 15) == 0) ? 1 : 0;
+			clk_table->entries[i].cks_voffset =
+					le16_to_cpu(patom_record_v2->usCKSVOffsetandDisable) & 0x7F;
+			clk_table->entries[i].sclk_offset =
+					le16_to_cpu(patom_record_v2->usAVFSOffset);
+			patom_record_v2++;
+		}
+	} else {
+		kfree(clk_table);
+		PP_ASSERT_WITH_CODE(false,
+			"Unsupported GFXClockDependencyTable Revision!",
+			return -EINVAL);
 	}
 
 	*pp_vega10_clk_dep_table = clk_table;

+ 2 - 2
drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h

@@ -124,8 +124,8 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_NumOfDisplays                  0x56
 #define PPSMC_MSG_ReadSerialNumTop32             0x58
 #define PPSMC_MSG_ReadSerialNumBottom32          0x59
-#define PPSMC_MSG_GetCurrPkgPwr                  0x5C
-#define PPSMC_Message_Count                      0x5D
+#define PPSMC_MSG_GetCurrPkgPwr                  0x61
+#define PPSMC_Message_Count                      0x62
 
 
 typedef int PPSMC_Msg;

+ 1 - 1
drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c

@@ -72,7 +72,7 @@ static int cz_send_msg_to_smc_async(struct pp_smumgr *smumgr,
 	result = SMUM_WAIT_FIELD_UNEQUAL(smumgr,
 					SMU_MP1_SRBM2P_RESP_0, CONTENT, 0);
 	if (result != 0) {
-		pr_err("cz_send_msg_to_smc_async failed\n");
+		pr_err("cz_send_msg_to_smc_async (0x%04x) failed\n", msg);
 		return result;
 	}
 

+ 3 - 1
drivers/gpu/drm/ttm/ttm_bo.c

@@ -1353,7 +1353,6 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 		       mem_type);
 		return ret;
 	}
-	dma_fence_put(man->move);
 
 	man->use_type = false;
 	man->has_type = false;
@@ -1369,6 +1368,9 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 		ret = (*man->func->takedown)(man);
 	}
 
+	dma_fence_put(man->move);
+	man->move = NULL;
+
 	return ret;
 }
 EXPORT_SYMBOL(ttm_bo_clean_mm);

+ 3 - 0
include/uapi/drm/amdgpu_drm.h

@@ -764,6 +764,7 @@ struct drm_amdgpu_info_device {
 	__u64 max_memory_clock;
 	/* cu information */
 	__u32 cu_active_number;
+	/* NOTE: cu_ao_mask is INVALID, DON'T use it */
 	__u32 cu_ao_mask;
 	__u32 cu_bitmap[4][4];
 	/** Render backend pipe mask. One render backend is CB+DB. */
@@ -818,6 +819,8 @@ struct drm_amdgpu_info_device {
 	/* max gs wavefront per vgt*/
 	__u32 max_gs_waves_per_vgt;
 	__u32 _pad1;
+	/* always on cu bitmap */
+	__u32 cu_ao_bitmap[4][4];
 };
 
 struct drm_amdgpu_info_hw_ip {