Jelajahi Sumber

drm/amdgpu: check if vram is lost v2

Back up the first 64 bytes of the GART table as a reset magic, and check
whether the magic is unchanged after a GPU hardware reset.
v2: use memcmp instead of a manual implementation.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Chunming Zhou 8 tahun lalu
induk
melakukan
0c49e0b8a4

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -1427,6 +1427,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
+#define AMDGPU_RESET_MAGIC_NUM 64
 struct amdgpu_device {
 	struct device			*dev;
 	struct drm_device		*ddev;
@@ -1619,6 +1620,7 @@ struct amdgpu_device {
 
 	/* record hw reset is performed */
 	bool has_hw_reset;
+	u8				reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
 };
 

+ 19 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -1658,6 +1658,17 @@ static int amdgpu_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
+{
+	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
+}
+
+static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
+{
+	return !!memcmp(adev->gart.ptr, adev->reset_magic,
+			AMDGPU_RESET_MAGIC_NUM);
+}
+
 static int amdgpu_late_init(struct amdgpu_device *adev)
 {
 	int i = 0, r;
@@ -1688,6 +1699,8 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
 		}
 	}
 
+	amdgpu_fill_reset_magic(adev);
+
 	return 0;
 }
 
@@ -2762,7 +2775,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
 	int i, r;
 	int resched;
-	bool need_full_reset;
+	bool need_full_reset, vram_lost = false;
 
 	if (!amdgpu_check_soft_reset(adev)) {
 		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@@ -2825,12 +2838,17 @@ retry:
 			r = amdgpu_resume_phase1(adev);
 			if (r)
 				goto out;
+			vram_lost = amdgpu_check_vram_lost(adev);
+			if (vram_lost)
+				DRM_ERROR("VRAM is lost!\n");
 			r = amdgpu_ttm_recover_gart(adev);
 			if (r)
 				goto out;
 			r = amdgpu_resume_phase2(adev);
 			if (r)
 				goto out;
+			if (vram_lost)
+				amdgpu_fill_reset_magic(adev);
 		}
 	}
 out: