Explorar o código

drm/amdgpu: Enable the gpu reset from KFD

Hook up the gpu_recover callback from KFD to amdgpu to enable
handling of GPU hangs detected by KFD.

Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Shaoyun Liu %!s(int64=7) %!d(string=hai) anos
pai
achega
24da5a9ca6

+ 7 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -263,6 +263,13 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
 	return r;
 	return r;
 }
 }
 
 
+void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	amdgpu_device_gpu_recover(adev, NULL, false);
+}
+
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr)
 			void **cpu_ptr)

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -130,6 +130,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
 
 
 int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
 int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
 
 
+void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
+
 /* Shared API */
 /* Shared API */
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **mem_obj, uint64_t *gpu_addr,

+ 2 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

@@ -218,7 +218,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
 	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
 	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
-	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg
+	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+	.gpu_recover = amdgpu_amdkfd_gpu_reset
 };
 };
 
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)

+ 2 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

@@ -176,7 +176,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.invalidate_tlbs = invalidate_tlbs,
 	.invalidate_tlbs = invalidate_tlbs,
 	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
-	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
+	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
+	.gpu_recover = amdgpu_amdkfd_gpu_reset
 };
 };
 
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)

+ 1 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

@@ -213,6 +213,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.invalidate_tlbs = invalidate_tlbs,
 	.invalidate_tlbs = invalidate_tlbs,
 	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
+	.gpu_recover = amdgpu_amdkfd_gpu_reset
 };
 };
 
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)