@@ -2648,22 +2648,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
 	/* store modesetting */
 	if (amdgpu_device_has_dc_support(adev))
 		state = drm_atomic_helper_suspend(adev->ddev);
 
-	/* block scheduler */
+	/* block all schedulers and reset given job's ring */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		/* only focus on the ring hit timeout if &job not NULL */
+		kthread_park(ring->sched.thread);
+
 		if (job && job->ring->idx != i)
 			continue;
 
-		kthread_park(ring->sched.thread);
 		drm_sched_hw_job_reset(&ring->sched, &job->base);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2706,33 +2707,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			}
 			dma_fence_put(fence);
 		}
+	}
 
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-
-			if (!ring || !ring->sched.thread)
-				continue;
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
 
-			/* only focus on the ring hit timeout if &job not NULL */
-			if (job && job->ring->idx != i)
-				continue;
+		if (!ring || !ring->sched.thread)
+			continue;
 
+		/* only need recovery sched of the given job's ring
+		 * or all rings (in the case @job is NULL)
+		 * after above amdgpu_reset accomplished
+		 */
+		if ((!job || job->ring->idx == i) && !r)
 			drm_sched_job_recovery(&ring->sched);
-			kthread_unpark(ring->sched.thread);
-		}
-	} else {
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-
-			if (!ring || !ring->sched.thread)
-				continue;
-
-			/* only focus on the ring hit timeout if &job not NULL */
-			if (job && job->ring->idx != i)
-				continue;
-
-			kthread_unpark(adev->rings[i]->sched.thread);
-		}
+		kthread_unpark(ring->sched.thread);
 	}
 
 	if (amdgpu_device_has_dc_support(adev)) {