If the GPU has begun recovery, skip scheduling IBs. Otherwise,
GPU recovery randomly fails.

Signed-off-by: Dennis Li <dennis...@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index dcfe8a3b03ff..054d7b0357fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -212,6 +212,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
        struct dma_fence *fence = NULL, *finished;
        struct amdgpu_job *job;
        int r = 0;
+       int locked;
 
        job = to_amdgpu_job(sched_job);
        finished = &job->base.s_fence->finished;
@@ -220,6 +221,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 
        trace_amdgpu_sched_run_job(job);
 
+       locked = down_read_trylock(&ring->adev->reset_sem);
+       if (!locked)
+               dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if 
GPU recovery */
+
        if (job->vram_lost_counter != 
atomic_read(&ring->adev->vram_lost_counter))
                dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if 
VRAM lost */
 
@@ -231,6 +236,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
                if (r)
                        DRM_ERROR("Error scheduling IBs (%d)\n", r);
        }
+
+       if (locked)
+               up_read(&ring->adev->reset_sem);
+
        /* if gpu reset, hw fence will be replaced here */
        dma_fence_put(job->fence);
        job->fence = dma_fence_get(fence);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to