After a per-queue reset via MES, verify that the queue is functional by
performing a ring test. If the test fails, fall back to a pipe reset
to ensure proper recovery.

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 4db6b4e398e5..bec99d149845 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -7044,13 +7044,16 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
                               struct amdgpu_fence *timedout_fence)
 {
        struct amdgpu_device *adev = ring->adev;
+       int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE;
        int r = 0;
 
        amdgpu_ring_reset_helper_begin(ring, timedout_fence);
 
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
+pipe_reset:
        if (r) {
                dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+               reset_mode = AMDGPU_RESET_TYPE_PER_PIPE;
                amdgpu_gfx_mec_pre_pipe_reset(adev, ring);
                r = gfx_v11_0_reset_compute_pipe(ring);
                if (r) {
@@ -7071,6 +7074,13 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
                return r;
        }
 
+       if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) {
+               if (amdgpu_ring_reset_helper_end(ring, timedout_fence))
+                       goto pipe_reset;
+               else
+                       return 0;
+       }
+
        return amdgpu_ring_reset_helper_end(ring, timedout_fence);
 }
 
-- 
2.49.0

Reply via email to