After a per-queue reset via MES, verify that the queue is functional by performing a ring test. If the test fails, fall back to a full pipe reset to ensure proper recovery.
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 265ffca70ca2..66dc8098428f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -5428,13 +5428,16 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
 			       struct amdgpu_fence *timedout_fence)
 {
 	struct amdgpu_device *adev = ring->adev;
+	int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE;
 	int r;
 
 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
 
 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
+pipe_reset:
 	if (r) {
 		dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+		reset_mode = AMDGPU_RESET_TYPE_PER_PIPE;
 		amdgpu_gfx_mec_pre_pipe_reset(adev, ring);
 		r = gfx_v12_0_reset_compute_pipe(ring);
 		if (r) {
@@ -5455,6 +5458,14 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
 		return r;
 	}
 
+	if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) {
+		/* Keep the error in r: pipe_reset re-tests r to pick the fallback. */
+		r = amdgpu_ring_reset_helper_end(ring, timedout_fence);
+		if (r)
+			goto pipe_reset;
+		return 0;
+	}
+
 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
 }
 
-- 
2.49.0
