issues:
gpu_recover() is re-entered by the mailbox interrupt
handler mxgpu_nv.c

fix:
we need to bypass the gpu_recover() invoke in mailbox
interrupt as long as the timeout is not infinite (thus the TDR
will be triggered automatically after time out, no need to invoke
gpu_recover() through mailbox interrupt.

Signed-off-by: Monk Liu <monk....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 0d8767e..1c3a7d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -269,7 +269,11 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
        }
 
        /* Trigger recovery for world switch failure if no TDR */
-       if (amdgpu_device_should_recover_gpu(adev))
+       if (amdgpu_device_should_recover_gpu(adev)
+               && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+               adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+               adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+               adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
                amdgpu_device_gpu_recover(adev, NULL);
 }
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to