In certain error scenarios (e.g., malformed commands), a fence may never become 
signaled, causing the kernel to hang indefinitely when waiting with 
MAX_SCHEDULE_TIMEOUT.
To prevent such hangs and ensure system responsiveness, replace the indefinite 
timeout with a reasonable 2-second limit.

This ensures that even if a fence never signals, the wait will time out and 
appropriate error handling can take place,
rather than stalling the driver indefinitely.

v2: make timeout per queue type (adev->gfx_timeout vs adev->compute_timeout vs 
adev->sdma_timeout) to be consistent with kernel queues. (Alex)

Suggested-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 98110f543307..402307581293 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -367,11 +367,29 @@ static int amdgpu_userq_map_helper(struct 
amdgpu_usermode_queue *queue)
 static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue 
*queue)
 {
        struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+       struct amdgpu_device *adev = uq_mgr->adev;
        struct dma_fence *f = queue->last_fence;
+       signed long timeout;
        int ret = 0;
 
+       /* Determine timeout based on queue type */
+       switch (queue->queue_type) {
+       case AMDGPU_RING_TYPE_GFX:
+               timeout = adev->gfx_timeout;
+               break;
+       case AMDGPU_RING_TYPE_COMPUTE:
+               timeout = adev->compute_timeout;
+               break;
+       case AMDGPU_RING_TYPE_SDMA:
+               timeout = adev->sdma_timeout;
+               break;
+       default:
+               timeout = adev->gfx_timeout;
+               break;
+       }
+
        if (f && !dma_fence_is_signaled(f)) {
-               ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
+               ret = dma_fence_wait_timeout(f, true, timeout);
                if (ret <= 0) {
                        drm_file_err(uq_mgr->file, "Timed out waiting for 
fence=%llu:%llu\n",
                                     f->context, f->seqno);
-- 
2.49.0

Reply via email to