Detect queue reset while a queue-scoped WAIT_EVENT waiter is blocked. If the selected queue enters the AMDGPU_USERQ_STATE_HUNG state while a queue-scoped WAIT_EVENT wait is in progress, return -EIO instead of waiting indefinitely.
The queue lookup uses the per-file user queue xarray (userq_xa), keyed by queue ID rather than by doorbell index. This does not change WAIT_EVENT UAPI semantics. Changes in v5: - Use queue->qid instead of doorbell_index for queue-scoped event handling (Alex) Cc: Alex Deucher <[email protected]> Cc: Christian König <[email protected]> Signed-off-by: Srinivasan Shanmugam <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 20 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 ++ .../gpu/drm/amd/amdgpu/amdgpu_wait_event.c | 15 ++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 7d737d55c817..1dd0d5f152af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -160,6 +160,26 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work) amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work); } +int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv, + u32 queue_id) +{ + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_usermode_queue *queue; + + rcu_read_lock(); + + queue = xa_load(&uq_mgr->userq_xa, queue_id); + if (queue && queue->state == AMDGPU_USERQ_STATE_HUNG) { + rcu_read_unlock(); + return -EIO; + } + + rcu_read_unlock(); + + return 0; +} + /* * Start hang detection for a user queue fence. A delayed work will be scheduled * to reset the queues when the fence doesn't signal in time. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 66ef69311205..8a4d517e5247 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -178,6 +178,8 @@ void amdgpu_userq_reset_work(struct work_struct *work); void amdgpu_userq_pre_reset(struct amdgpu_device *adev); int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost); void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue); +int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv, + u32 queue_id); void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell); int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c index f900a7f5f90e..8ecc4e40a95c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c @@ -29,6 +29,9 @@ #include "amdgpu.h" #include "amdgpu_wait_event.h" +int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv, + u32 queue_id); + static int amdgpu_wait_event_do_compare(u64 addr, u64 value, u64 mask, u16 op) { u64 rvalue; @@ -282,6 +285,14 @@ void amdgpu_wait_event_push_gpu_reset(struct amdgpu_wait_event_mgr *mgr, amdgpu_wait_event_push_common(mgr, &data); } +static int amdgpu_wait_event_check_queue_reset( + struct drm_file *file_priv, + const struct drm_amdgpu_wait_event *args) +{ + return amdgpu_userq_wait_event_check_queue_reset(file_priv, + args->queue_id); +} + int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -348,6 +359,10 @@ int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data, if (signal_pending(current)) return -ERESTARTSYS; + ret = amdgpu_wait_event_check_queue_reset(file_priv, args); + if (ret) + return ret; + if (!timeout) { rec = amdgpu_wait_event_peek_match(mgr, 
args); if (rec) { -- 2.34.1
