Detect queue reset while a queue-scoped WAIT_EVENT waiter is blocked.

If the selected queue enters the AMDGPU_USERQ_STATE_HUNG state while a
queue-scoped WAIT_EVENT wait is in progress, return -EIO instead of
waiting indefinitely.

The queue is looked up by its queue ID (queue->qid) in the per-file
user queue manager's xarray, not by doorbell index.

This does not change WAIT_EVENT UAPI semantics.

Changes in v5:
- Use queue->qid instead of doorbell_index for queue-scoped event
  handling (Alex)

Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Signed-off-by: Srinivasan Shanmugam <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     | 20 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |  2 ++
 .../gpu/drm/amd/amdgpu/amdgpu_wait_event.c    | 15 ++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 7d737d55c817..1dd0d5f152af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -160,6 +160,26 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work)
        amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work);
 }
 
+/* Return -EIO if the queue at @queue_id is hung; 0 otherwise or if not found. */
+int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                             u32 queue_id)
+{
+       struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+       struct amdgpu_usermode_queue *queue;
+       int ret = 0;
+
+       /*
+        * NOTE(review): only rcu_read_lock() covers this lookup and the
+        * state read; confirm that is enough to keep the queue alive.
+        */
+       rcu_read_lock();
+       queue = xa_load(&fpriv->userq_mgr.userq_xa, queue_id);
+       if (queue && queue->state == AMDGPU_USERQ_STATE_HUNG)
+               ret = -EIO;
+       rcu_read_unlock();
+       return ret;
+}
+
 /*
  * Start hang detection for a user queue fence. A delayed work will be scheduled
  * to reset the queues when the fence doesn't signal in time.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 66ef69311205..8a4d517e5247 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -178,6 +178,8 @@ void amdgpu_userq_reset_work(struct work_struct *work);
 void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
 int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
 void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue);
+int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                             u32 queue_id);
 void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell);
 
 int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c
index f900a7f5f90e..8ecc4e40a95c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c
@@ -29,6 +29,9 @@
 #include "amdgpu.h"
 #include "amdgpu_wait_event.h"
 
+int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                             u32 queue_id);
+
 static int amdgpu_wait_event_do_compare(u64 addr, u64 value, u64 mask, u16 op)
 {
        u64 rvalue;
@@ -282,6 +285,14 @@ void amdgpu_wait_event_push_gpu_reset(struct amdgpu_wait_event_mgr *mgr,
        amdgpu_wait_event_push_common(mgr, &data);
 }
 
+/* Forward the queue id carried in @args to the user queue hang check. */
+static int
+amdgpu_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                   const struct drm_amdgpu_wait_event *args)
+{
+       return amdgpu_userq_wait_event_check_queue_reset(file_priv, args->queue_id);
+}
+
 int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *file_priv)
 {
@@ -348,6 +359,10 @@ int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data,
                if (signal_pending(current))
                        return -ERESTARTSYS;
 
+               ret = amdgpu_wait_event_check_queue_reset(file_priv, args);
+               if (ret)
+                       return ret;
+
                if (!timeout) {
                        rec = amdgpu_wait_event_peek_match(mgr, args);
                        if (rec) {
-- 
2.34.1

Reply via email to