On 24.09.25 12:26, Zhang, Jesse(Jie) wrote: > [AMD Official Use Only - AMD Internal Distribution Only] > >> -----Original Message----- >> From: Koenig, Christian <christian.koe...@amd.com> >> Sent: Wednesday, September 24, 2025 5:29 PM >> To: Zhang, Jesse(Jie) <jesse.zh...@amd.com>; amd-gfx@lists.freedesktop.org >> Cc: Deucher, Alexander <alexander.deuc...@amd.com> >> Subject: Re: [v6 1/2] drm/amdgpu: Convert amdgpu userqueue management from >> IDR to XArray >> >> On 24.09.25 09:34, Jesse.Zhang wrote: >>> This commit refactors the AMDGPU userqueue management subsystem to >>> replace IDR (ID Allocation) with XArray for improved performance, >>> scalability, and maintainability. The changes address several issues >>> with the previous IDR implementation and provide better locking semantics. >>> >>> Key changes: >>> >>> 1. **Global XArray Introduction**: >>> - Added `userq_doorbell_xa` to `struct amdgpu_device` for global queue >> tracking >>> - Uses doorbell_index as key for efficient global lookup >>> - Replaces the previous `userq_mgr_list` linked list approach >>> >>> 2. **Per-process XArray Conversion**: >>> - Replaced `userq_idr` with `userq_mgr_xa` in `struct amdgpu_userq_mgr` >>> - Maintains per-process queue tracking with queue_id as key >>> - Uses XA_FLAGS_ALLOC for automatic ID allocation >>> >>> 3. **Locking Improvements**: >>> - Removed global `userq_mutex` from `struct amdgpu_device` >>> - Replaced with fine-grained XArray locking using XArray's internal >>> spinlocks >>> >>> 4. **Runtime Idle Check Optimization**: >>> - Updated `amdgpu_runtime_idle_check_userq()` to use xa_empty >>> >>> 5. **Queue Management Functions**: >>> - Converted all IDR operations to equivalent XArray functions: >>> - `idr_alloc()` → `xa_alloc()` >>> - `idr_find()` → `xa_load()` >>> - `idr_remove()` → `xa_erase()` >>> - `idr_for_each()` → `xa_for_each()` >>> >>> Benefits: >>> - **Performance**: XArray provides better scalability for large >>> numbers of queues >>> - **Memory Efficiency**: Reduced memory overhead compared to IDR >>> - **Thread Safety**: Improved locking semantics with XArray's internal >>> spinlocks >>> >>> v2: rename userq_global_xa/userq_xa to userq_doorbell_xa/userq_mgr_xa >>> Remove xa_lock and use its own lock. >>> >>> v3: Set queue->userq_mgr = uq_mgr in amdgpu_userq_create() >>> >>> Suggested-by: Christian König <christian.koe...@amd.com> >>> Signed-off-by: Jesse Zhang <jesse.zh...@amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 143 +++++++++--------- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 8 +- >>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 4 +- >>> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 22 ++- >>> 7 files changed, 97 insertions(+), 107 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> index 2a0df4cabb99..ae10832a09f4 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> @@ -1174,6 +1174,12 @@ struct amdgpu_device { >>> * queue fence. 
>>> */ >>> struct xarray userq_xa; >>> + /** >>> + * @userq_doorbell_xa: Global user queue map (doorbell index → queue) >>> + * Key: doorbell_index (unique global identifier for the queue) >>> + * Value: struct amdgpu_usermode_queue >>> + */ >>> + struct xarray userq_doorbell_xa; >>> >>> /* df */ >>> struct amdgpu_df df; >>> @@ -1308,8 +1314,6 @@ struct amdgpu_device { >>> */ >>> bool apu_prefer_gtt; >>> >>> - struct list_head userq_mgr_list; >>> - struct mutex userq_mutex; >>> bool userq_halt_for_enforce_isolation; >>> struct amdgpu_uid *uid_info; >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> index 0fdfde3dcb9f..e066f7ff2a82 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> @@ -4483,7 +4483,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, >>> mutex_init(&adev->gfx.userq_sch_mutex); >>> mutex_init(&adev->gfx.workload_profile_mutex); >>> mutex_init(&adev->vcn.workload_profile_mutex); >>> - mutex_init(&adev->userq_mutex); >>> >>> amdgpu_device_init_apu_flags(adev); >>> >>> @@ -4511,7 +4510,7 @@ int amdgpu_device_init(struct amdgpu_device >>> *adev, >>> >>> INIT_LIST_HEAD(&adev->pm.od_kobj_list); >>> >>> - INIT_LIST_HEAD(&adev->userq_mgr_list); >>> + xa_init(&adev->userq_doorbell_xa); >>> >>> INIT_DELAYED_WORK(&adev->delayed_init_work, >>> amdgpu_device_delayed_init_work_handler); >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> index ece251cbe8c3..fdea7cd4b3ce 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> @@ -2771,22 +2771,8 @@ static int amdgpu_runtime_idle_check_userq(struct >> device *dev) >>> struct pci_dev *pdev = to_pci_dev(dev); >>> struct drm_device *drm_dev = pci_get_drvdata(pdev); >>> struct amdgpu_device *adev = drm_to_adev(drm_dev); >>> - struct amdgpu_usermode_queue *queue; >>> - struct amdgpu_userq_mgr *uqm, *tmp; >>> - int queue_id; >>> - int ret = 0; >>> - >>> - mutex_lock(&adev->userq_mutex); >>> - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { >>> - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { >>> - ret = -EBUSY; >>> - goto done; >>> - } >>> - } >>> -done: >>> - mutex_unlock(&adev->userq_mutex); >>> >>> - return ret; >>> + return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY; >>> } >>> >>> static int amdgpu_pmops_runtime_suspend(struct device *dev) diff >>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> index b649ac0cedff..0712b88782c8 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> @@ -180,17 +180,28 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr >>> *uq_mgr, { >>> struct amdgpu_device *adev = uq_mgr->adev; >>> const struct amdgpu_userq_funcs *uq_funcs = >>> adev->userq_funcs[queue->queue_type]; >>> + unsigned long flags; >>> >>> uq_funcs->mqd_destroy(uq_mgr, queue); >>> amdgpu_userq_fence_driver_free(queue); >>> - idr_remove(&uq_mgr->userq_idr, queue_id); >>> + >>> + /* Use interrupt-safe locking since IRQ handlers may access these >>> +XArrays */ >> >> Wait a second where are IRQ handlers accessing those XAs? > [Zhang, Jesse(Jie)] gfx_v11_0_eop_irq on gfx11 also accesses these XAs. With > lockdep enabled, a warning will appear without xa_lock_irqsave.
Yeah, but only the fence_drv xa needs to be irq safe. If the uq_mgr->userq_mgr_xa is used in the irq handler then something seems to be wrong here. > We can replace it with xa_erase_irq, just like you suggested with > xa_store_irq. Yeah that's certainly a good idea. > >> >>> + xa_lock_irqsave(&uq_mgr->userq_mgr_xa, flags); >>> + __xa_erase(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id); >>> + xa_unlock_irqrestore(&uq_mgr->userq_mgr_xa, flags); >>> + >>> + xa_lock_irqsave(&adev->userq_doorbell_xa, flags); >>> + __xa_erase(&adev->userq_doorbell_xa, queue->doorbell_index); >>> + xa_unlock_irqrestore(&adev->userq_doorbell_xa, flags); >>> + >>> + queue->userq_mgr = NULL; >>> kfree(queue); >>> } >>> >>> static struct amdgpu_usermode_queue * amdgpu_userq_find(struct >>> amdgpu_userq_mgr *uq_mgr, int qid) { >>> - return idr_find(&uq_mgr->userq_idr, qid); >>> + return xa_load(&uq_mgr->userq_mgr_xa, qid); >>> } >>> >>> void >>> @@ -462,8 +473,10 @@ amdgpu_userq_create(struct drm_file *filp, union >> drm_amdgpu_userq *args) >>> struct amdgpu_db_info db_info; >>> char *queue_name; >>> bool skip_map_queue; >>> + unsigned long flags; >>> + u32 qid; >>> uint64_t index; >>> - int qid, r = 0; >>> + int r = 0; >>> int priority = >>> (args->in.flags & >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> >>> >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; >>> @@ -486,7 +499,6 @@ amdgpu_userq_create(struct drm_file *filp, union >> drm_amdgpu_userq *args) >>> * >>> * This will also make sure we have a valid eviction fence ready to be >>> used. >>> */ >>> - mutex_lock(&adev->userq_mutex); >>> amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); >>> >>> uq_funcs = adev->userq_funcs[args->in.ip_type]; >>> @@ -546,9 +558,17 @@ amdgpu_userq_create(struct drm_file *filp, union >> drm_amdgpu_userq *args) >>> goto unlock; >>> } >>> >>> + xa_lock_irqsave(&adev->userq_doorbell_xa, flags); >>> + r =xa_err(__xa_store(&adev->userq_doorbell_xa, index, queue, >> GFP_KERNEL)); >>> + if (r) { >>> + kfree(queue); >>> + xa_unlock_irqrestore(&adev->userq_doorbell_xa, flags); >>> + goto unlock; >>> + } >>> + xa_unlock_irqrestore(&adev->userq_doorbell_xa, flags); >> >> You can use xa_store_irq() here.
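For illustration, a minimal sketch of that simplification (untested; it assumes the per-process userq_mgr_xa really is never touched from interrupt context, so a plain xa_erase() is enough there, while the global doorbell XArray keeps the irq-safe variants):

    /* amdgpu_userq_create(): publish the queue under its doorbell index.
     * xa_store_irq() takes the XArray lock with interrupts disabled
     * internally, replacing the open-coded xa_lock_irqsave()/__xa_store().
     */
    r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
    if (r) {
            kfree(queue);
            goto unlock;
    }

    /* amdgpu_userq_cleanup(): drop both mappings again. */
    xa_erase(&uq_mgr->userq_mgr_xa, queue_id);
    xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);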
>> >>> >>> - qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, >> AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); >>> - if (qid < 0) { >>> + r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, >> AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); >>> + if (r) { >>> drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); >>> amdgpu_userq_fence_driver_free(queue); >>> uq_funcs->mqd_destroy(uq_mgr, queue); @@ -556,6 +576,7 @@ >>> amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) >>> r = -ENOMEM; >>> goto unlock; >>> } >>> + queue->userq_mgr = uq_mgr; >>> >>> /* don't map the queue if scheduling is halted */ >>> if (adev->userq_halt_for_enforce_isolation && @@ -568,7 +589,7 @@ >>> amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) >>> r = amdgpu_userq_map_helper(uq_mgr, queue); >>> if (r) { >>> drm_file_err(uq_mgr->file, "Failed to map Queue\n"); >>> - idr_remove(&uq_mgr->userq_idr, qid); >>> + xa_erase(&uq_mgr->userq_mgr_xa, qid); >>> amdgpu_userq_fence_driver_free(queue); >>> uq_funcs->mqd_destroy(uq_mgr, queue); >>> kfree(queue); >>> @@ -591,7 +612,6 @@ amdgpu_userq_create(struct drm_file *filp, union >>> drm_amdgpu_userq *args) >>> >>> unlock: >>> mutex_unlock(&uq_mgr->userq_mutex); >>> - mutex_unlock(&adev->userq_mutex); >>> >>> return r; >>> } >>> @@ -689,11 +709,11 @@ static int >>> amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) { >>> struct amdgpu_usermode_queue *queue; >>> - int queue_id; >>> + unsigned long queue_id; >>> int ret = 0, r; >>> >>> /* Resume all the queues for this process */ >>> - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { >>> + xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { >>> r = amdgpu_userq_restore_helper(uq_mgr, queue); >>> if (r) >>> ret = r; >>> @@ -846,11 +866,11 @@ static int >>> amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) { >>> struct amdgpu_usermode_queue *queue; >>> - int queue_id; >>> + unsigned long queue_id; >>> int ret = 0, r; >>> >>> /* Try to unmap all the queues in this process ctx */ >>> - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { >>> + xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { >>> r = amdgpu_userq_preempt_helper(uq_mgr, queue); >>> if (r) >>> ret = r; >>> @@ -865,9 +885,10 @@ static int >>> amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { >>> struct amdgpu_usermode_queue *queue; >>> - int queue_id, ret; >>> + unsigned long queue_id; >>> + int ret; >>> >>> - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { >>> + xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { >>> struct dma_fence *f = queue->last_fence; >>> >>> if (!f || dma_fence_is_signaled(f)) @@ -920,44 +941,30 @@ int >>> amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f >>> struct amdgpu_device *adev) >>> { >>> mutex_init(&userq_mgr->userq_mutex); >>> - idr_init_base(&userq_mgr->userq_idr, 1); >>> + xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC); >>> userq_mgr->adev = adev; >>> userq_mgr->file = file_priv; >>> >>> - mutex_lock(&adev->userq_mutex); >>> - list_add(&userq_mgr->list, &adev->userq_mgr_list); >>> - mutex_unlock(&adev->userq_mutex); >>> - >>> INIT_DELAYED_WORK(&userq_mgr->resume_work, >> amdgpu_userq_restore_worker); >>> return 0; >>> } >>> >>> void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { >>> - struct amdgpu_device *adev = userq_mgr->adev; >>> struct amdgpu_usermode_queue *queue; >>> - struct amdgpu_userq_mgr *uqm, *tmp; >>> - uint32_t queue_id; >>> + unsigned long queue_id; >>> >>> 
cancel_delayed_work_sync(&userq_mgr->resume_work); >>> >>> - mutex_lock(&adev->userq_mutex); >>> mutex_lock(&userq_mgr->userq_mutex); >>> - idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) { >>> + xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) { >>> amdgpu_userq_wait_for_last_fence(userq_mgr, queue); >>> amdgpu_userq_unmap_helper(userq_mgr, queue); >>> amdgpu_userq_cleanup(userq_mgr, queue, queue_id); >>> } >>> >>> - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { >>> - if (uqm == userq_mgr) { >>> - list_del(&uqm->list); >>> - break; >>> - } >>> - } >>> - idr_destroy(&userq_mgr->userq_idr); >>> + xa_destroy(&userq_mgr->userq_mgr_xa); >>> mutex_unlock(&userq_mgr->userq_mutex); >>> - mutex_unlock(&adev->userq_mutex); >>> mutex_destroy(&userq_mgr->userq_mutex); >>> } >>> >>> @@ -965,25 +972,23 @@ int amdgpu_userq_suspend(struct amdgpu_device >>> *adev) { >>> u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); >>> struct amdgpu_usermode_queue *queue; >>> - struct amdgpu_userq_mgr *uqm, *tmp; >>> - int queue_id; >>> + struct amdgpu_userq_mgr *uqm; >>> + unsigned long queue_id; >>> int ret = 0, r; >>> >>> if (!ip_mask) >>> return 0; >>> >>> - mutex_lock(&adev->userq_mutex); >>> - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { >>> + xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { >>> + uqm = queue->userq_mgr; >> >> That function is used during reset, isn't it? > [Zhang, Jesse(Jie)] No, amdgpu_userq_suspend/amdgpu_userq_resume are not > used during reset. > but they are called when executing the suspend command. I tried the suspend > case, amdgpu_userq_suspend/amdgpu_userq_resume and no error occurred. Testing can only disprove a design and not validate it. During normal suspend/resume this isn't an issue because userspace is completely frozen. But when this is used during reset as well, we need to be super careful not to access freed up memory. Regards, Christian. >> If yes, we somehow need to make sure that both queue and userq_mgr are >> properly initialized and not freed up while the reset is ongoing. >> >> The easiest way of doing that is to hold the read side of the reset lock >> while >> creating/destroying queues and the manager data structure. >> >>> cancel_delayed_work_sync(&uqm->resume_work); >>> mutex_lock(&uqm->userq_mutex); >>> - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { >>> - r = amdgpu_userq_unmap_helper(uqm, queue); >>> - if (r) >>> - ret = r; >>> - } >>> + r = amdgpu_userq_unmap_helper(uqm, queue); >>> + if (r) >>> + ret = r; >>> mutex_unlock(&uqm->userq_mutex); >>> } >>> - mutex_unlock(&adev->userq_mutex); >>> + >>> return ret; >>> } >>> >>> @@ -991,24 +996,22 @@ int amdgpu_userq_resume(struct amdgpu_device >>> *adev) { >>> u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); >>> struct amdgpu_usermode_queue *queue; >>> - struct amdgpu_userq_mgr *uqm, *tmp; >>> - int queue_id; >>> + struct amdgpu_userq_mgr *uqm; >>> + unsigned long queue_id; >>> int ret = 0, r; >>> >>> if (!ip_mask) >>> return 0; >>> >>> - mutex_lock(&adev->userq_mutex); >>> - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { >>> + xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { >>> + uqm = queue->userq_mgr; >> >> Not sure about those ones here, but the same point that applies to the reset >> handling applies here as well. >> >> Apart from that the patch looks good to me. >> >> Regards, >> Christian.
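To make the reset interaction concrete, a rough sketch of that locking, assuming the reset-domain rw_semaphore (adev->reset_domain->sem) is the reset lock meant here, whose write side is taken by the reset path:

    /* amdgpu_userq_create() / amdgpu_userq_destroy(): hold the read side of
     * the reset lock around inserting into / erasing from userq_mgr_xa and
     * userq_doorbell_xa (and the final kfree()), so an ongoing reset walking
     * the global XArray sees either a fully initialized queue or none at all.
     */
    down_read(&adev->reset_domain->sem);
    /* ... set up or tear down the queue and update the XArrays ... */
    up_read(&adev->reset_domain->sem);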
>> >>> mutex_lock(&uqm->userq_mutex); >>> - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { >>> - r = amdgpu_userq_map_helper(uqm, queue); >>> - if (r) >>> - ret = r; >>> - } >>> + r = amdgpu_userq_map_helper(uqm, queue); >>> + if (r) >>> + ret = r; >>> mutex_unlock(&uqm->userq_mutex); >>> } >>> - mutex_unlock(&adev->userq_mutex); >>> + >>> return ret; >>> } >>> >>> @@ -1017,33 +1020,31 @@ int >>> amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, { >>> u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); >>> struct amdgpu_usermode_queue *queue; >>> - struct amdgpu_userq_mgr *uqm, *tmp; >>> - int queue_id; >>> + struct amdgpu_userq_mgr *uqm; >>> + unsigned long queue_id; >>> int ret = 0, r; >>> >>> /* only need to stop gfx/compute */ >>> if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << >> AMDGPU_HW_IP_COMPUTE)))) >>> return 0; >>> >>> - mutex_lock(&adev->userq_mutex); >>> if (adev->userq_halt_for_enforce_isolation) >>> dev_warn(adev->dev, "userq scheduling already stopped!\n"); >>> adev->userq_halt_for_enforce_isolation = true; >>> - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { >>> + xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { >>> + uqm = queue->userq_mgr; >>> cancel_delayed_work_sync(&uqm->resume_work); >>> mutex_lock(&uqm->userq_mutex); >>> - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { >>> - if (((queue->queue_type == AMDGPU_HW_IP_GFX) || >>> - (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) >> && >>> - (queue->xcp_id == idx)) { >>> - r = amdgpu_userq_preempt_helper(uqm, queue); >>> - if (r) >>> - ret = r; >>> - } >>> + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || >>> + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && >>> + (queue->xcp_id == idx)) { >>> + r = amdgpu_userq_preempt_helper(uqm, queue); >>> + if (r) >>> + ret = r; >>> } >>> mutex_unlock(&uqm->userq_mutex); >>> } >>> - mutex_unlock(&adev->userq_mutex); >>> + >>> return ret; >>> } >>> >>> @@ -1052,21 +1053,20 @@ int >>> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, >> { >>> u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); >>> struct amdgpu_usermode_queue *queue; >>> - struct amdgpu_userq_mgr *uqm, *tmp; >>> - int queue_id; >>> + struct amdgpu_userq_mgr *uqm; >>> + unsigned long queue_id; >>> int ret = 0, r; >>> >>> /* only need to stop gfx/compute */ >>> if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << >> AMDGPU_HW_IP_COMPUTE)))) >>> return 0; >>> >>> - mutex_lock(&adev->userq_mutex); >>> if (!adev->userq_halt_for_enforce_isolation) >>> dev_warn(adev->dev, "userq scheduling already started!\n"); >>> adev->userq_halt_for_enforce_isolation = false; >>> - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { >>> + xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { >>> + uqm = queue->userq_mgr; >>> mutex_lock(&uqm->userq_mutex); >>> - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { >>> if (((queue->queue_type == AMDGPU_HW_IP_GFX) || >>> (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) >> && >>> (queue->xcp_id == idx)) { >>> @@ -1074,9 +1074,8 @@ int >> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, >>> if (r) >>> ret = r; >>> } >>> - } >>> mutex_unlock(&uqm->userq_mutex); >>> } >>> - mutex_unlock(&adev->userq_mutex); >>> + >>> return ret; >>> } >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> index c027dd916672..c562c8d2b53a 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> @@ -88,11 +88,15 @@ struct amdgpu_userq_funcs { >>> >>> /* Usermode queues for gfx */ >>> struct amdgpu_userq_mgr { >>> - struct idr userq_idr; >>> + /** >>> + * @userq_mgr_xa: Per-process user queue map (queue ID → queue) >>> + * Key: queue_id (unique ID within the process's userq manager) >>> + * Value: struct amdgpu_usermode_queue >>> + */ >>> + struct xarray userq_mgr_xa; >>> struct mutex userq_mutex; >>> struct amdgpu_device *adev; >>> struct delayed_work resume_work; >>> - struct list_head list; >>> struct drm_file *file; >>> }; >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c >>> index 95e91d1dc58a..cc597fae8971 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c >>> @@ -537,7 +537,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, >> void *data, >>> } >>> >>> /* Retrieve the user queue */ >>> - queue = idr_find(&userq_mgr->userq_idr, args->queue_id); >>> + queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id); >>> if (!queue) { >>> r = -ENOENT; >>> goto put_gobj_write; >>> @@ -899,7 +899,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, >> void *data, >>> */ >>> num_fences = dma_fence_dedup_array(fences, num_fences); >>> >>> - waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id); >>> + waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id); >>> if (!waitq) { >>> r = -EINVAL; >>> goto free_fences; >>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c >>> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c >>> index 2db9b2c63693..9e4229c79c69 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c >>> @@ -205,9 +205,9 @@ static int mes_userq_detect_and_reset(struct >> amdgpu_device *adev, >>> int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev); >>> struct mes_detect_and_reset_queue_input input; >>> struct amdgpu_usermode_queue *queue; >>> - struct amdgpu_userq_mgr *uqm, *tmp; >>> unsigned int hung_db_num = 0; >>> - int queue_id, r, i; >>> + unsigned long queue_id; >>> + int r, i; >>> u32 db_array[4]; >>> >>> if (db_array_size > 4) { >>> @@ -227,16 +227,14 @@ static int mes_userq_detect_and_reset(struct >> amdgpu_device *adev, >>> if (r) { >>> dev_err(adev->dev, "Failed to detect and reset queues, err >>> (%d)\n", >> r); >>> } else if (hung_db_num) { >>> - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) >>> { >>> - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { >>> - if (queue->queue_type == queue_type) { >>> - for (i = 0; i < hung_db_num; i++) { >>> - if (queue->doorbell_index == >> db_array[i]) { >>> - queue->state = >> AMDGPU_USERQ_STATE_HUNG; >>> - atomic_inc(&adev- >>> gpu_reset_counter); >>> - >> amdgpu_userq_fence_driver_force_completion(queue); >>> - >> drm_dev_wedged_event(adev_to_drm(adev), >> DRM_WEDGE_RECOVERY_NONE, NULL); >>> - } >>> + xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { >>> + if (queue->queue_type == queue_type) { >>> + for (i = 0; i < hung_db_num; i++) { >>> + if (queue->doorbell_index == >>> db_array[i]) { >>> + queue->state = >> AMDGPU_USERQ_STATE_HUNG; >>> + atomic_inc(&adev- >>> gpu_reset_counter); >>> + >> amdgpu_userq_fence_driver_force_completion(queue); >>> + >> drm_dev_wedged_event(adev_to_drm(adev), >>> +DRM_WEDGE_RECOVERY_NONE, NULL); >>> } >>> } >>> } >