On 16.09.25 11:35, Jesse.Zhang wrote: > This commit refactors the AMDGPU userqueue management subsystem to replace > IDR (ID Allocation) with XArray for improved performance, scalability, and > maintainability. The changes address several issues with the previous IDR > implementation and provide better locking semantics. > > Key changes: > > 1. **Global XArray Introduction**: > - Added `userq_global_xa` to `struct amdgpu_device` for global queue > tracking > - Uses doorbell_index as key for efficient global lookup > - Replaces the previous `userq_mgr_list` linked list approach > > 2. **Per-process XArray Conversion**: > - Replaced `userq_idr` with `userq_xa` in `struct amdgpu_userq_mgr` > - Maintains per-process queue tracking with queue_id as key > - Uses XA_FLAGS_ALLOC for automatic ID allocation > > 3. **Locking Improvements**: > - Removed global `userq_mutex` from `struct amdgpu_device` > - Replaced with fine-grained XArray locking using `xa_lock()`/`xa_unlock()` > - Eliminated potential deadlocks from nested mutex operations > > 4. **Runtime Idle Check Optimization**: > - Updated `amdgpu_runtime_idle_check_userq()` to use XArray iteration > - Simplified logic and improved performance with direct XArray access > > 5. **Queue Management Functions**: > - Converted all IDR operations to equivalent XArray functions: > - `idr_alloc()` → `xa_alloc()` > - `idr_find()` → `xa_load()` > - `idr_remove()` → `xa_erase()` > - `idr_for_each()` → `xa_for_each()` > > 6. **Suspend/Resume Logic**: > - Rewrote suspend/resume functions to use XArray iteration > - Improved error handling and resource cleanup > - Fixed potential race conditions in queue state management > > 7. **DebugFS and IOCTL Updates**: > - Updated all userqueue-related IOCTL handlers for XArray compatibility > - Modified debugfs functionality to work with new XArray structure > > Benefits: > - **Performance**: XArray provides better scalability for large numbers of > queues > - **Memory Efficiency**: Reduced memory overhead compared to IDR > - **Thread Safety**: Improved locking semantics with XArray's internal > spinlocks > > Fixes addressed: > - Resolves soft lockup issues in queue cleanup operations > - Eliminates "scheduling while atomic" bugs in fini operations > - Prevents potential deadlocks in global queue management > - Improves error handling and resource cleanup reliability > > Suggested-by: Christian König <christian.koe...@amd.com> > Signed-off-by: Jesse Zhang <jesse.zh...@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 12 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 150 +++++++++--------- > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 9 +- > .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 4 +- > drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 28 ++-- > 7 files changed, 109 insertions(+), 104 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 2a0df4cabb99..2800ef816e51 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1174,6 +1174,11 @@ struct amdgpu_device { > * queue fence. > */ > struct xarray userq_xa;
We should maybe rename that to userq_mgr_xa to clearly distinguish it. > + /* Global queue index, > + * key: doorbell_index, > + * value: struct amdgpu_usermode_queue > + */ > + struct xarray userq_global_xa; And maybe name this userq_doorbell_xa. And BTW, it would be really good if we started using the kerneldoc format for comments. > > /* df */ > struct amdgpu_df df; > @@ -1308,8 +1313,6 @@ struct amdgpu_device { > */ > bool apu_prefer_gtt; > > - struct list_head userq_mgr_list; > - struct mutex userq_mutex; > bool userq_halt_for_enforce_isolation; > struct amdgpu_uid *uid_info; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 0fdfde3dcb9f..4c9b4614be53 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -4483,7 +4483,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, > mutex_init(&adev->gfx.userq_sch_mutex); > mutex_init(&adev->gfx.workload_profile_mutex); > mutex_init(&adev->vcn.workload_profile_mutex); > - mutex_init(&adev->userq_mutex); > > amdgpu_device_init_apu_flags(adev); > > @@ -4511,7 +4510,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, > > INIT_LIST_HEAD(&adev->pm.od_kobj_list); > > - INIT_LIST_HEAD(&adev->userq_mgr_list); > + xa_init(&adev->userq_global_xa); > > INIT_DELAYED_WORK(&adev->delayed_init_work, > amdgpu_device_delayed_init_work_handler); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index ece251cbe8c3..d1d2fcbd4195 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -2772,20 +2772,18 @@ static int amdgpu_runtime_idle_check_userq(struct > device *dev) > struct drm_device *drm_dev = pci_get_drvdata(pdev); > struct amdgpu_device *adev = drm_to_adev(drm_dev); > struct amdgpu_usermode_queue *queue; > - struct amdgpu_userq_mgr *uqm, *tmp; > - int queue_id; > + unsigned long queue_id; > int ret = 0; > > - mutex_lock(&adev->userq_mutex); > - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { > - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { > + xa_lock(&adev->userq_global_xa); > + xa_for_each(&adev->userq_global_xa, queue_id, queue) { > + if (queue) { If I'm not completely mistaken, that can be simplified by using xa_empty() now. > ret = -EBUSY; > goto done; > } > } > done: > - mutex_unlock(&adev->userq_mutex); > - > + xa_unlock(&adev->userq_global_xa); > return ret; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > index b649ac0cedff..c285bf927c46 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > @@ -183,14 +183,18 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, > > uq_funcs->mqd_destroy(uq_mgr, queue); > amdgpu_userq_fence_driver_free(queue); > - idr_remove(&uq_mgr->userq_idr, queue_id); > + __xa_erase(&uq_mgr->userq_xa, (unsigned long)queue_id); > + > + /* Remove from global XArray */ > + __xa_erase(&adev->userq_global_xa, queue->doorbell_index); > + Those two need to be xa_erase() (without the __), since we are relying on XA locking now.
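Roughly like this in amdgpu_userq_cleanup() (untested sketch, just to illustrate; the rest of the function stays as it is):

        uq_funcs->mqd_destroy(uq_mgr, queue);
        amdgpu_userq_fence_driver_free(queue);

        /* xa_erase() takes the XArray spinlock internally */
        xa_erase(&uq_mgr->userq_xa, (unsigned long)queue_id);

        /* Remove from global XArray */
        xa_erase(&adev->userq_global_xa, queue->doorbell_index);

        kfree(queue);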
> kfree(queue); > } > > static struct amdgpu_usermode_queue * > amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) > { > - return idr_find(&uq_mgr->userq_idr, qid); > + return xa_load(&uq_mgr->userq_xa, qid); > } > > void > @@ -362,6 +366,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) > int r = 0; > > cancel_delayed_work_sync(&uq_mgr->resume_work); > + xa_lock(&adev->userq_global_xa); > mutex_lock(&uq_mgr->userq_mutex); Wrong order, first mutex_lock(&uq_mgr->userq_mutex) and then xa_lock(&adev->userq_global_xa). It's probably best not to take that lock here at all and just use the right xa function in amdgpu_userq_cleanup(). You should also start testing patches with LOCKDEP enabled; it automatically throws warnings when something like this is wrong. > > queue = amdgpu_userq_find(uq_mgr, queue_id); > @@ -389,6 +394,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) > } > amdgpu_userq_cleanup(uq_mgr, queue, queue_id); > mutex_unlock(&uq_mgr->userq_mutex); > + xa_unlock(&adev->userq_global_xa); > > pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > @@ -462,8 +468,9 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > struct amdgpu_db_info db_info; > char *queue_name; > bool skip_map_queue; > + u32 qid; > uint64_t index; > - int qid, r = 0; > + int r = 0; > int priority = > (args->in.flags & > AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> > AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; > @@ -471,7 +478,6 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > r = amdgpu_userq_priority_permit(filp, priority); > if (r) > return r; > - > r = pm_runtime_get_sync(adev_to_drm(adev)->dev); > if (r < 0) { > drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for > userqueue create\n"); > @@ -486,7 +492,6 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > * > * This will also make sure we have a valid eviction fence ready to be > used. > */ > - mutex_lock(&adev->userq_mutex); > amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); > > uq_funcs = adev->userq_funcs[args->in.ip_type]; > @@ -546,9 +551,16 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > goto unlock; > } > > + xa_lock(&adev->userq_global_xa); > + r =xa_err(__xa_store(&adev->userq_global_xa, index, queue, GFP_KERNEL)); > + xa_unlock(&adev->userq_global_xa); Just use xa_err(xa_store()) here.
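I.e. just something like this (untested sketch):

        /* xa_store() takes the xa_lock internally, no explicit locking needed */
        r = xa_err(xa_store(&adev->userq_global_xa, index, queue, GFP_KERNEL));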
> + if (r) { > + kfree(queue); > + goto unlock; > + } > > - qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, > GFP_KERNEL); > - if (qid < 0) { > + r = xa_alloc(&uq_mgr->userq_xa, &qid, queue, XA_LIMIT(1, > AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); > + if (r) { > drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); > amdgpu_userq_fence_driver_free(queue); > uq_funcs->mqd_destroy(uq_mgr, queue); > @@ -568,7 +580,7 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > r = amdgpu_userq_map_helper(uq_mgr, queue); > if (r) { > drm_file_err(uq_mgr->file, "Failed to map Queue\n"); > - idr_remove(&uq_mgr->userq_idr, qid); > + xa_erase(&uq_mgr->userq_xa, qid); > amdgpu_userq_fence_driver_free(queue); > uq_funcs->mqd_destroy(uq_mgr, queue); > kfree(queue); > @@ -591,7 +603,6 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > > unlock: > mutex_unlock(&uq_mgr->userq_mutex); > - mutex_unlock(&adev->userq_mutex); > > return r; > } > @@ -689,11 +700,11 @@ static int > amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) > { > struct amdgpu_usermode_queue *queue; > - int queue_id; > + unsigned long queue_id; > int ret = 0, r; > > /* Resume all the queues for this process */ > - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { > + xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { > r = amdgpu_userq_restore_helper(uq_mgr, queue); > if (r) > ret = r; > @@ -846,11 +857,11 @@ static int > amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) > { > struct amdgpu_usermode_queue *queue; > - int queue_id; > + unsigned long queue_id; > int ret = 0, r; > > /* Try to unmap all the queues in this process ctx */ > - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { > + xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { > r = amdgpu_userq_preempt_helper(uq_mgr, queue); > if (r) > ret = r; > @@ -865,9 +876,10 @@ static int > amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) > { > struct amdgpu_usermode_queue *queue; > - int queue_id, ret; > + unsigned long queue_id; > + int ret; > > - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { > + xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { > struct dma_fence *f = queue->last_fence; > > if (!f || dma_fence_is_signaled(f)) > @@ -920,14 +932,10 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr > *userq_mgr, struct drm_file *f > struct amdgpu_device *adev) > { > mutex_init(&userq_mgr->userq_mutex); > - idr_init_base(&userq_mgr->userq_idr, 1); > + xa_init_flags(&userq_mgr->userq_xa, XA_FLAGS_ALLOC); > userq_mgr->adev = adev; > userq_mgr->file = file_priv; > > - mutex_lock(&adev->userq_mutex); > - list_add(&userq_mgr->list, &adev->userq_mgr_list); > - mutex_unlock(&adev->userq_mutex); > - > INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); > return 0; > } > @@ -936,28 +944,19 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr > *userq_mgr) > { > struct amdgpu_device *adev = userq_mgr->adev; > struct amdgpu_usermode_queue *queue; > - struct amdgpu_userq_mgr *uqm, *tmp; > - uint32_t queue_id; > + unsigned long queue_id; > > cancel_delayed_work_sync(&userq_mgr->resume_work); > > - mutex_lock(&adev->userq_mutex); > - mutex_lock(&userq_mgr->userq_mutex); > - idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) { > + xa_lock(&adev->userq_global_xa); > + xa_for_each(&userq_mgr->userq_xa, queue_id, queue) { > amdgpu_userq_wait_for_last_fence(userq_mgr, queue); That can't be called while holding the XA lock and will raise lockdep 
warnings. But the XA lock is not necessary here any more since we are destroying the userq_mgr and nobody else can concurrently modify it. So just drop the xa_lock(). > amdgpu_userq_unmap_helper(userq_mgr, queue); > amdgpu_userq_cleanup(userq_mgr, queue, queue_id); > } > > - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { > - if (uqm == userq_mgr) { > - list_del(&uqm->list); > - break; > - } > - } > - idr_destroy(&userq_mgr->userq_idr); > - mutex_unlock(&userq_mgr->userq_mutex); > - mutex_unlock(&adev->userq_mutex); > + xa_destroy(&userq_mgr->userq_xa); > + xa_unlock(&adev->userq_global_xa); > mutex_destroy(&userq_mgr->userq_mutex); > } > > @@ -965,25 +964,24 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) > { > u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); > struct amdgpu_usermode_queue *queue; > - struct amdgpu_userq_mgr *uqm, *tmp; > - int queue_id; > + struct amdgpu_userq_mgr *uqm; > + unsigned long queue_id; > int ret = 0, r; > > if (!ip_mask) > return 0; > > - mutex_lock(&adev->userq_mutex); > - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { > + xa_lock(&adev->userq_global_xa); > + xa_for_each(&adev->userq_global_xa, queue_id, queue) { > + uqm = queue->userq_mgr; > cancel_delayed_work_sync(&uqm->resume_work); Same here. No *_sync() function can be called while holding the XA lock. When exactly is that called? Only during suspend when userspace is frozen anyway? If so, you don't need the xa_lock() anyway. > mutex_lock(&uqm->userq_mutex); > - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { > - r = amdgpu_userq_unmap_helper(uqm, queue); > - if (r) > - ret = r; > - } > - mutex_unlock(&uqm->userq_mutex); > + r = amdgpu_userq_unmap_helper(uqm, queue); > + if (r) > + ret = r; > + mutex_lock(&uqm->userq_mutex); > } > - mutex_unlock(&adev->userq_mutex); > + xa_unlock(&adev->userq_global_xa); > return ret; > } > > @@ -991,24 +989,23 @@ int amdgpu_userq_resume(struct amdgpu_device *adev) > { > u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); > struct amdgpu_usermode_queue *queue; > - struct amdgpu_userq_mgr *uqm, *tmp; > - int queue_id; > + struct amdgpu_userq_mgr *uqm; > + unsigned long queue_id; > int ret = 0, r; > > if (!ip_mask) > return 0; > > - mutex_lock(&adev->userq_mutex); > - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { > + xa_lock(&adev->userq_global_xa); > + xa_for_each(&adev->userq_global_xa, queue_id, queue) { > + uqm = queue->userq_mgr; > mutex_lock(&uqm->userq_mutex); > - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { > - r = amdgpu_userq_map_helper(uqm, queue); > - if (r) > - ret = r; > - } > + r = amdgpu_userq_map_helper(uqm, queue); Same here. Don't call xa_lock() at all.
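E.g. amdgpu_userq_resume() could look roughly like this (completely untested; it relies on xa_for_each() taking the RCU read lock by itself and on queue->userq_mgr staying valid as long as the queue is in the global XArray):

int amdgpu_userq_resume(struct amdgpu_device *adev)
{
        u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
        struct amdgpu_usermode_queue *queue;
        struct amdgpu_userq_mgr *uqm;
        unsigned long queue_id;
        int ret = 0, r;

        if (!ip_mask)
                return 0;

        /* No xa_lock() here, xa_for_each() only needs the RCU read lock
         * which it takes by itself.
         */
        xa_for_each(&adev->userq_global_xa, queue_id, queue) {
                uqm = queue->userq_mgr;
                mutex_lock(&uqm->userq_mutex);
                r = amdgpu_userq_map_helper(uqm, queue);
                if (r)
                        ret = r;
                mutex_unlock(&uqm->userq_mutex);
        }
        return ret;
}

And the same pattern for suspend and the enforce isolation start/stop functions.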
> + if (r) > + ret = r; > mutex_unlock(&uqm->userq_mutex); > } > - mutex_unlock(&adev->userq_mutex); > + xa_unlock(&adev->userq_global_xa); > return ret; > } > > @@ -1017,33 +1014,33 @@ int > amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, > { > u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); > struct amdgpu_usermode_queue *queue; > - struct amdgpu_userq_mgr *uqm, *tmp; > - int queue_id; > + struct amdgpu_userq_mgr *uqm; > + unsigned long queue_id; > int ret = 0, r; > > /* only need to stop gfx/compute */ > if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << > AMDGPU_HW_IP_COMPUTE)))) > return 0; > > - mutex_lock(&adev->userq_mutex); > if (adev->userq_halt_for_enforce_isolation) > dev_warn(adev->dev, "userq scheduling already stopped!\n"); > adev->userq_halt_for_enforce_isolation = true; > - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { > + xa_lock(&adev->userq_global_xa); > + xa_for_each(&adev->userq_global_xa, queue_id, queue) { > cancel_delayed_work_sync(&uqm->resume_work); > + uqm = queue->userq_mgr; > mutex_lock(&uqm->userq_mutex); > - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { > - if (((queue->queue_type == AMDGPU_HW_IP_GFX) || > - (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && > - (queue->xcp_id == idx)) { > - r = amdgpu_userq_preempt_helper(uqm, queue); > - if (r) > - ret = r; > - } > + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || > + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && > + (queue->xcp_id == idx)) { > + r = amdgpu_userq_preempt_helper(uqm, queue); > + if (r) > + ret = r; > } > mutex_unlock(&uqm->userq_mutex); > } > - mutex_unlock(&adev->userq_mutex); > + xa_unlock(&adev->userq_global_xa); > + > return ret; > } > > @@ -1052,21 +1049,21 @@ int > amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, > { > u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); > struct amdgpu_usermode_queue *queue; > - struct amdgpu_userq_mgr *uqm, *tmp; > - int queue_id; > + struct amdgpu_userq_mgr *uqm; > + unsigned long queue_id; > int ret = 0, r; > > /* only need to stop gfx/compute */ > if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << > AMDGPU_HW_IP_COMPUTE)))) > return 0; > > - mutex_lock(&adev->userq_mutex); > if (!adev->userq_halt_for_enforce_isolation) > dev_warn(adev->dev, "userq scheduling already started!\n"); > adev->userq_halt_for_enforce_isolation = false; > - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { > - mutex_lock(&uqm->userq_mutex); > - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { > + xa_lock(&adev->userq_global_xa); > + xa_for_each(&adev->userq_global_xa, queue_id, queue) { > + uqm = queue->userq_mgr; > + mutex_lock(&queue->userq_mgr->userq_mutex); > if (((queue->queue_type == AMDGPU_HW_IP_GFX) || > (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && > (queue->xcp_id == idx)) { > @@ -1074,9 +1071,8 @@ int > amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, > if (r) > ret = r; > } > - } > - mutex_unlock(&uqm->userq_mutex); > + mutex_unlock(&queue->userq_mgr->userq_mutex); > } > - mutex_unlock(&adev->userq_mutex); > + xa_unlock(&adev->userq_global_xa); > return ret; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > index c027dd916672..77125bbe3abc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > @@ -88,11 +88,16 @@ struct amdgpu_userq_funcs { > > /* Usermode queues for gfx */ > struct amdgpu_userq_mgr { > - struct 
idr userq_idr; > + //struct idr userq_idr; > + /* In-process queue index, > + * key: queue_id > + * value: struct amdgpu_usermode_queue > + */ > + struct xarray userq_xa; > struct mutex userq_mutex; > struct amdgpu_device *adev; > struct delayed_work resume_work; > - struct list_head list; > + //struct list_head list; > struct drm_file *file; > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > index 95e91d1dc58a..32625079042d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c > @@ -537,7 +537,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, > void *data, > } > > /* Retrieve the user queue */ > - queue = idr_find(&userq_mgr->userq_idr, args->queue_id); > + queue = xa_load(&userq_mgr->userq_xa, args->queue_id); > if (!queue) { > r = -ENOENT; > goto put_gobj_write; > @@ -899,7 +899,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void > *data, > */ > num_fences = dma_fence_dedup_array(fences, num_fences); > > - waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id); > + waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id); > if (!waitq) { > r = -EINVAL; > goto free_fences; > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > index 2db9b2c63693..0794bbb944ec 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c > @@ -205,9 +205,9 @@ static int mes_userq_detect_and_reset(struct > amdgpu_device *adev, > int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev); > struct mes_detect_and_reset_queue_input input; > struct amdgpu_usermode_queue *queue; > - struct amdgpu_userq_mgr *uqm, *tmp; > unsigned int hung_db_num = 0; > - int queue_id, r, i; > + unsigned long queue_id; > + int r, i; > u32 db_array[4]; > > if (db_array_size > 4) { > @@ -227,20 +227,24 @@ static int mes_userq_detect_and_reset(struct > amdgpu_device *adev, > if (r) { > dev_err(adev->dev, "Failed to detect and reset queues, err > (%d)\n", r); > } else if (hung_db_num) { > - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) > { > - idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { > - if (queue->queue_type == queue_type) { > - for (i = 0; i < hung_db_num; i++) { > - if (queue->doorbell_index == > db_array[i]) { > - queue->state = > AMDGPU_USERQ_STATE_HUNG; > - > atomic_inc(&adev->gpu_reset_counter); > - > amdgpu_userq_fence_driver_force_completion(queue); > - > drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL); > - } > + xa_lock(&adev->userq_global_xa); > + xa_for_each(&adev->userq_global_xa, queue_id, queue) { > + mutex_lock(&queue->userq_mgr->userq_mutex); Same here. You can't lock a mutex while holding the XA lock; see the sketch at the very end of this mail for one possible way to restructure that loop. Regards, Christian. > + if (queue->queue_type == queue_type) { > + for (i = 0; i < hung_db_num; i++) { > + if (queue->doorbell_index == > db_array[i]) { > + queue = > xa_load(&adev->userq_global_xa, db_array[i]); > + queue->state = > AMDGPU_USERQ_STATE_HUNG; > + queue->state = > AMDGPU_USERQ_STATE_HUNG; > + > atomic_inc(&adev->gpu_reset_counter); > + > amdgpu_userq_fence_driver_force_completion(queue); > + > drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL); > } > } > } > + mutex_unlock(&queue->userq_mgr->userq_mutex); > } > + xa_unlock(&adev->userq_global_xa); > } > > return r;
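P.S.: for the detect and reset loop in mes_userq_detect_and_reset() quoted above, one possible shape (untested) would be to iterate without the xa_lock() and only take the per-manager mutex for the queue that is actually hung:

        xa_for_each(&adev->userq_global_xa, queue_id, queue) {
                if (queue->queue_type != queue_type)
                        continue;

                for (i = 0; i < hung_db_num; i++) {
                        if (queue->doorbell_index != db_array[i])
                                continue;

                        mutex_lock(&queue->userq_mgr->userq_mutex);
                        queue->state = AMDGPU_USERQ_STATE_HUNG;
                        atomic_inc(&adev->gpu_reset_counter);
                        amdgpu_userq_fence_driver_force_completion(queue);
                        drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
                        mutex_unlock(&queue->userq_mgr->userq_mutex);
                }
        }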