On 5/26/26 11:32, Prike Liang wrote:
> Avoid xarray extraction and temporary array allocation in
> amdgpu_userq_fence_alloc() when there are no pending wait-side fence
> driver references. This keeps the common fence emit path cheaper and
> more efficient.
That's an absolute corner case we clearly don't need to optimize for.
In almost all cases we should have at least one remote fence driver here.
Regards,
Christian.
>
> Signed-off-by: Prike Liang <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 008330a0d852..2a2bf13a513d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -226,7 +226,7 @@ static int amdgpu_userq_fence_alloc(struct
> amdgpu_usermode_queue *userq,
> struct amdgpu_userq_fence *userq_fence;
> void *entry;
>
> - userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL);
> + userq_fence = kzalloc(sizeof(*userq_fence), GFP_KERNEL);
> if (!userq_fence)
> return -ENOMEM;
>
> @@ -235,6 +235,8 @@ static int amdgpu_userq_fence_alloc(struct
> amdgpu_usermode_queue *userq,
> * used as size to allocate the array.
> */
> mutex_lock(&userq->fence_drv_lock);
> + if (xa_empty(&userq->fence_drv_xa))
> + goto unlock;
> XA_STATE(xas, &userq->fence_drv_xa, 0);
>
> rcu_read_lock();
> @@ -256,7 +258,7 @@ static int amdgpu_userq_fence_alloc(struct
> amdgpu_usermode_queue *userq,
> xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array,
> 0, ULONG_MAX, xas.xa_index, XA_PRESENT);
> xa_destroy(&userq->fence_drv_xa);
> -
> +unlock:
> mutex_unlock(&userq->fence_drv_lock);
>
> amdgpu_userq_fence_driver_get(fence_drv);