On Wed, Sep 3, 2025 at 4:49 AM Jesse.Zhang <jesse.zh...@amd.com> wrote:
>
> Modify the user queue management to use preempt/restore
> operations instead of full map/unmap for queue eviction scenarios where
> applicable. The changes include:
>
> 1. Introduces new helper functions:
>    - amdgpu_userqueue_preempt_helper()
>    - amdgpu_userqueue_restore_helper()
>
> 2. Updates queue state management to track PREEMPTED state
>
> 3. Modifies eviction handling to use preempt instead of unmap:
>    - amdgpu_userq_evict_all() now uses preempt_helper
>    - amdgpu_userq_restore_all() now uses restore_helper
>
> The preempt/restore approach provides better performance during queue
> eviction by avoiding the overhead of full queue teardown and setup.
> Full map/unmap operations are still used for initial setup/teardown
> and system suspend scenarios.
>
> v2: rename amdgpu_userqueue_restore_helper/amdgpu_userqueue_preempt_helper to
> amdgpu_userq_restore_helper/amdgpu_userq_preempt_helper for consistency. 
> (Alex)
>
> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
> Signed-off-by: Jesse Zhang <jesse.zh...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 49 +++++++++++++++++++++--
>  1 file changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index aac0de86f3e8..54851ba8756a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -45,7 +45,7 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device 
> *adev)
>  }
>
>  static int
> -amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> +amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
>                           struct amdgpu_usermode_queue *queue)
>  {
>         struct amdgpu_device *adev = uq_mgr->adev;
> @@ -54,6 +54,49 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
>         int r = 0;
>
>         if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
> +               r = userq_funcs->preempt(uq_mgr, queue);
> +               if (r) {
> +                       queue->state = AMDGPU_USERQ_STATE_HUNG;
> +               } else {
> +                       queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
> +               }
> +       }
> +
> +       return r;
> +}
> +
> +static int
> +amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
> +                       struct amdgpu_usermode_queue *queue)
> +{
> +       struct amdgpu_device *adev = uq_mgr->adev;
> +       const struct amdgpu_userq_funcs *userq_funcs =
> +               adev->userq_funcs[queue->queue_type];
> +       int r = 0;
> +
> +       if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
> +               r = userq_funcs->restore(uq_mgr, queue);
> +               if (r) {
> +                       queue->state = AMDGPU_USERQ_STATE_HUNG;
> +               } else {
> +                       queue->state = AMDGPU_USERQ_STATE_MAPPED;
> +               }
> +       }
> +
> +       return r;
> +}
> +
> +static int
> +amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> +                         struct amdgpu_usermode_queue *queue)
> +{
> +       struct amdgpu_device *adev = uq_mgr->adev;
> +       const struct amdgpu_userq_funcs *userq_funcs =
> +               adev->userq_funcs[queue->queue_type];
> +       int r = 0;
> +
> +       if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
> +               (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
>                 r = userq_funcs->unmap(uq_mgr, queue);
>                 if (r)
>                         queue->state = AMDGPU_USERQ_STATE_HUNG;
> @@ -536,7 +579,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
>
>         /* Resume all the queues for this process */
>         idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> -               r = amdgpu_userq_map_helper(uq_mgr, queue);
> +               r = amdgpu_userq_restore_helper(uq_mgr, queue);
>                 if (r)
>                         ret = r;
>         }
> @@ -693,7 +736,7 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
>
>         /* Try to unmap all the queues in this process ctx */
>         idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> -               r = amdgpu_userq_unmap_helper(uq_mgr, queue);
> +               r = amdgpu_userq_preempt_helper(uq_mgr, queue);
>                 if (r)
>                         ret = r;
>         }

I think amdgpu_userq_stop_sched_for_enforce_isolation() and
amdgpu_userq_start_sched_for_enforce_isolation() should use preempt
and restore as well.

Alex

> --
> 2.49.0
>

Reply via email to