AMD General

Regards,
      Prike

> -----Original Message-----
> From: Pierre-Eric Pelloux-Prayer <[email protected]>
> Sent: Tuesday, May 12, 2026 3:11 PM
> To: Khatri, Sunil <[email protected]>; Liang, Prike <[email protected]>;
> [email protected]
> Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
> <[email protected]>; Pelloux-Prayer, Pierre-Eric <Pierre-eric.Pelloux-
> [email protected]>
> Subject: Re: [PATCH 1/4] drm/amdgpu: add userq create and destroy tracepoints
>
>
>
> Le 11/05/2026 à 15:57, Khatri, Sunil a écrit :
> > If I am not wrong, Pierre-Eric did work on the traces for user queues.
> > I have dropped my patches for the same reason, but I am not sure if the
> > trace patches are merged. Could you check with him once?
>
> No my patches aren't merged; their scope is to expose something similar to
> gpu_scheduler events to be able to observe user queues activity.
>
> >
> > On 11-05-2026 07:24 pm, Prike Liang wrote:
> >> Add ftrace events around user queue creation and destruction to
> >> profile queue setup and teardown latency.
>
> IMO these events look like something that could be done with the function
> tracer (optionally using the func-args feature) by tracing
> amdgpu_userq_destroy / amdgpu_userq_create entry and exit.

Thanks for the suggestion. While the function graph tracer can help capture
function-level boundary metrics, it doesn't provide the queue context in an
accurate or readable way. Patches #1 and #4 could supplement your
implementation and would help profile userq creation, destruction, and
eviction latency.

> Pierre-Eric
>
>
> >>
> >> Signed-off-by: Prike Liang <[email protected]>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 58
> >> +++++++++++++++++++++++
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 +++++
> >>   2 files changed, 69 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> >> index d13e64a69e25..5a01f63d1f32 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> >> @@ -578,6 +578,64 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
> >>                 __entry->value)
> >>   );
> >> +DECLARE_EVENT_CLASS(amdgpu_userq_queue,
> >> +        TP_PROTO(struct amdgpu_usermode_queue *queue),
> >> +        TP_ARGS(queue),
> >> +        TP_STRUCT__entry(
> >> +                 __field(struct amdgpu_usermode_queue *, queue)
> >> +                 __field(u64, doorbell_index)
> >> +                 __field(int, queue_type)
> >> +                 __field(int, state)
> >> +                 __field(u32, xcp_id)
> >> +                 ),
> >> +        TP_fast_assign(
> >> +               __entry->queue = queue;
> >> +               __entry->doorbell_index = queue ?
> >> +queue->doorbell_index : 0;
> >> +               __entry->queue_type = queue ? queue->queue_type : -1;
> >> +               __entry->state = queue ? queue->state : -1;
> >> +               __entry->xcp_id = queue ? queue->xcp_id : 0;
> >> +               ),
> >> +        TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d,
> >> +xcp_id=%u",
> >> +              __entry->queue, __entry->doorbell_index,
> >> +              __entry->queue_type, __entry->state, __entry->xcp_id)
> >> +); DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_create_start,
> >> +         TP_PROTO(struct amdgpu_usermode_queue *queue),
> >> +         TP_ARGS(queue));
> >> +DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_destroy_start,
> >> +         TP_PROTO(struct amdgpu_usermode_queue *queue),
> >> +         TP_ARGS(queue));
> >> +DECLARE_EVENT_CLASS(amdgpu_userq_queue_result,
> >> +        TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> >> +        TP_ARGS(queue, result),
> >> +        TP_STRUCT__entry(
> >> +                 __field(struct amdgpu_usermode_queue *, queue)
> >> +                 __field(u64, doorbell_index)
> >> +                 __field(int, queue_type)
> >> +                 __field(int, state)
> >> +                 __field(u32, xcp_id)
> >> +                 __field(int, result)
> >> +                 ),
> >> +        TP_fast_assign(
> >> +               __entry->queue = queue;
> >> +               __entry->doorbell_index = queue ?
> >> +queue->doorbell_index : 0;
> >> +               __entry->queue_type = queue ? queue->queue_type : -1;
> >> +               __entry->state = queue ? queue->state : -1;
> >> +               __entry->xcp_id = queue ? queue->xcp_id : 0;
> >> +               __entry->result = result;
> >> +               ),
> >> +        TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d,
> >> +xcp_id=%u, result=%d",
> >> +              __entry->queue, __entry->doorbell_index,
> >> +              __entry->queue_type, __entry->state,
> >> +              __entry->xcp_id, __entry->result) );
> >> +DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_create_end,
> >> +         TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> >> +         TP_ARGS(queue, result));
> >> +DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_destroy_end,
> >> +         TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> >> +         TP_ARGS(queue, result));
> >> +
> >>   #undef AMDGPU_JOB_GET_TIMELINE_NAME
> >>   #endif
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> index 3077ca4e27a0..50c46d31fbae 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> @@ -33,6 +33,7 @@
> >>   #include "amdgpu_userq.h"
> >>   #include "amdgpu_hmm.h"
> >>   #include "amdgpu_userq_fence.h"
> >> +#include "amdgpu_trace.h"
> >>   u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
> >>   {
> >> @@ -617,6 +618,8 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr
> >> *uq_mgr, struct amdgpu_usermode_que
> >>       int r = 0;
> >> +    trace_amdgpu_userq_destroy_start(queue);
> >> +
> >>       cancel_delayed_work_sync(&uq_mgr->resume_work);
> >>       /* Cancel any pending hang detection work and cleanup */ @@
> >> -625,6 +628,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr
> >> *uq_mgr, struct amdgpu_usermode_que
> >>       r = amdgpu_bo_reserve(vm->root.bo, false);
> >>       if (r) {
> >>           drm_file_err(uq_mgr->file, "Failed to reserve root bo
> >> during userqueue destroy\n");
> >> +        trace_amdgpu_userq_destroy_end(queue, r);
> >>           return r;
> >>       }
> >>       amdgpu_userq_buffer_vas_list_cleanup(adev, queue); @@ -650,6
> >> +654,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr,
> >> struct amdgpu_usermode_que
> >>       amdgpu_bo_unpin(queue->wptr_obj.obj);
> >>       amdgpu_bo_unreserve(queue->wptr_obj.obj);
> >>       amdgpu_bo_unref(&queue->wptr_obj.obj);
> >> +    trace_amdgpu_userq_destroy_end(queue, r);
> >>       kfree(queue);
> >>       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >> @@ -754,6 +759,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> >> drm_amdgpu_userq *args)
> >>       mutex_init(&queue->fence_drv_lock);
> >>       xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
> >> +    trace_amdgpu_userq_create_start(queue);
> >>       r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
> >>       if (r)
> >>           goto free_queue;
> >> @@ -809,6 +815,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> >> drm_amdgpu_userq *args)
> >>            * This drops the extra and last reference which should
> >> take
> >>            * care of all cleanup.
> >>            */
> >> +        trace_amdgpu_userq_create_end(queue, r);
> >>           amdgpu_userq_put(queue);
> >>           amdgpu_userq_put(queue);
> >>           return r;
> >> @@ -826,6 +833,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> >> drm_amdgpu_userq *args)
> >>           r = amdgpu_userq_map_helper(queue);
> >>           if (r) {
> >>               drm_file_err(uq_mgr->file, "Failed to map Queue\n");
> >> +            trace_amdgpu_userq_create_end(queue, r);
> >>               mutex_unlock(&uq_mgr->userq_mutex);
> >>               /* Prevent racing with close */
> >>               if (xa_erase(&uq_mgr->userq_xa, qid) == queue) @@
> >> -839,6 +847,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> >> drm_amdgpu_userq *args)
> >>       atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
> >>       amdgpu_debugfs_userq_init(filp, queue, qid);
> >> +    trace_amdgpu_userq_create_end(queue, 0);
> >>       amdgpu_userq_put(queue);
> >>       args->out.queue_id = qid;
> >>       return 0;
> >> @@ -853,6 +862,8 @@ amdgpu_userq_create(struct drm_file *filp, union
> >> drm_amdgpu_userq *args)
> >>   free_fence_drv:
> >>       amdgpu_userq_fence_driver_free(queue);
> >>   free_queue:
> >> +    if (queue)
> >> +        trace_amdgpu_userq_create_end(queue, r);
> >>       kfree(queue);
> >>   err_pm_runtime:
> >>       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);

Reply via email to