AMD General
Hi @Pelloux-Prayer, Pierre-Eric Could you check whether this version meets
the UMR tool profiling objectives, or is anything missing?
Regards,
Prike
> -----Original Message-----
> From: Liang, Prike
> Sent: Tuesday, June 2, 2026 4:00 PM
> To: 'Liang, Prike' <[email protected]>; [email protected]
> Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
> <[email protected]>; Pelloux-Prayer, Pierre-Eric <Pierre-eric.Pelloux-
> [email protected]>; Pelloux-Prayer, Pierre-Eric <Pierre-eric.Pelloux-
> [email protected]>
> Subject: RE: [PATCH 2/2] drm/amdgpu: add userq job and state transition trace
> events
>
>
> These patch sets can be beneficial for profiling userq state transitions and
> latency.
>
> Ping on this series.
>
> Regards,
> Prike
>
> > -----Original Message-----
> > From: amd-gfx <[email protected]> On Behalf Of
> > Liang, Prike
> > Sent: Friday, May 29, 2026 8:49 PM
> > To: [email protected]
> > Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
> > <[email protected]>; Pelloux-Prayer, Pierre-Eric
> > <Pierre-eric.Pelloux- [email protected]>; Pelloux-Prayer, Pierre-Eric
> > <Pierre-eric.Pelloux- [email protected]>
> > Subject: RE: [PATCH 2/2] drm/amdgpu: add userq job and state
> > transition trace events
> >
> > AMD General
> >
> > Ping
> >
> > Regards,
> > Prike
> >
> > > -----Original Message-----
> > > From: Liang, Prike <[email protected]>
> > > Sent: Wednesday, May 27, 2026 8:20 PM
> > > To: [email protected]
> > > Cc: Deucher, Alexander <[email protected]>; Koenig,
> > > Christian <[email protected]>; Pelloux-Prayer, Pierre-Eric
> > > <Pierre-eric.Pelloux- [email protected]>; Pelloux-Prayer, Pierre-Eric
> > > <Pierre-eric.Pelloux- [email protected]>; Liang, Prike
> > > <[email protected]>
> > > Subject: [PATCH 2/2] drm/amdgpu: add userq job and state transition
> > > trace events
> > >
> > > From: Pierre-Eric Pelloux-Prayer
> > > <[email protected]>
> > >
> > > Add ftrace events for tracking the userq fence emit, signal and
> > > queue state
> > transition.
> > >
> > > Signed-off-by: Pierre-Eric Pelloux-Prayer
> > > <[email protected]>
> > > Signed-off-by: Prike Liang <[email protected]>
> > > ---
> > > drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 113 ++++++++++++++++++
> > > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 21 ++++
> > > .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 12 +-
> > > 3 files changed, 143 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > > index df98be22f1f5..ef6a1fb82ff3 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> > > @@ -28,6 +28,8 @@
> > > #include <linux/types.h>
> > > #include <linux/tracepoint.h>
> > >
> > > +#include "amdgpu_userq_fence.h"
> > > +
> > > #undef TRACE_SYSTEM
> > > #define TRACE_SYSTEM amdgpu
> > > #define TRACE_INCLUDE_FILE amdgpu_trace @@ -636,6 +638,117 @@
> > > DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_destroy_end,
> > > TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
> > > TP_ARGS(queue, result));
> > >
> > > +TRACE_EVENT(amdgpu_userq_job_run,
> > > + TP_PROTO(struct device *device, struct
> > > +amdgpu_usermode_queue
> > > *queue, struct amdgpu_userq_fence *fence),
> > > + TP_ARGS(device, queue, fence),
> > > + TP_STRUCT__entry(
> > > + __field(u64, fence_context)
> > > + __field(u64, fence_seqno)
> > > + __string(dev, dev_name(device))
> > > + __field(u64, doorbell_index)
> > > + __field(u64, client_id)
> > > + __field(u32, queue_type)
> > > + ),
> > > + TP_fast_assign(
> > > + __entry->fence_context = fence->base.context;
> > > + __entry->fence_seqno = fence->base.seqno;
> > > + __assign_str(dev);
> > > + __entry->doorbell_index = queue->doorbell_index;
> > > + __entry->client_id =
> > > queue->userq_mgr->file->client_id;
> > > + __entry->queue_type = queue->queue_type;
> > > + ),
> > > + TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu,
> > > fence=%llu:%llu",
> > > + __get_str(dev), __entry->client_id,
> > > + __entry->queue_type, __entry-
> > > >doorbell_index,
> > > + __entry->fence_context,
> > > + __entry->fence_seqno) );
> > > +
> > > +TRACE_EVENT(amdgpu_userq_job_queue,
> > > + TP_PROTO(struct device *device,
> > > + struct amdgpu_usermode_queue *queue),
> > > + TP_ARGS(device, queue),
> > > + TP_STRUCT__entry(__field(u64, context)
> > > + __string(dev, dev_name(device))
> > > + __field(u64, doorbell_index)
> > > + __field(u64, client_id)
> > > + __field(u32, queue_type)
> > > + ),
> > > + TP_fast_assign(__assign_str(dev);
> > > + __entry->doorbell_index = queue->doorbell_index;
> > > + __entry->queue_type = queue->queue_type;
> > > + __entry->client_id =
> > > queue->userq_mgr->file->client_id;
> > > + __entry->context = queue->fence_drv->context;
> > > + ),
> > > + TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu,
> > > context=%llu",
> > > + __get_str(dev), __entry->client_id,
> > > __entry->queue_type,
> > > + __entry->doorbell_index, __entry->context) );
> > > +
> > > +TRACE_EVENT(amdgpu_userq_job_add_dep,
> > > + TP_PROTO(struct device *device, struct
> > > +amdgpu_usermode_queue
> > > *queue, struct amdgpu_userq_fence *dep),
> > > + TP_ARGS(device, queue, dep),
> > > + TP_STRUCT__entry(
> > > + __field(u64, context)
> > > + __field(u64, dep_context)
> > > + __field(u64, dep_seqno)
> > > + __string(dev, dev_name(device))
> > > + __field(u64, doorbell_index)
> > > + __field(u64, client_id)
> > > + __field(u32, queue_type)
> > > + ),
> > > + TP_fast_assign(
> > > + __assign_str(dev);
> > > + __entry->doorbell_index = queue->doorbell_index;
> > > + __entry->queue_type = queue->queue_type;
> > > + __entry->client_id =
> > > queue->userq_mgr->file->client_id;
> > > + __entry->context = queue->fence_drv->context;
> > > + __entry->dep_context = dep->base.context;
> > > + __entry->dep_seqno = dep->base.seqno;
> > > + ),
> > > + TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu,
> > > + context=%llu
> > > depends on fence=%llu:%llu",
> > > + __get_str(dev), __entry->client_id,
> > > + __entry->queue_type, __entry-
> > > >doorbell_index, __entry->context,
> > > + __entry->dep_context,
> > > + __entry->dep_seqno) );
> > > +
> > > +TRACE_EVENT(amdgpu_userq_state_start,
> > > + TP_PROTO(struct amdgpu_usermode_queue *queue),
> > > + TP_ARGS(queue),
> > > + TP_STRUCT__entry(
> > > + __field(u64, doorbell_index)
> > > + __field(u64, client_id)
> > > + __field(u32, queue_type)
> > > + __field(u32, from)
> > > + ),
> > > + TP_fast_assign(
> > > + __entry->doorbell_index = queue->doorbell_index;
> > > + __entry->queue_type = queue->queue_type;
> > > + __entry->client_id =
> > > queue->userq_mgr->file->client_id;
> > > + __entry->from = queue->state;
> > > + ),
> > > + TP_printk("client_id=%llu, type=%u, doorbell=%llu, from=%d",
> > > + __entry->client_id, __entry->queue_type,
> > > +__entry->doorbell_index, __entry->from) );
> > > +
> > > +TRACE_EVENT(amdgpu_userq_state_changed,
> > > + TP_PROTO(struct amdgpu_usermode_queue *queue, enum
> > > amdgpu_userq_state new_state),
> > > + TP_ARGS(queue, new_state),
> > > + TP_STRUCT__entry(
> > > + __field(u64, doorbell_index)
> > > + __field(u64, client_id)
> > > + __field(u32, queue_type)
> > > + __field(u32, to)
> > > + ),
> > > + TP_fast_assign(
> > > + __entry->doorbell_index = queue->doorbell_index;
> > > + __entry->queue_type = queue->queue_type;
> > > + __entry->client_id =
> > > queue->userq_mgr->file->client_id;
> > > + __entry->to = new_state;
> > > + ),
> > > + TP_printk("client_id=%llu, type=%u, doorbell=%llu, to=%d",
> > > + __entry->client_id, __entry->queue_type,
> > > +__entry->doorbell_index, __entry->to) );
> > > +
> > > #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > index 9dc6cb579ac7..536e73c7e9ef 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > @@ -296,11 +296,15 @@ static int amdgpu_userq_preempt_helper(struct
> > > amdgpu_usermode_queue *queue)
> > > int r;
> > >
> > > if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
> > > + trace_amdgpu_userq_state_start(queue);
> > > +
> > > r = userq_funcs->preempt(queue);
> > > if (r) {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_HUNG);
> > > queue->state = AMDGPU_USERQ_STATE_HUNG;
> > > return r;
> > > } else {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > +AMDGPU_USERQ_STATE_PREEMPTED);
> > > queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
> > > }
> > > }
> > > @@ -316,10 +320,14 @@ static int amdgpu_userq_restore_helper(struct
> > > amdgpu_usermode_queue *queue)
> > > int r = 0;
> > >
> > > if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
> > > + trace_amdgpu_userq_state_start(queue);
> > > +
> > > r = userq_funcs->restore(queue);
> > > if (r) {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_HUNG);
> > > queue->state = AMDGPU_USERQ_STATE_HUNG;
> > > } else {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_MAPPED);
> > > queue->state = AMDGPU_USERQ_STATE_MAPPED;
> > > }
> > > }
> > > @@ -337,12 +345,15 @@ static int amdgpu_userq_unmap_helper(struct
> > > amdgpu_usermode_queue *queue)
> > >
> > > if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
> > > (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
> > > + trace_amdgpu_userq_state_start(queue);
> > >
> > > r = userq_funcs->unmap(queue);
> > > if (r) {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_HUNG);
> > > queue->state = AMDGPU_USERQ_STATE_HUNG;
> > > return r;
> > > } else {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > +AMDGPU_USERQ_STATE_UNMAPPED);
> > > queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
> > > }
> > > }
> > > @@ -359,11 +370,15 @@ static int amdgpu_userq_map_helper(struct
> > > amdgpu_usermode_queue *queue)
> > > int r;
> > >
> > > if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
> > > + trace_amdgpu_userq_state_start(queue);
> > > +
> > > r = userq_funcs->map(queue);
> > > if (r) {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_HUNG);
> > > queue->state = AMDGPU_USERQ_STATE_HUNG;
> > > return r;
> > > } else {
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_MAPPED);
> > > queue->state = AMDGPU_USERQ_STATE_MAPPED;
> > > }
> > > }
> > > @@ -894,6 +909,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr
> > > *uq_mgr)
> > > if (!amdgpu_userq_buffer_vas_mapped(queue)) {
> > > drm_file_err(uq_mgr->file,
> > > "trying restore queue without va
> > > mapping\n");
> > > + trace_amdgpu_userq_state_changed(queue,
> > > +AMDGPU_USERQ_STATE_INVALID_VA);
> > > queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
> > > continue;
> > > }
> > > @@ -1389,12 +1405,14 @@ void amdgpu_userq_pre_reset(struct
> > > amdgpu_device
> > > *adev)
> > > if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
> > > continue;
> > >
> > > + trace_amdgpu_userq_state_start(queue);
> > > userq_funcs = adev->userq_funcs[queue->queue_type];
> > > userq_funcs->unmap(queue);
> > > /* just mark all queues as hung at this point.
> > > * if unmap succeeds, we could map again
> > > * in amdgpu_userq_post_reset() if vram is not lost
> > > */
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_HUNG);
> > > queue->state = AMDGPU_USERQ_STATE_HUNG;
> > > amdgpu_userq_fence_driver_force_completion(queue);
> > > }
> > > @@ -1413,6 +1431,8 @@ int amdgpu_userq_post_reset(struct
> > > amdgpu_device *adev, bool vram_lost)
> > >
> > > xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
> > > if (queue->state == AMDGPU_USERQ_STATE_HUNG &&
> > > !vram_lost) {
> > > + trace_amdgpu_userq_state_start(queue);
> > > +
> > > userq_funcs = adev->userq_funcs[queue->queue_type];
> > > /* Re-map queue */
> > > r = userq_funcs->map(queue); @@ -1420,6
> > > +1440,7 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev,
> > > bool
> > > vram_lost)
> > > dev_err(adev->dev, "Failed to remap
> > > queue %ld\n", queue_id);
> > > continue;
> > > }
> > > + trace_amdgpu_userq_state_changed(queue,
> > > AMDGPU_USERQ_STATE_MAPPED);
> > > queue->state = AMDGPU_USERQ_STATE_MAPPED;
> > > }
> > > }
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> > > index 008330a0d852..6071e83acd9e 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> > > @@ -30,7 +30,7 @@
> > > #include <drm/drm_syncobj.h>
> > >
> > > #include "amdgpu.h"
> > > -#include "amdgpu_userq_fence.h"
> > > +#include "amdgpu_trace.h"
> > >
> > > #define AMDGPU_USERQ_MAX_HANDLES (1U << 16)
> > >
> > > @@ -528,6 +528,8 @@ int amdgpu_userq_signal_ioctl(struct drm_device
> > > *dev, void *data,
> > > /* Create the new fence */
> > > amdgpu_userq_fence_init(queue, fence, wptr);
> > >
> > > + trace_amdgpu_userq_job_run(dev->dev, queue, fence);
> > > +
> > > mutex_unlock(&userq_mgr->userq_mutex);
> > >
> > > /*
> > > @@ -701,7 +703,7 @@ amdgpu_userq_wait_add_fence(struct
> > > drm_amdgpu_userq_wait *wait_info, }
> > >
> > > static int
> > > -amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
> > > +amdgpu_userq_wait_return_fence_info(struct drm_device *dev, struct
> > > +drm_file *filp,
> > > struct drm_amdgpu_userq_wait *wait_info,
> > > u32 *syncobj_handles, u32
> > > *timeline_points,
> > > u32 *timeline_handles, @@ -835,6
> > > +837,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
> > > goto free_fences;
> > > }
> > >
> > > + trace_amdgpu_userq_job_queue(dev->dev, waitq);
> > > +
> > > for (i = 0, cnt = 0; i < num_fences; i++) {
> > > struct amdgpu_userq_fence_driver *fence_drv;
> > > struct amdgpu_userq_fence *userq_fence; @@ -869,6
> > > +873,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
> > >
> > > amdgpu_userq_fence_driver_get(fence_drv);
> > >
> > > + trace_amdgpu_userq_job_add_dep(dev->dev, waitq,
> > > + userq_fence);
> > > +
> > > /* Store drm syncobj's gpu va address and value */
> > > fence_info[cnt].va = fence_drv->va;
> > > fence_info[cnt].value = fences[i]->seqno; @@ -968,7
> > > +974,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void
> > > +*data,
> > > gobj_write,
> > > gobj_read);
> > > } else {
> > > - r = amdgpu_userq_wait_return_fence_info(filp, wait_info,
> > > + r = amdgpu_userq_wait_return_fence_info(dev, filp,
> > > + wait_info,
> > > syncobj_handles,
> > >
> > > timeline_points,
> > >
> > > timeline_handles,
> > > --
> > > 2.34.1