On 12-05-2026 07:38 am, Liang, Prike wrote:
AMD General

As far as I can see, there's no such implementation on the drm-next branch yet. 
I came up with the idea of tracking the userq lifetime quite a while ago and 
also raised it in the dev discussion group. This would not only help profile 
userq latency but also troubleshoot userq sequencing issues. Hi 
@Pelloux-Prayer, Pierre-Eric, could you please share the status of your userq 
ftrace work, or is it supplementary to what you already have? Ideally, we 
should land such an implementation to properly track and profile the userq flow.

Yeah i am aware of it that you did propose it. Also true that till now even i dont see any traces added in code. Check once with Pierre and i guess then good to go ahead.

Regards
Sunil Khatri


Regards,
       Prike

-----Original Message-----
From: Khatri, Sunil <[email protected]>
Sent: Monday, May 11, 2026 9:57 PM
To: Liang, Prike <[email protected]>; [email protected]
Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
<[email protected]>; Pelloux-Prayer, Pierre-Eric <Pierre-eric.Pelloux-
[email protected]>
Subject: Re: [PATCH 1/4] drm/amdgpu: add userq create and destroy tracepoints

If i am not wrong Pierre eric did work on the traces for user queues. I have 
dropped
my patches for the same reason but not sure if the traces patches are merged. 
Could
you check with him once ?

On 11-05-2026 07:24 pm, Prike Liang wrote:
Add ftrace events around user queue creation and destruction to
profile queue setup and teardown latency.

Signed-off-by: Prike Liang <[email protected]>
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 58
+++++++++++++++++++++++
   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 +++++
   2 files changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d13e64a69e25..5a01f63d1f32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -578,6 +578,64 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
                   __entry->value)
   );

+DECLARE_EVENT_CLASS(amdgpu_userq_queue,
+       TP_PROTO(struct amdgpu_usermode_queue *queue),
+       TP_ARGS(queue),
+       TP_STRUCT__entry(
+                        __field(struct amdgpu_usermode_queue *, queue)
+                        __field(u64, doorbell_index)
+                        __field(int, queue_type)
+                        __field(int, state)
+                        __field(u32, xcp_id)
+                        ),
+       TP_fast_assign(
+                      __entry->queue = queue;
+                      __entry->doorbell_index = queue ? queue->doorbell_index :
0;
+                      __entry->queue_type = queue ? queue->queue_type : -1;
+                      __entry->state = queue ? queue->state : -1;
+                      __entry->xcp_id = queue ? queue->xcp_id : 0;
+                      ),
+       TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u",
+                 __entry->queue, __entry->doorbell_index,
+                 __entry->queue_type, __entry->state, __entry->xcp_id) );
+DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_create_start,
+        TP_PROTO(struct amdgpu_usermode_queue *queue),
+        TP_ARGS(queue));
+DEFINE_EVENT(amdgpu_userq_queue, amdgpu_userq_destroy_start,
+        TP_PROTO(struct amdgpu_usermode_queue *queue),
+        TP_ARGS(queue));
+DECLARE_EVENT_CLASS(amdgpu_userq_queue_result,
+       TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+       TP_ARGS(queue, result),
+       TP_STRUCT__entry(
+                        __field(struct amdgpu_usermode_queue *, queue)
+                        __field(u64, doorbell_index)
+                        __field(int, queue_type)
+                        __field(int, state)
+                        __field(u32, xcp_id)
+                        __field(int, result)
+                        ),
+       TP_fast_assign(
+                      __entry->queue = queue;
+                      __entry->doorbell_index = queue ? queue->doorbell_index :
0;
+                      __entry->queue_type = queue ? queue->queue_type : -1;
+                      __entry->state = queue ? queue->state : -1;
+                      __entry->xcp_id = queue ? queue->xcp_id : 0;
+                      __entry->result = result;
+                      ),
+       TP_printk("queue=%p, doorbell=%llu, type=%d, state=%d, xcp_id=%u,
result=%d",
+                 __entry->queue, __entry->doorbell_index,
+                 __entry->queue_type, __entry->state,
+                 __entry->xcp_id, __entry->result) );
+DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_create_end,
+        TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+        TP_ARGS(queue, result));
+DEFINE_EVENT(amdgpu_userq_queue_result, amdgpu_userq_destroy_end,
+        TP_PROTO(struct amdgpu_usermode_queue *queue, int result),
+        TP_ARGS(queue, result));
+
   #undef AMDGPU_JOB_GET_TIMELINE_NAME
   #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 3077ca4e27a0..50c46d31fbae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -33,6 +33,7 @@
   #include "amdgpu_userq.h"
   #include "amdgpu_hmm.h"
   #include "amdgpu_userq_fence.h"
+#include "amdgpu_trace.h"

   u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
   {
@@ -617,6 +618,8 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr
*uq_mgr, struct amdgpu_usermode_que

     int r = 0;

+   trace_amdgpu_userq_destroy_start(queue);
+
     cancel_delayed_work_sync(&uq_mgr->resume_work);

     /* Cancel any pending hang detection work and cleanup */ @@ -625,6
+628,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct
amdgpu_usermode_que
     r = amdgpu_bo_reserve(vm->root.bo, false);
     if (r) {
             drm_file_err(uq_mgr->file, "Failed to reserve root bo during
userqueue destroy\n");
+           trace_amdgpu_userq_destroy_end(queue, r);
             return r;
     }
     amdgpu_userq_buffer_vas_list_cleanup(adev, queue); @@ -650,6 +654,7
@@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct
amdgpu_usermode_que
     amdgpu_bo_unpin(queue->wptr_obj.obj);
     amdgpu_bo_unreserve(queue->wptr_obj.obj);
     amdgpu_bo_unref(&queue->wptr_obj.obj);
+   trace_amdgpu_userq_destroy_end(queue, r);
     kfree(queue);

     pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -754,6 +759,7 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)

     mutex_init(&queue->fence_drv_lock);
     xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+   trace_amdgpu_userq_create_start(queue);
     r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
     if (r)
             goto free_queue;
@@ -809,6 +815,7 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
              * This drops the extra and last reference which should take
              * care of all cleanup.
              */
+           trace_amdgpu_userq_create_end(queue, r);
             amdgpu_userq_put(queue);
             amdgpu_userq_put(queue);
             return r;
@@ -826,6 +833,7 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
             r = amdgpu_userq_map_helper(queue);
             if (r) {
                     drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+                   trace_amdgpu_userq_create_end(queue, r);
                     mutex_unlock(&uq_mgr->userq_mutex);
                     /* Prevent racing with close */
                     if (xa_erase(&uq_mgr->userq_xa, qid) == queue) @@ -839,6
+847,7
@@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
     atomic_inc(&uq_mgr->userq_count[queue->queue_type]);

     amdgpu_debugfs_userq_init(filp, queue, qid);
+   trace_amdgpu_userq_create_end(queue, 0);
     amdgpu_userq_put(queue);
     args->out.queue_id = qid;
     return 0;
@@ -853,6 +862,8 @@ amdgpu_userq_create(struct drm_file *filp, union
drm_amdgpu_userq *args)
   free_fence_drv:
     amdgpu_userq_fence_driver_free(queue);
   free_queue:
+   if (queue)
+           trace_amdgpu_userq_create_end(queue, r);
     kfree(queue);
   err_pm_runtime:
     pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);

Reply via email to