From: Pierre-Eric Pelloux-Prayer <[email protected]>

Add ftrace events to track userq fence emission and signaling, as well
as queue state transitions.

Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Signed-off-by: Prike Liang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h     | 129 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     |  21 +++
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   |  13 +-
 3 files changed, 160 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 4ff8a4d7bb8b..32d8c36caaf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -28,6 +28,8 @@
 #include <linux/types.h>
 #include <linux/tracepoint.h>
 
+#include "amdgpu_userq_fence.h"
+
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM amdgpu
 #define TRACE_INCLUDE_FILE amdgpu_trace
@@ -659,6 +661,133 @@ DEFINE_EVENT(amdgpu_userq_eviction_fence, 
amdgpu_userq_eviction_fence_enable_sig
 DEFINE_EVENT(amdgpu_userq_eviction_fence, amdgpu_userq_eviction_fence_signal,
            TP_PROTO(u64 context, u64 seqno),
            TP_ARGS(context, seqno));
+TRACE_EVENT(amdgpu_userq_job_run, /* emitted by amdgpu_userq_signal_ioctl() right after the new fence is initialised */
+           TP_PROTO(struct device *device, struct amdgpu_usermode_queue *queue, struct amdgpu_userq_fence *fence),
+           TP_ARGS(device, queue, fence),
+           TP_STRUCT__entry(
+                            __field(u64, fence_context) /* context of the dma_fence embedded in the userq fence */
+                            __field(u64, fence_seqno) /* seqno of that dma_fence */
+                            __string(dev, dev_name(device)) /* name of the device passed by the caller */
+                            __field(u64, doorbell_index)
+                            __field(u64, client_id) /* client_id of the drm_file behind queue->userq_mgr */
+                            __field(u32, queue_type)
+                            ),
+           TP_fast_assign(
+                          __entry->fence_context = fence->base.context;
+                          __entry->fence_seqno = fence->base.seqno;
+                          __assign_str(dev);
+                          __entry->doorbell_index = queue->doorbell_index;
+                          __entry->client_id = queue->userq_mgr->file->client_id;
+                          __entry->queue_type = queue->queue_type;
+                          ),
+           TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, fence=%llu:%llu",
+                     __get_str(dev), __entry->client_id, __entry->queue_type, __entry->doorbell_index,
+                     __entry->fence_context,
+                     __entry->fence_seqno)
+);
+
+TRACE_EVENT(amdgpu_userq_job_done, /* emitted by amdgpu_userq_fence_driver_process() right after dma_fence_signal() on the fence */
+           TP_PROTO(struct amdgpu_userq_fence *fence),
+           TP_ARGS(fence),
+           TP_STRUCT__entry(
+                            __field(u64, fence_context) /* context of the dma_fence embedded in the userq fence */
+                            __field(u64, fence_seqno) /* seqno of that dma_fence */
+                            ),
+           TP_fast_assign(
+                          __entry->fence_context = fence->base.context;
+                          __entry->fence_seqno = fence->base.seqno;
+                          ),
+           TP_printk("fence=%llu:%llu", /* same context:seqno layout as the fence= part of amdgpu_userq_job_run */
+                     __entry->fence_context,
+                     __entry->fence_seqno)
+);
+
+TRACE_EVENT(amdgpu_userq_job_queue, /* emitted by amdgpu_userq_wait_return_fence_info() for the waiting queue, before its fence loop */
+           TP_PROTO(struct device *device,
+                    struct amdgpu_usermode_queue *queue),
+           TP_ARGS(device, queue),
+           TP_STRUCT__entry(__field(u64, context) /* context of the queue's fence driver */
+                            __string(dev, dev_name(device)) /* name of the device passed by the caller */
+                            __field(u64, doorbell_index)
+                            __field(u64, client_id) /* client_id of the drm_file behind queue->userq_mgr */
+                            __field(u32, queue_type)
+                            ),
+           TP_fast_assign(__assign_str(dev);
+                          __entry->doorbell_index = queue->doorbell_index;
+                          __entry->queue_type = queue->queue_type;
+                          __entry->client_id = queue->userq_mgr->file->client_id;
+                          __entry->context = queue->fence_drv->context;
+                         ),
+           TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, context=%llu",
+                     __get_str(dev), __entry->client_id, __entry->queue_type,
+                     __entry->doorbell_index, __entry->context)
+);
+
+TRACE_EVENT(amdgpu_userq_job_add_dep, /* emitted inside the fence loop of amdgpu_userq_wait_return_fence_info(), once per reported userq fence */
+           TP_PROTO(struct device *device, struct amdgpu_usermode_queue *queue, struct amdgpu_userq_fence *dep),
+           TP_ARGS(device, queue, dep),
+           TP_STRUCT__entry(
+                            __field(u64, context) /* context of the waiting queue's fence driver */
+                            __field(u64, dep_context) /* context of the dma_fence embedded in dep */
+                            __field(u64, dep_seqno) /* seqno of that dma_fence */
+                            __string(dev, dev_name(device)) /* name of the device passed by the caller */
+                            __field(u64, doorbell_index)
+                            __field(u64, client_id) /* client_id of the drm_file behind queue->userq_mgr */
+                            __field(u32, queue_type)
+                            ),
+           TP_fast_assign(
+                          __assign_str(dev);
+                          __entry->doorbell_index = queue->doorbell_index;
+                          __entry->queue_type = queue->queue_type;
+                          __entry->client_id = queue->userq_mgr->file->client_id;
+                          __entry->context = queue->fence_drv->context;
+                          __entry->dep_context = dep->base.context;
+                          __entry->dep_seqno = dep->base.seqno;
+                          ),
+           TP_printk("dev=%s, client_id=%llu, type=%u, doorbell=%llu, context=%llu depends on fence=%llu:%llu",
+                     __get_str(dev), __entry->client_id, __entry->queue_type, __entry->doorbell_index, __entry->context,
+                     __entry->dep_context,
+                     __entry->dep_seqno)
+);
+
+TRACE_EVENT(amdgpu_userq_state_start, /* emitted just before a userq_funcs preempt/restore/unmap/map call is issued for the queue */
+           TP_PROTO(struct amdgpu_usermode_queue *queue),
+           TP_ARGS(queue),
+           TP_STRUCT__entry(
+                            __field(u64, doorbell_index)
+                            __field(u64, client_id) /* client_id of the drm_file behind queue->userq_mgr */
+                            __field(u32, queue_type)
+                            __field(u32, from) /* queue->state at the time the event fires */
+                            ),
+           TP_fast_assign(
+                          __entry->doorbell_index = queue->doorbell_index;
+                          __entry->queue_type = queue->queue_type;
+                          __entry->client_id = queue->userq_mgr->file->client_id;
+                          __entry->from = queue->state;
+                          ),
+           TP_printk("client_id=%llu, type=%u, doorbell=%llu, from=%u", /* from is stored as u32, so print it unsigned like type */
+                     __entry->client_id, __entry->queue_type, __entry->doorbell_index, __entry->from)
+);
+
+TRACE_EVENT(amdgpu_userq_state_changed, /* emitted by callers just before they store new_state into queue->state */
+           TP_PROTO(struct amdgpu_usermode_queue *queue, enum amdgpu_userq_state new_state),
+           TP_ARGS(queue, new_state),
+           TP_STRUCT__entry(
+                            __field(u64, doorbell_index)
+                            __field(u64, client_id) /* client_id of the drm_file behind queue->userq_mgr */
+                            __field(u32, queue_type)
+                            __field(u32, to) /* the new_state argument, i.e. the state being entered */
+                            ),
+           TP_fast_assign(
+                          __entry->doorbell_index = queue->doorbell_index;
+                          __entry->queue_type = queue->queue_type;
+                          __entry->client_id = queue->userq_mgr->file->client_id;
+                          __entry->to = new_state;
+                          ),
+           TP_printk("client_id=%llu, type=%u, doorbell=%llu, to=%u", /* to is stored as u32, so print it unsigned like type */
+                     __entry->client_id, __entry->queue_type, __entry->doorbell_index, __entry->to)
+);
+
 #undef AMDGPU_JOB_GET_TIMELINE_NAME
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index e27f9a76f986..60d1186af286 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -329,11 +329,15 @@ static int amdgpu_userq_preempt_helper(struct 
amdgpu_usermode_queue *queue)
        int r;
 
        if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+               trace_amdgpu_userq_state_start(queue);
+
                r = userq_funcs->preempt(queue);
                if (r) {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_HUNG);
                        queue->state = AMDGPU_USERQ_STATE_HUNG;
                        return r;
                } else {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_PREEMPTED);
                        queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
                }
        }
@@ -349,10 +353,14 @@ static int amdgpu_userq_restore_helper(struct 
amdgpu_usermode_queue *queue)
        int r = 0;
 
        if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
+               trace_amdgpu_userq_state_start(queue);
+
                r = userq_funcs->restore(queue);
                if (r) {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_HUNG);
                        queue->state = AMDGPU_USERQ_STATE_HUNG;
                } else {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_MAPPED);
                        queue->state = AMDGPU_USERQ_STATE_MAPPED;
                }
        }
@@ -370,12 +378,15 @@ static int amdgpu_userq_unmap_helper(struct 
amdgpu_usermode_queue *queue)
 
        if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
            (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
+               trace_amdgpu_userq_state_start(queue);
 
                r = userq_funcs->unmap(queue);
                if (r) {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_HUNG);
                        queue->state = AMDGPU_USERQ_STATE_HUNG;
                        return r;
                } else {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_UNMAPPED);
                        queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
                }
        }
@@ -392,11 +403,15 @@ static int amdgpu_userq_map_helper(struct 
amdgpu_usermode_queue *queue)
        int r;
 
        if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
+               trace_amdgpu_userq_state_start(queue);
+
                r = userq_funcs->map(queue);
                if (r) {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_HUNG);
                        queue->state = AMDGPU_USERQ_STATE_HUNG;
                        return r;
                } else {
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_MAPPED);
                        queue->state = AMDGPU_USERQ_STATE_MAPPED;
                }
        }
@@ -1007,6 +1022,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
                if (!amdgpu_userq_buffer_vas_mapped(queue)) {
                        drm_file_err(uq_mgr->file,
                                     "trying restore queue without va 
mapping\n");
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_INVALID_VA);
                        queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
                        continue;
                }
@@ -1502,12 +1518,14 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
                if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
                        continue;
 
+               trace_amdgpu_userq_state_start(queue);
                userq_funcs = adev->userq_funcs[queue->queue_type];
                userq_funcs->unmap(queue);
                /* just mark all queues as hung at this point.
                 * if unmap succeeds, we could map again
                 * in amdgpu_userq_post_reset() if vram is not lost
                 */
+               trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_HUNG);
                queue->state = AMDGPU_USERQ_STATE_HUNG;
                amdgpu_userq_fence_driver_force_completion(queue);
        }
@@ -1526,6 +1544,8 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, 
bool vram_lost)
 
        xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
                if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) {
+                       trace_amdgpu_userq_state_start(queue);
+
                        userq_funcs = adev->userq_funcs[queue->queue_type];
                        /* Re-map queue */
                        r = userq_funcs->map(queue);
@@ -1533,6 +1553,7 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, 
bool vram_lost)
                                dev_err(adev->dev, "Failed to remap queue 
%ld\n", queue_id);
                                continue;
                        }
+                       trace_amdgpu_userq_state_changed(queue, 
AMDGPU_USERQ_STATE_MAPPED);
                        queue->state = AMDGPU_USERQ_STATE_MAPPED;
                }
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 008330a0d852..00cc7194321c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -30,7 +30,7 @@
 #include <drm/drm_syncobj.h>
 
 #include "amdgpu.h"
-#include "amdgpu_userq_fence.h"
+#include "amdgpu_trace.h"
 
 #define AMDGPU_USERQ_MAX_HANDLES       (1U << 16)
 
@@ -169,6 +169,7 @@ amdgpu_userq_fence_driver_process(struct 
amdgpu_userq_fence_driver *fence_drv)
                fence = &userq_fence->base;
                list_del_init(&userq_fence->link);
                dma_fence_signal(fence);
+               trace_amdgpu_userq_job_done(userq_fence);
                /* Drop fence_drv_array outside fence_list_lock
                 * to avoid the recursion lock.
                 */
@@ -528,6 +529,8 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void 
*data,
        /* Create the new fence */
        amdgpu_userq_fence_init(queue, fence, wptr);
 
+       trace_amdgpu_userq_job_run(dev->dev, queue, fence);
+
        mutex_unlock(&userq_mgr->userq_mutex);
 
        /*
@@ -701,7 +704,7 @@ amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait 
*wait_info,
 }
 
 static int
-amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
+amdgpu_userq_wait_return_fence_info(struct drm_device *dev, struct drm_file 
*filp,
                                    struct drm_amdgpu_userq_wait *wait_info,
                                    u32 *syncobj_handles, u32 *timeline_points,
                                    u32 *timeline_handles,
@@ -835,6 +838,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
                goto free_fences;
        }
 
+       trace_amdgpu_userq_job_queue(dev->dev, waitq);
+
        for (i = 0, cnt = 0; i < num_fences; i++) {
                struct amdgpu_userq_fence_driver *fence_drv;
                struct amdgpu_userq_fence *userq_fence;
@@ -869,6 +874,8 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
 
                amdgpu_userq_fence_driver_get(fence_drv);
 
+               trace_amdgpu_userq_job_add_dep(dev->dev, waitq, userq_fence);
+
                /* Store drm syncobj's gpu va address and value */
                fence_info[cnt].va = fence_drv->va;
                fence_info[cnt].value = fences[i]->seqno;
@@ -968,7 +975,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void 
*data,
                                                   gobj_write,
                                                   gobj_read);
        } else {
-               r = amdgpu_userq_wait_return_fence_info(filp, wait_info,
+               r = amdgpu_userq_wait_return_fence_info(dev, filp, wait_info,
                                                        syncobj_handles,
                                                        timeline_points,
                                                        timeline_handles,
-- 
2.34.1

Reply via email to