Module: Mesa
Branch: main
Commit: 020baed66e4ffe4595de2236d32562d74a6d66b0
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=020baed66e4ffe4595de2236d32562d74a6d66b0

Author: Emma Anholt <[email protected]>
Date:   Wed Mar  8 12:30:24 2023 -0800

tu/perfetto: Use tu_CmdBeginDebugUtilsLabelEXT as a stage event in perfetto.

This lets zink mark points of interest (particularly its barriers and
blits) with some useful data, for presenting in perfetto traces.

Closes: #8487
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22278>

---

 src/freedreno/vulkan/tu_device.cc      | 52 +++++++++++++++++++++
 src/freedreno/vulkan/tu_perfetto.cc    | 84 ++++++++++++++++++++++++++++++++--
 src/freedreno/vulkan/tu_perfetto.h     |  4 ++
 src/freedreno/vulkan/tu_tracepoints.py |  9 ++++
 src/util/perf/u_perfetto_renderpass.h  |  3 ++
 5 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/src/freedreno/vulkan/tu_device.cc 
b/src/freedreno/vulkan/tu_device.cc
index 4d503e57073..7497a182bf0 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -3459,3 +3459,55 @@ tu_debug_bos_print_stats(struct tu_device *dev)
 
    mtx_unlock(&dev->bo_mutex);
 }
+
+void
+tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
+                              const VkDebugUtilsLabelEXT *pLabelInfo)
+{
+   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
+
+   vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
+
+   /* Also record the label as a trace annotation.  Note that the spec says:
+    *
+    * "An application may open a debug label region in one command buffer and
+    *  close it in another, or otherwise split debug label regions across
+    *  multiple command buffers or multiple queue submissions. When viewed
+    *  from the linear series of submissions to a single queue, the calls to
+    *  vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
+    *  matched and balanced."
+    *
+    * But if you're beginning labeling during a renderpass and ending outside
+    * it, or vice versa, these trace ranges in perfetto will be unbalanced.  I
+    * expect that u_trace and perfetto will do something like take just one of
+    * the begins/ends, or drop the event entirely, but not crash.  Similarly,
+    * I think we'll have problems if the tracepoints are split across cmd
+    * buffers. Still, getting the simple case of cmd buffer annotation into
+    * perfetto should prove useful.
+    */
+   const char *label = pLabelInfo->pLabelName;
+   if (cmd_buffer->state.pass) {
+      trace_start_cmd_buffer_annotation_rp(
+         &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
+   } else {
+      trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
+                                        strlen(label), label);
+   }
+}
+
+void
+tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
+{
+   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
+   /* Only end a trace range if this cmd buffer has a label recorded. */
+   if (cmd_buffer->vk.labels.size > 0) {
+      if (cmd_buffer->state.pass) {
+         trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
+                                            &cmd_buffer->draw_cs);
+      } else {
+         trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
+      }
+   }
+   /* NOTE(review): presumably pops the label, hence the check above. */
+   vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
+}
diff --git a/src/freedreno/vulkan/tu_perfetto.cc 
b/src/freedreno/vulkan/tu_perfetto.cc
index d54e39a6da1..40e2f0e8f80 100644
--- a/src/freedreno/vulkan/tu_perfetto.cc
+++ b/src/freedreno/vulkan/tu_perfetto.cc
@@ -40,7 +40,9 @@ enum {
  */
 enum tu_stage_id {
    CMD_BUFFER_STAGE_ID,
+   CMD_BUFFER_ANNOTATION_STAGE_ID,
    RENDER_PASS_STAGE_ID,
+   CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
    BINNING_STAGE_ID,
    GMEM_STAGE_ID,
    BYPASS_STAGE_ID,
@@ -66,7 +68,9 @@ static const struct {
    const char *desc;
 } stages[] = {
    [CMD_BUFFER_STAGE_ID]     = { "Command Buffer" },
+   [CMD_BUFFER_ANNOTATION_STAGE_ID]     = { "Annotation", "Command Buffer 
Annotation" },
    [RENDER_PASS_STAGE_ID]    = { "Render Pass" },
+   [CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID]    = { "Annotation", "Render 
Pass Command Buffer Annotation" },
    [BINNING_STAGE_ID]        = { "Binning", "Perform Visibility pass and 
determine target bins" },
    [GMEM_STAGE_ID]           = { "GMEM", "Rendering to GMEM" },
    [BYPASS_STAGE_ID]         = { "Bypass", "Rendering to system memory" },
@@ -134,6 +138,9 @@ send_descriptors(TuRenderpassDataSource::TraceContext &ctx, 
uint64_t ts_ns)
 
    auto packet = ctx.NewTracePacket();
 
+   /* This must be set before interned data is sent. */
+   
packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
+
    packet->set_timestamp(0);
 
    auto event = packet->set_gpu_render_stage_event();
@@ -192,6 +199,7 @@ static void
 stage_start(struct tu_device *dev,
             uint64_t ts_ns,
             enum tu_stage_id stage_id,
+            const char *app_event,
             const void *payload = nullptr,
             size_t payload_size = 0,
             trace_payload_as_extra_func payload_as_extra = nullptr)
@@ -214,10 +222,18 @@ stage_start(struct tu_device *dev,
 
    *stage = (struct tu_perfetto_stage) {
       .stage_id = stage_id,
+      .stage_iid = 0,
       .start_ts = ts_ns,
       .payload = payload,
       .start_payload_function = (void *) payload_as_extra,
    };
+
+   if (app_event) {
+      TuRenderpassDataSource::Trace([=](auto tctx) {
+         stage->stage_iid =
+            tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event);
+      });
+   }
 }
 
 static void
@@ -265,8 +281,11 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum 
tu_stage_id stage_id,
       event->set_event_id(0); // ???
       event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
       event->set_duration(ts_ns - stage->start_ts);
-      event->set_stage_id(stage->stage_id);
-      event->set_context((uintptr_t)dev);
+      if (stage->stage_iid)
+         event->set_stage_iid(stage->stage_iid);
+      else
+         event->set_stage_id(stage->stage_id);
+      event->set_context((uintptr_t) dev);
       event->set_submission_id(submission_id);
 
       if (stage->payload) {
@@ -385,6 +404,13 @@ tu_perfetto_submit(struct tu_device *dev, uint32_t 
submission_id)
 /*
  * Trace callbacks, called from u_trace once the timestamps from GPU have been
  * collected.
+ *
+ * The default "extra" funcs are code-generated into tu_tracepoints_perfetto.h
+ * and just take the tracepoint's args and add them as name/value pairs in the
+ * perfetto events.  This file can usually just map a tu_perfetto_* to
+ * stage_start/end with a call to that codegenned "extra" func.  But you can
+ * also provide your own entrypoint and extra funcs if you want to change that
+ * mapping.
  */
 
 #define CREATE_EVENT_CALLBACK(event_name, stage_id)                            
     \
@@ -393,7 +419,7 @@ tu_perfetto_submit(struct tu_device *dev, uint32_t 
submission_id)
       const struct trace_start_##event_name *payload)                          
     \
    {                                                                           
     \
       stage_start(                                                             
     \
-         dev, ts_ns, stage_id, payload, sizeof(*payload),                      
     \
+         dev, ts_ns, stage_id, NULL, payload, sizeof(*payload),                
     \
          (trace_payload_as_extra_func) 
&trace_payload_as_extra_start_##event_name); \
    }                                                                           
     \
                                                                                
     \
@@ -420,6 +446,58 @@ CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
 CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
 CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)
 
+void
+tu_perfetto_start_cmd_buffer_annotation(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_start_cmd_buffer_annotation *payload)
+{
+   /* The label is passed as the app_event, so no extra func is needed. */
+   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, 
payload,
+               sizeof(*payload), NULL);
+}
+
+void
+tu_perfetto_end_cmd_buffer_annotation(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_end_cmd_buffer_annotation *payload)
+{
+   /* No extra func necessary: the label was passed as the app_event at stage
+    * start, and stage_end() uses the stage_iid allocated for it there.
+    */
+   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data,
+             payload, NULL);
+}
+
+void
+tu_perfetto_start_cmd_buffer_annotation_rp(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_start_cmd_buffer_annotation_rp *payload)
+{
+   /* The label is passed as the app_event, so no extra func is needed. */
+   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
+               payload->str, payload, sizeof(*payload), NULL);
+}
+
+void
+tu_perfetto_end_cmd_buffer_annotation_rp(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_end_cmd_buffer_annotation_rp *payload)
+{
+   /* No extra func necessary: the label was passed as the app_event at stage
+    * start, and stage_end() uses the stage_iid allocated for it there.
+    */
+   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
+             flush_data, payload, NULL);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/freedreno/vulkan/tu_perfetto.h 
b/src/freedreno/vulkan/tu_perfetto.h
index 12f2bce1678..92a56c6355d 100644
--- a/src/freedreno/vulkan/tu_perfetto.h
+++ b/src/freedreno/vulkan/tu_perfetto.h
@@ -22,6 +22,10 @@ struct tu_u_trace_submission_data;
 
 struct tu_perfetto_stage {
    int stage_id;
+   /* dynamically allocated stage iid, for app_events.  0 if stage_id should be
+    * used instead.
+    */
+   uint64_t stage_iid;
    uint64_t start_ts;
    const void* payload;
    void* start_payload_function;
diff --git a/src/freedreno/vulkan/tu_tracepoints.py 
b/src/freedreno/vulkan/tu_tracepoints.py
index e3d5da5b766..5af5ce69159 100644
--- a/src/freedreno/vulkan/tu_tracepoints.py
+++ b/src/freedreno/vulkan/tu_tracepoints.py
@@ -123,6 +123,15 @@ begin_end_tp('compute',
           Arg(type='uint16_t', var='num_groups_y',   c_format='%u'),
           Arg(type='uint16_t', var='num_groups_z',   c_format='%u')])
 
+
+# Annotations for Cmd(Begin|End)DebugUtilsLabelEXT ("_rp" = inside a render pass)
+for suffix in ["", "_rp"]:
+    begin_end_tp('cmd_buffer_annotation' + suffix,
+                    args=[ArgStruct(type='unsigned', var='len'),
+                          ArgStruct(type='const char *', var='str'),],
+                    tp_struct=[Arg(type='uint8_t', name='dummy', var='0', 
c_format='%hhu'),
+                               Arg(type='char', name='str', var='str', 
c_format='%s', length_arg='len + 1', copy_func='strncpy'),])
+
 utrace_generate(cpath=args.utrace_src,
                 hpath=args.utrace_hdr,
                 ctx_param='struct tu_device *dev',
diff --git a/src/util/perf/u_perfetto_renderpass.h 
b/src/util/perf/u_perfetto_renderpass.h
index a128cf6fd28..13aad3481eb 100644
--- a/src/util/perf/u_perfetto_renderpass.h
+++ b/src/util/perf/u_perfetto_renderpass.h
@@ -114,6 +114,9 @@ class MesaRenderpassDataSource
     * event in the UI, rather than needing to click into the event to find the
     * name in the metadata.  Intended for use with
     * vkCmdBeginDebugUtilsLabelEXT() and glPushDebugGroup().
+    *
+    * Note that SEQ_INCREMENTAL_STATE_CLEARED must have been set in the
+    * sequence before this is called.
     */
    uint64_t debug_marker_stage(TraceContext &ctx, const char *name)
    {

Reply via email to