Module: Mesa Branch: main Commit: 725ae34458ff3cbb9d87e08c8a73780672221a9e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=725ae34458ff3cbb9d87e08c8a73780672221a9e
Author: Danylo Piliaiev <[email protected]> Date: Fri Apr 15 17:26:48 2022 +0300 turnip: Add debug option to print gmem load/store skip stats TU_DEBUG=log_skip_gmem_ops would print stats about skipped gmem/load every second. Signed-off-by: Danylo Piliaiev <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15974> --- src/freedreno/vulkan/tu_clear_blit.c | 32 ++++++++++++++++++++++ src/freedreno/vulkan/tu_device.c | 6 ++++ src/freedreno/vulkan/tu_drm.c | 5 ++++ src/freedreno/vulkan/tu_kgsl.c | 5 ++++ src/freedreno/vulkan/tu_private.h | 7 +++++ src/freedreno/vulkan/tu_util.c | 53 ++++++++++++++++++++++++++++++++++++ src/freedreno/vulkan/tu_util.h | 3 ++ 7 files changed, 111 insertions(+) diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 555b5edf26f..0ef9cc8efe0 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -2808,6 +2808,22 @@ tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool load) { tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); + + if (!unlikely(cmd->device->physical_device->instance->debug_flags & + TU_DEBUG_LOG_SKIP_GMEM_OPS)) + return; + + uint64_t result_iova; + if (load) + result_iova = global_iova(cmd, dbg_gmem_taken_loads); + else + result_iova = global_iova(cmd, dbg_gmem_taken_stores); + + tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7); + tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B); + tu_cs_emit_qw(cs, result_iova); + tu_cs_emit_qw(cs, result_iova); + tu_cs_emit_qw(cs, global_iova(cmd, dbg_one)); } static void @@ -2815,6 +2831,22 @@ tu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool load) { tu_cond_exec_end(cs); + + if (!unlikely(cmd->device->physical_device->instance->debug_flags & + TU_DEBUG_LOG_SKIP_GMEM_OPS)) + return; + + uint64_t result_iova; + if (load) + result_iova = global_iova(cmd, dbg_gmem_total_loads); + else + result_iova = global_iova(cmd, 
dbg_gmem_total_stores); + + tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7); + tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B); + tu_cs_emit_qw(cs, result_iova); + tu_cs_emit_qw(cs, result_iova); + tu_cs_emit_qw(cs, global_iova(cmd, dbg_one)); } void diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 51b2f2d5b47..de6d302bf04 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -340,6 +340,7 @@ static const struct debug_control tu_debug_options[] = { { "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD }, { "rast_order", TU_DEBUG_RAST_ORDER }, { "unaligned_store", TU_DEBUG_UNALIGNED_STORE }, + { "log_skip_gmem_ops", TU_DEBUG_LOG_SKIP_GMEM_OPS }, { NULL, 0 } }; @@ -1810,6 +1811,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, struct tu6_global *global = device->global_bo->map; tu_init_clear_blit_shaders(device); global->predicate = 0; + global->dbg_one = (uint32_t)-1; + global->dbg_gmem_total_loads = 0; + global->dbg_gmem_taken_loads = 0; + global->dbg_gmem_total_stores = 0; + global->dbg_gmem_taken_stores = 0; tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK], &(VkClearColorValue) {}, false); tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK], diff --git a/src/freedreno/vulkan/tu_drm.c b/src/freedreno/vulkan/tu_drm.c index 3be7a5ea240..d27afdf94b5 100644 --- a/src/freedreno/vulkan/tu_drm.c +++ b/src/freedreno/vulkan/tu_drm.c @@ -1143,6 +1143,11 @@ tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit) submit->perf_pass_index : ~0; struct tu_queue_submit submit_req; + if (unlikely(queue->device->physical_device->instance->debug_flags & + TU_DEBUG_LOG_SKIP_GMEM_OPS)) { + tu_dbg_log_gmem_load_store_skips(queue->device); + } + pthread_mutex_lock(&queue->device->submit_mutex); VkResult ret = tu_queue_submit_create_locked(queue, submit, diff --git a/src/freedreno/vulkan/tu_kgsl.c b/src/freedreno/vulkan/tu_kgsl.c index 
cc7e25cd945..b1a4ae25fd3 100644 --- a/src/freedreno/vulkan/tu_kgsl.c +++ b/src/freedreno/vulkan/tu_kgsl.c @@ -362,6 +362,11 @@ tu_QueueSubmit(VkQueue _queue, TU_FROM_HANDLE(tu_syncobj, fence, _fence); VkResult result = VK_SUCCESS; + if (unlikely(queue->device->physical_device->instance->debug_flags & + TU_DEBUG_LOG_SKIP_GMEM_OPS)) { + tu_dbg_log_gmem_load_store_skips(queue->device); + } + uint32_t max_entry_count = 0; for (uint32_t i = 0; i < submitCount; ++i) { const VkSubmitInfo *submit = pSubmits + i; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 52b4fc3bccb..5c387c15b06 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -269,6 +269,7 @@ enum tu_debug_flags TU_DEBUG_RAST_ORDER = 1 << 14, TU_DEBUG_UNALIGNED_STORE = 1 << 15, TU_DEBUG_LAYOUT = 1 << 16, + TU_DEBUG_LOG_SKIP_GMEM_OPS = 1 << 17, }; struct tu_instance @@ -490,6 +491,12 @@ struct tu6_global /* To know when renderpass stats for autotune are valid */ volatile uint32_t autotune_fence; + volatile uint32_t dbg_one; + volatile uint32_t dbg_gmem_total_loads; + volatile uint32_t dbg_gmem_taken_loads; + volatile uint32_t dbg_gmem_total_stores; + volatile uint32_t dbg_gmem_taken_stores; + /* note: larger global bo will be used for customBorderColors */ struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[]; }; diff --git a/src/freedreno/vulkan/tu_util.c b/src/freedreno/vulkan/tu_util.c index cf6c0bfea89..2d54af74e61 100644 --- a/src/freedreno/vulkan/tu_util.c +++ b/src/freedreno/vulkan/tu_util.c @@ -31,6 +31,7 @@ #include <string.h> #include "util/u_math.h" +#include "util/timespec.h" #include "vk_enum_to_str.h" void PRINTFLIKE(3, 4) @@ -216,3 +217,55 @@ tu_framebuffer_tiling_config(struct tu_framebuffer *fb, tu_tiling_config_update_pipe_layout(fb, device); tu_tiling_config_update_pipes(fb, device); } + +void +tu_dbg_log_gmem_load_store_skips(struct tu_device *device) +{ + static uint32_t last_skipped_loads = 0; + 
static uint32_t last_skipped_stores = 0; + static uint32_t last_total_loads = 0; + static uint32_t last_total_stores = 0; + static struct timespec last_time = {}; + + pthread_mutex_lock(&device->submit_mutex); + + struct timespec current_time; + clock_gettime(CLOCK_MONOTONIC, &current_time); + + if (timespec_sub_to_nsec(&current_time, &last_time) > 1000 * 1000 * 1000) { + last_time = current_time; + } else { + pthread_mutex_unlock(&device->submit_mutex); + return; + } + + struct tu6_global *global = device->global_bo->map; + + uint32_t current_taken_loads = global->dbg_gmem_taken_loads; + uint32_t current_taken_stores = global->dbg_gmem_taken_stores; + uint32_t current_total_loads = global->dbg_gmem_total_loads; + uint32_t current_total_stores = global->dbg_gmem_total_stores; + + uint32_t skipped_loads = current_total_loads - current_taken_loads; + uint32_t skipped_stores = current_total_stores - current_taken_stores; + + uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads; + uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores; + + uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads; + uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores; + + mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n", + current_time_frame_total_loads, + current_time_frame_skipped_loads / (float) current_time_frame_total_loads * 100.f); + mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n", + current_time_frame_total_stores, + current_time_frame_skipped_stores / (float) current_time_frame_total_stores * 100.f); + + last_skipped_loads = skipped_loads; + last_skipped_stores = skipped_stores; + last_total_loads = current_total_loads; + last_total_stores = current_total_stores; + + pthread_mutex_unlock(&device->submit_mutex); +} \ No newline at end of file diff --git a/src/freedreno/vulkan/tu_util.h b/src/freedreno/vulkan/tu_util.h index 2ad7f86e36e..03d31f0c6c6 100644 --- 
a/src/freedreno/vulkan/tu_util.h +++ b/src/freedreno/vulkan/tu_util.h @@ -326,4 +326,7 @@ tu6_pack_border_color(struct bcolor_entry *bcolor, const VkClearColorValue *val, #undef PACK_F } +void +tu_dbg_log_gmem_load_store_skips(struct tu_device *device); + #endif /* TU_UTIL_H */
