Module: Mesa
Branch: master
Commit: 6ed433259140579a3e2599d8caa2797d3559978c
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6ed433259140579a3e2599d8caa2797d3559978c
Author: Samuel Pitoiset <[email protected]>
Date: Fri Dec 11 15:43:51 2020 +0100

radv: dump VA ranges history when a GPU hang is detected

This is enabled only with RADV_DEBUG=hang. This adds a small

Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3904
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7891>

---

 src/amd/vulkan/radv_debug.c | 8 +++
 src/amd/vulkan/radv_radeon_winsys.h | 2 +
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 64 +++++++++++++++++++++++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 4 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 5 ++
 5 files changed, 83 insertions(+)

diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index e12b50dabf3..106ff4a5495 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -734,6 +734,14 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) fclose(f); } + /* Dump BO log. */ + snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log"); + f = fopen(dump_path, "w+"); + if (f) { + device->ws->dump_bo_log(device->ws, f); + fclose(f); + } + /* Dump VM fault info. 
*/ if (vm_fault_occurred) { snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log"); diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 8cf1b38f242..be5dd62b638 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -301,6 +301,8 @@ struct radeon_winsys { void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file); + void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file); + int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info, struct radeon_surf *surf); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 917eb9069ae..c04767cd3c7 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -39,6 +39,7 @@ #include "util/u_atomic.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/os_time.h" static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo); @@ -282,6 +283,39 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent, return VK_SUCCESS; } +struct radv_amdgpu_winsys_bo_log { + struct list_head list; + uint64_t va; + uint64_t size; + uint64_t timestamp; /* CPU timestamp */ + uint8_t is_virtual : 1; + uint8_t destroyed : 1; +}; + +static void radv_amdgpu_log_bo(struct radv_amdgpu_winsys_bo *bo, + bool destroyed) +{ + struct radv_amdgpu_winsys *ws = bo->ws; + struct radv_amdgpu_winsys_bo_log *bo_log = NULL; + + if (!bo->ws->debug_log_bos) + return; + + bo_log = malloc(sizeof(*bo_log)); + if (!bo_log) + return; + + bo_log->va = bo->base.va; + bo_log->size = bo->size; + bo_log->timestamp = os_time_get_nano(); + bo_log->is_virtual = bo->is_virtual; + bo_log->destroyed = destroyed; + + u_rwlock_wrlock(&ws->log_bo_list_lock); + list_addtail(&bo_log->list, &ws->log_bo_list); + u_rwlock_wrunlock(&ws->log_bo_list_lock); +} + static void radv_amdgpu_winsys_bo_destroy(struct 
radeon_winsys_bo *_bo) { struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); @@ -289,6 +323,9 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) if (p_atomic_dec_return(&bo->ref_count)) return; + + radv_amdgpu_log_bo(bo, true); + if (bo->is_virtual) { for (uint32_t i = 0; i < bo->range_count; ++i) { radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i); @@ -391,6 +428,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, bo->ranges[0].bo_offset = 0; radv_amdgpu_winsys_virtual_map(bo, bo->ranges); + radv_amdgpu_log_bo(bo, false); + return (struct radeon_winsys_bo *)bo; } @@ -485,6 +524,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, align64(bo->size, ws->info.gart_page_size)); radv_amdgpu_add_buffer_to_global_list(bo); + radv_amdgpu_log_bo(bo, false); + return (struct radeon_winsys_bo *)bo; error_va_map: amdgpu_bo_free(buf_handle); @@ -592,6 +633,8 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, align64(bo->size, ws->info.gart_page_size)); radv_amdgpu_add_buffer_to_global_list(bo); + radv_amdgpu_log_bo(bo, false); + return (struct radeon_winsys_bo *)bo; error_va_map: @@ -672,6 +715,8 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, align64(bo->size, ws->info.gart_page_size)); radv_amdgpu_add_buffer_to_global_list(bo); + radv_amdgpu_log_bo(bo, false); + return (struct radeon_winsys_bo *)bo; error_va_map: amdgpu_va_range_free(va_handle); @@ -862,6 +907,24 @@ static int radv_amdgpu_bo_va_compare(const void *a, const void *b) return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 
1 : 0; } +static void radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file) +{ + struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); + struct radv_amdgpu_winsys_bo_log *bo_log; + + if (!ws->debug_log_bos) + return; + + u_rwlock_rdlock(&ws->log_bo_list_lock); + LIST_FOR_EACH_ENTRY(bo_log, &ws->log_bo_list, list) { + fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", + (long long)bo_log->timestamp, (long long)bo_log->va, + (long long)(bo_log->va + bo_log->size), + bo_log->destroyed, bo_log->is_virtual); + } + u_rwlock_rdunlock(&ws->log_bo_list_lock); +} + static void radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); @@ -905,4 +968,5 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws) ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind; ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd; ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges; + ws->base.dump_bo_log = radv_amdgpu_dump_bo_log; } diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index dc7a9fe1c36..9510d547f36 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -168,6 +168,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws) pthread_mutex_destroy(&ws->syncobj_lock); u_rwlock_destroy(&ws->global_bo_list_lock); + u_rwlock_destroy(&ws->log_bo_list_lock); ac_addrlib_destroy(ws->addrlib); amdgpu_device_deinitialize(ws->dev); FREE(rws); @@ -195,6 +196,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags) goto winsys_fail; ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS); + ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG; if (debug_flags & RADV_DEBUG_NO_IBS) ws->use_ib_bos = false; @@ -203,6 +205,8 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, 
uint64_t perftest_flags) ws->use_llvm = debug_flags & RADV_DEBUG_LLVM; list_inithead(&ws->global_bo_list); u_rwlock_init(&ws->global_bo_list_lock); + list_inithead(&ws->log_bo_list); + u_rwlock_init(&ws->log_bo_list_lock); pthread_mutex_init(&ws->syncobj_lock, NULL); ws->base.query_info = radv_amdgpu_winsys_query_info; ws->base.query_value = radv_amdgpu_winsys_query_value; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h index 7807c4d947f..c316602af15 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h @@ -44,6 +44,7 @@ struct radv_amdgpu_winsys { struct ac_addrlib *addrlib; bool debug_all_bos; + bool debug_log_bos; bool use_ib_bos; bool zero_all_vram_allocs; bool use_local_bos; @@ -61,6 +62,10 @@ struct radv_amdgpu_winsys { pthread_mutex_t syncobj_lock; uint32_t *syncobj; uint32_t syncobj_count, syncobj_capacity; + + /* BO log */ + struct u_rwlock log_bo_list_lock; + struct list_head log_bo_list; }; static inline struct radv_amdgpu_winsys * _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
