Module: Mesa
Branch: master
Commit: 6ed433259140579a3e2599d8caa2797d3559978c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6ed433259140579a3e2599d8caa2797d3559978c

Author: Samuel Pitoiset <[email protected]>
Date:   Fri Dec 11 15:43:51 2020 +0100

radv: dump VA ranges history when a GPU hang is detected

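This is enabled only with RADV_DEBUG=hang. This adds a small overhead when enabled: every BO creation and destruction allocates a log entry and appends it to a list.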

Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3904
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7891>

---

 src/amd/vulkan/radv_debug.c                       |  8 +++
 src/amd/vulkan/radv_radeon_winsys.h               |  2 +
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c     | 64 +++++++++++++++++++++++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c |  4 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h |  5 ++
 5 files changed, 83 insertions(+)

diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index e12b50dabf3..106ff4a5495 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -734,6 +734,14 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct 
radeon_cmdbuf *cs)
                fclose(f);
        }
 
+       /* Dump BO log. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, 
"bo_history.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               device->ws->dump_bo_log(device->ws, f);
+               fclose(f);
+       }
+
        /* Dump VM fault info. */
        if (vm_fault_occurred) {
                snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, 
"vm_fault.log");
diff --git a/src/amd/vulkan/radv_radeon_winsys.h 
b/src/amd/vulkan/radv_radeon_winsys.h
index 8cf1b38f242..be5dd62b638 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -301,6 +301,8 @@ struct radeon_winsys {
 
        void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file);
 
+       void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);
+
        int (*surface_init)(struct radeon_winsys *ws,
                            const struct ac_surf_info *surf_info,
                            struct radeon_surf *surf);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 917eb9069ae..c04767cd3c7 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -39,6 +39,7 @@
 #include "util/u_atomic.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/os_time.h"
 
 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
 
@@ -282,6 +283,39 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo 
*_parent,
        return VK_SUCCESS;
 }
 
+struct radv_amdgpu_winsys_bo_log { /* one recorded BO event, kept on ws->log_bo_list */
+       struct list_head list; /* link used to append this entry to ws->log_bo_list */
+       uint64_t va; /* copied from bo->base.va when the event is recorded */
+       uint64_t size; /* copied from bo->size; the dump prints the range va .. va + size */
+       uint64_t timestamp; /* CPU timestamp, taken with os_time_get_nano() */
+       uint8_t is_virtual : 1; /* copied from bo->is_virtual */
+       uint8_t destroyed : 1; /* 1 when recorded from the BO destroy path, 0 from the creation paths */
+};
+
+/* Record a creation (destroyed=false) or destruction (destroyed=true) event for `bo`. */
+static void radv_amdgpu_log_bo(struct radv_amdgpu_winsys_bo *bo, bool destroyed)
+{
+       struct radv_amdgpu_winsys *winsys = bo->ws;
+       struct radv_amdgpu_winsys_bo_log *entry;
+
+       if (!winsys->debug_log_bos)
+               return; /* BO logging is disabled for this winsys */
+
+       entry = malloc(sizeof(*entry));
+       if (entry == NULL)
+               return; /* allocation failed: this event is simply not recorded */
+
+       entry->timestamp = os_time_get_nano();
+       entry->destroyed = destroyed;
+       entry->is_virtual = bo->is_virtual;
+       entry->size = bo->size;
+       entry->va = bo->base.va;
+
+       u_rwlock_wrlock(&winsys->log_bo_list_lock); /* append under the write lock */
+       list_addtail(&entry->list, &winsys->log_bo_list);
+       u_rwlock_wrunlock(&winsys->log_bo_list_lock);
+}
+
 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
 {
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
@@ -289,6 +323,9 @@ static void radv_amdgpu_winsys_bo_destroy(struct 
radeon_winsys_bo *_bo)
 
        if (p_atomic_dec_return(&bo->ref_count))
                return;
+
+       radv_amdgpu_log_bo(bo, true);
+
        if (bo->is_virtual) {
                for (uint32_t i = 0; i < bo->range_count; ++i) {
                        radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
@@ -391,6 +428,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                bo->ranges[0].bo_offset = 0;
 
                radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
+               radv_amdgpu_log_bo(bo, false);
+
                return (struct radeon_winsys_bo *)bo;
        }
 
@@ -485,6 +524,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             align64(bo->size, ws->info.gart_page_size));
 
        radv_amdgpu_add_buffer_to_global_list(bo);
+       radv_amdgpu_log_bo(bo, false);
+
        return (struct radeon_winsys_bo *)bo;
 error_va_map:
        amdgpu_bo_free(buf_handle);
@@ -592,6 +633,8 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                     align64(bo->size, ws->info.gart_page_size));
 
        radv_amdgpu_add_buffer_to_global_list(bo);
+       radv_amdgpu_log_bo(bo, false);
+
        return (struct radeon_winsys_bo *)bo;
 
 error_va_map:
@@ -672,6 +715,8 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                             align64(bo->size, ws->info.gart_page_size));
 
        radv_amdgpu_add_buffer_to_global_list(bo);
+       radv_amdgpu_log_bo(bo, false);
+
        return (struct radeon_winsys_bo *)bo;
 error_va_map:
        amdgpu_va_range_free(va_handle);
@@ -862,6 +907,24 @@ static int radv_amdgpu_bo_va_compare(const void *a, const 
void *b)
        return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > 
bo_b->base.va ? 1 : 0;
 }
 
+/* Write each logged BO event to `file`, one line per entry; does nothing unless BO logging is enabled. */
+static void radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
+{
+       struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+       struct radv_amdgpu_winsys_bo_log *bo_log;
+
+       if (!ws->debug_log_bos)
+               return;
+
+       u_rwlock_rdlock(&ws->log_bo_list_lock);
+       LIST_FOR_EACH_ENTRY(bo_log, &ws->log_bo_list, list) { /* %llu/%llx take unsigned long long, hence the casts */
+               fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
+                       (unsigned long long)bo_log->timestamp, (unsigned long long)bo_log->va,
+                       (unsigned long long)(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
+       }
+       u_rwlock_rdunlock(&ws->log_bo_list_lock);
+}
+
 static void radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
 {
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
@@ -905,4 +968,5 @@ void radv_amdgpu_bo_init_functions(struct 
radv_amdgpu_winsys *ws)
        ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
        ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
        ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
+       ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
 }
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
index dc7a9fe1c36..9510d547f36 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -168,6 +168,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys 
*rws)
 
        pthread_mutex_destroy(&ws->syncobj_lock);
        u_rwlock_destroy(&ws->global_bo_list_lock);
+       u_rwlock_destroy(&ws->log_bo_list_lock);
        ac_addrlib_destroy(ws->addrlib);
        amdgpu_device_deinitialize(ws->dev);
        FREE(rws);
@@ -195,6 +196,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, 
uint64_t perftest_flags)
                goto winsys_fail;
 
        ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
+       ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
        if (debug_flags & RADV_DEBUG_NO_IBS)
                ws->use_ib_bos = false;
 
@@ -203,6 +205,8 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, 
uint64_t perftest_flags)
        ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
        list_inithead(&ws->global_bo_list);
        u_rwlock_init(&ws->global_bo_list_lock);
+       list_inithead(&ws->log_bo_list);
+       u_rwlock_init(&ws->log_bo_list_lock);
        pthread_mutex_init(&ws->syncobj_lock, NULL);
        ws->base.query_info = radv_amdgpu_winsys_query_info;
        ws->base.query_value = radv_amdgpu_winsys_query_value;
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
index 7807c4d947f..c316602af15 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
@@ -44,6 +44,7 @@ struct radv_amdgpu_winsys {
        struct ac_addrlib *addrlib;
 
        bool debug_all_bos;
+       bool debug_log_bos;
        bool use_ib_bos;
        bool zero_all_vram_allocs;
        bool use_local_bos;
@@ -61,6 +62,10 @@ struct radv_amdgpu_winsys {
        pthread_mutex_t syncobj_lock;
        uint32_t *syncobj;
        uint32_t syncobj_count, syncobj_capacity;
+
+       /* BO log */
+       struct u_rwlock log_bo_list_lock;
+       struct list_head log_bo_list;
 };
 
 static inline struct radv_amdgpu_winsys *

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to