Module: Mesa
Branch: master
Commit: 56bff270febd4ab58a4bcb8fd5ab6787089513e0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=56bff270febd4ab58a4bcb8fd5ab6787089513e0

Author: Samuel Pitoiset <[email protected]>
Date:   Fri Feb 26 15:19:25 2021 +0100

radeonsi,radv: do not overallocate the SQTT buffer size

The number of shader engines isn't always 4.

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9307>

---

 src/amd/common/ac_sqtt.c               | 11 +++++++----
 src/amd/common/ac_sqtt.h               |  6 ++++--
 src/amd/vulkan/radv_sqtt.c             | 17 ++++++++++-------
 src/gallium/drivers/radeonsi/si_sqtt.c | 11 ++++++-----
 4 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/src/amd/common/ac_sqtt.c b/src/amd/common/ac_sqtt.c
index 863dfa2041e..1f8bda500ab 100644
--- a/src/amd/common/ac_sqtt.c
+++ b/src/amd/common/ac_sqtt.c
@@ -35,11 +35,13 @@ ac_thread_trace_get_info_offset(unsigned se)
 }
 
 uint64_t
-ac_thread_trace_get_data_offset(struct ac_thread_trace_data *data, unsigned se)
+ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
+                                const struct ac_thread_trace_data *data, unsigned se)
 {
+   unsigned max_se = rad_info->max_se;
    uint64_t data_offset;
 
-   data_offset = align64(sizeof(struct ac_thread_trace_info) * 4,
+   data_offset = align64(sizeof(struct ac_thread_trace_info) * max_se,
                1 << SQTT_BUFFER_ALIGN_SHIFT);
    data_offset += data->buffer_size * se;
 
@@ -53,9 +55,10 @@ ac_thread_trace_get_info_va(uint64_t va, unsigned se)
 }
 
 uint64_t
-ac_thread_trace_get_data_va(struct ac_thread_trace_data *data, uint64_t va, unsigned se)
+ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
+                            const struct ac_thread_trace_data *data, uint64_t va, unsigned se)
 {
-   return va + ac_thread_trace_get_data_offset(data, se);
+   return va + ac_thread_trace_get_data_offset(rad_info, data, se);
 }
 
 bool
diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h
index 4b8e282c972..267b6093625 100644
--- a/src/amd/common/ac_sqtt.h
+++ b/src/amd/common/ac_sqtt.h
@@ -78,12 +78,14 @@ uint64_t
 ac_thread_trace_get_info_offset(unsigned se);
 
 uint64_t
-ac_thread_trace_get_data_offset(struct ac_thread_trace_data *data, unsigned se);
+ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
+                                const struct ac_thread_trace_data *data, unsigned se);
 uint64_t
 ac_thread_trace_get_info_va(uint64_t va, unsigned se);
 
 uint64_t
-ac_thread_trace_get_data_va(struct ac_thread_trace_data *data, uint64_t va, unsigned se);
+ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
+                            const struct ac_thread_trace_data *data, uint64_t va, unsigned se);
 
 bool
 ac_is_thread_trace_complete(struct radeon_info *rad_info, const struct ac_thread_trace_info *info);
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index ed7e5df9ebd..4bfdc4b6160 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -36,13 +36,14 @@ radv_emit_thread_trace_start(struct radv_device *device,
                             uint32_t queue_family_index)
 {
        uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
-       unsigned max_se = device->physical_device->rad_info.max_se;
+       struct radeon_info *rad_info = &device->physical_device->rad_info;
+       unsigned max_se = rad_info->max_se;
 
        assert(device->physical_device->rad_info.chip_class >= GFX8);
 
        for (unsigned se = 0; se < max_se; se++) {
                uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
-               uint64_t data_va = ac_thread_trace_get_data_va(&device->thread_trace, va, se);
+               uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se);
                uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
                int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
 
@@ -400,6 +401,7 @@ radv_emit_wait_for_idle(struct radv_device *device,
 static bool
 radv_thread_trace_init_bo(struct radv_device *device)
 {
+       unsigned max_se = device->physical_device->rad_info.max_se;
        struct radeon_winsys *ws = device->ws;
        uint64_t size;
 
@@ -409,10 +411,10 @@ radv_thread_trace_init_bo(struct radv_device *device)
        device->thread_trace.buffer_size = align64(device->thread_trace.buffer_size,
                                                   1u << SQTT_BUFFER_ALIGN_SHIFT);
 
-       /* Compute total size of the thread trace BO for 4 SEs. */
-       size = align64(sizeof(struct ac_thread_trace_info) * 4,
+       /* Compute total size of the thread trace BO for all SEs. */
+       size = align64(sizeof(struct ac_thread_trace_info) * max_se,
                       1 << SQTT_BUFFER_ALIGN_SHIFT);
-       size += device->thread_trace.buffer_size * 4ll;
+       size += device->thread_trace.buffer_size * (uint64_t)max_se;
 
        device->thread_trace.bo = ws->buffer_create(ws, size, 4096,
                                                    RADEON_DOMAIN_VRAM,
@@ -625,7 +627,8 @@ radv_get_thread_trace(struct radv_queue *queue,
                      struct ac_thread_trace *thread_trace)
 {
        struct radv_device *device = queue->device;
-       unsigned max_se = device->physical_device->rad_info.max_se;
+       struct radeon_info *rad_info = &device->physical_device->rad_info;
+       unsigned max_se = rad_info->max_se;
        void *thread_trace_ptr = device->thread_trace.ptr;
 
        memset(thread_trace, 0, sizeof(*thread_trace));
@@ -633,7 +636,7 @@ radv_get_thread_trace(struct radv_queue *queue,
 
        for (unsigned se = 0; se < max_se; se++) {
                uint64_t info_offset = ac_thread_trace_get_info_offset(se);
-               uint64_t data_offset = ac_thread_trace_get_data_offset(&device->thread_trace, se);
+               uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
                void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
                void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
                struct ac_thread_trace_info *info =
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index 8fc751b9498..3cca3594edf 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -38,6 +38,7 @@ si_emit_spi_config_cntl(struct si_context* sctx,
 static bool
 si_thread_trace_init_bo(struct si_context *sctx)
 {
+   unsigned max_se = sctx->screen->info.max_se;
    struct radeon_winsys *ws = sctx->ws;
    uint64_t size;
 
@@ -47,10 +48,10 @@ si_thread_trace_init_bo(struct si_context *sctx)
    sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size,
                                              1u << SQTT_BUFFER_ALIGN_SHIFT);
 
-   /* Compute total size of the thread trace BO for 4 SEs. */
-   size = align64(sizeof(struct ac_thread_trace_info) * 4,
+   /* Compute total size of the thread trace BO for all SEs. */
+   size = align64(sizeof(struct ac_thread_trace_info) * max_se,
                   1 << SQTT_BUFFER_ALIGN_SHIFT);
-   size += sctx->thread_trace->buffer_size * 4ll;
+   size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
 
    sctx->thread_trace->bo =
       ws->buffer_create(ws, size, 4096,
@@ -77,7 +78,7 @@ si_emit_thread_trace_start(struct si_context* sctx,
 
    for (unsigned se = 0; se < max_se; se++) {
       uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
-      uint64_t data_va = ac_thread_trace_get_data_va(sctx->thread_trace, va, se);
+      uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se);
       uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
 
       /* Target SEx and SH0. */
@@ -495,7 +496,7 @@ si_get_thread_trace(struct si_context *sctx,
 
    for (unsigned se = 0; se < max_se; se++) {
       uint64_t info_offset = ac_thread_trace_get_info_offset(se);
-      uint64_t data_offset = ac_thread_trace_get_data_offset(sctx->thread_trace, se);
+      uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
       void *info_ptr = thread_trace_ptr + info_offset;
       void *data_ptr = thread_trace_ptr + data_offset;
       struct ac_thread_trace_info *info =

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to