Module: Mesa
Branch: staging/18.1
Commit: 31677c5aa867e457cd06ae25150be2155e8da3c6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=31677c5aa867e457cd06ae25150be2155e8da3c6

Author: Samuel Pitoiset <[email protected]>
Date:   Fri Jul 13 19:37:20 2018 +0200

radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9

A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
counters) must immediately precede every timestamp event to
prevent a GPU hang on GFX9.

Signed-off-by: Samuel Pitoiset <[email protected]>

---

 src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++--
 src/amd/vulkan/radv_device.c     |  4 ++--
 src/amd/vulkan/radv_private.h    |  7 +++++--
 src/amd/vulkan/radv_query.c      |  9 ++++++---
 src/amd/vulkan/si_cmd_buffer.c   | 26 +++++++++++++++++++++-----
 5 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 1e50334f62..12041f04be 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
        }
 
        if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+               unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
+               unsigned eop_bug_offset;
                void *fence_ptr;
+
                radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
                                             &cmd_buffer->gfx9_fence_offset,
                                             &fence_ptr);
                cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
+
+               /* Allocate a buffer for the EOP bug on GFX9. */
+               radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
+                                            &eop_bug_offset, &fence_ptr);
+               cmd_buffer->gfx9_eop_bug_va =
+                       radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+               cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
        }
 
        cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
@@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
                                       cmd_buffer->device->physical_device->rad_info.chip_class,
                                       ptr, va,
                                       radv_cmd_buffer_uses_mec(cmd_buffer),
-                                      flags);
+                                      flags, cmd_buffer->gfx9_eop_bug_va);
        }
 
        if (unlikely(cmd_buffer->device->trace_bo))
@@ -4100,7 +4110,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
                                   cmd_buffer->device->physical_device->rad_info.chip_class,
                                   radv_cmd_buffer_uses_mec(cmd_buffer),
                                   V_028A90_BOTTOM_OF_PIPE_TS, 0,
-                                  1, va, 2, value);
+                                  1, va, 2, value,
+                                  cmd_buffer->gfx9_eop_bug_va);
 
        assert(cmd_buffer->cs->cdw <= cdw_max);
 }
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c49ab58275..bfd6f96536 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2181,7 +2181,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                               RADV_CMD_FLAG_INV_ICACHE |
                                               RADV_CMD_FLAG_INV_SMEM_L1 |
                                               RADV_CMD_FLAG_INV_VMEM_L1 |
-                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
+                                              RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
                } else if (i == 1) {
                        si_cs_emit_cache_flush(cs,
                                               
queue->device->physical_device->rad_info.chip_class,
@@ -2191,7 +2191,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                               RADV_CMD_FLAG_INV_ICACHE |
                                               RADV_CMD_FLAG_INV_SMEM_L1 |
                                               RADV_CMD_FLAG_INV_VMEM_L1 |
-                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
+                                              RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
                }
 
                if (!queue->device->ws->cs_finalize(cs))
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3b4c80e025..3f997d348e 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1037,6 +1037,7 @@ struct radv_cmd_buffer {
        uint32_t gfx9_fence_offset;
        struct radeon_winsys_bo *gfx9_fence_bo;
        uint32_t gfx9_fence_idx;
+       uint64_t gfx9_eop_bug_va;
 
        /**
         * Whether a query pool has been resetted and we have to flush caches.
@@ -1069,7 +1070,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
                                unsigned data_sel,
                                uint64_t va,
                                uint32_t old_fence,
-                               uint32_t new_fence);
+                               uint32_t new_fence,
+                               uint64_t gfx9_eop_bug_va);
 
 void si_emit_wait_fence(struct radeon_winsys_cs *cs,
                        bool predicated,
@@ -1079,7 +1081,8 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                            enum chip_class chip_class,
                            uint32_t *fence_ptr, uint64_t va,
                            bool is_mec,
-                           enum radv_cmd_flush_bits flush_bits);
+                           enum radv_cmd_flush_bits flush_bits,
+                           uint64_t gfx9_eop_bug_va);
 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
 void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 9a930e85ff..dccdee3611 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1169,7 +1169,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
                                           cmd_buffer->device->physical_device->rad_info.chip_class,
                                           radv_cmd_buffer_uses_mec(cmd_buffer),
                                           V_028A90_BOTTOM_OF_PIPE_TS, 0,
-                                          1, avail_va, 0, 1);
+                                          1, avail_va, 0, 1,
+                                          cmd_buffer->gfx9_eop_bug_va);
                break;
        default:
                unreachable("ending unhandled query type");
@@ -1292,13 +1293,15 @@ void radv_CmdWriteTimestamp(
                                                   
cmd_buffer->device->physical_device->rad_info.chip_class,
                                                   mec,
                                                   V_028A90_BOTTOM_OF_PIPE_TS, 
0,
-                                                  3, query_va, 0, 0);
+                                                  3, query_va, 0, 0,
+                                                  cmd_buffer->gfx9_eop_bug_va);
                        si_cs_emit_write_event_eop(cs,
                                                   false,
                                                   
cmd_buffer->device->physical_device->rad_info.chip_class,
                                                   mec,
                                                   V_028A90_BOTTOM_OF_PIPE_TS, 
0,
-                                                  1, avail_va, 0, 1);
+                                                  1, avail_va, 0, 1,
+                                                  cmd_buffer->gfx9_eop_bug_va);
                        break;
                }
                query_va += pool->stride;
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index d4459092d0..7cd863e389 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -852,7 +852,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
                                unsigned data_sel,
                                uint64_t va,
                                uint32_t old_fence,
-                               uint32_t new_fence)
+                               uint32_t new_fence,
+                               uint64_t gfx9_eop_bug_va)
 {
        unsigned op = EVENT_TYPE(event) |
                EVENT_INDEX(5) |
@@ -860,6 +861,17 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
        unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
 
        if (chip_class >= GFX9 || is_gfx8_mec) {
+               /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
+                * counters) must immediately precede every timestamp event to
+                * prevent a GPU hang on GFX9.
+                */
+               if (chip_class == GFX9) {
+                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+                       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+                       radeon_emit(cs, gfx9_eop_bug_va);
+                       radeon_emit(cs, gfx9_eop_bug_va >> 32);
+               }
+
                radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, predicated));
                radeon_emit(cs, op);
                radeon_emit(cs, EOP_DATA_SEL(data_sel));
@@ -941,7 +953,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                       uint32_t *flush_cnt,
                       uint64_t flush_va,
                        bool is_mec,
-                       enum radv_cmd_flush_bits flush_bits)
+                       enum radv_cmd_flush_bits flush_bits,
+                      uint64_t gfx9_eop_bug_va)
 {
        unsigned cp_coher_cntl = 0;
        uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
@@ -971,7 +984,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                                                           chip_class,
                                                           is_mec,
                                                           
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
-                                                          0, 0, 0, 0, 0);
+                                                          0, 0, 0, 0, 0,
+                                                          gfx9_eop_bug_va);
                        }
                }
                if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
@@ -1057,7 +1071,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                uint32_t old_fence = (*flush_cnt)++;
 
                si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1,
-                                          flush_va, old_fence, *flush_cnt);
+                                          flush_va, old_fence, *flush_cnt,
+                                          gfx9_eop_bug_va);
                si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff);
        }
 
@@ -1149,7 +1164,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
                               
cmd_buffer->device->physical_device->rad_info.chip_class,
                               ptr, va,
                               radv_cmd_buffer_uses_mec(cmd_buffer),
-                              cmd_buffer->state.flush_bits);
+                              cmd_buffer->state.flush_bits,
+                              cmd_buffer->gfx9_eop_bug_va);
 
 
        if (unlikely(cmd_buffer->device->trace_bo))

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to