Module: Mesa Branch: staging/18.1 Commit: 31677c5aa867e457cd06ae25150be2155e8da3c6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=31677c5aa867e457cd06ae25150be2155e8da3c6
Author: Samuel Pitoiset <[email protected]> Date: Fri Jul 13 19:37:20 2018 +0200 radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9 A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion counters) must immediately precede every timestamp event to prevent a GPU hang on GFX9. Signed-off-by: Samuel Pitoiset <[email protected]> --- src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++-- src/amd/vulkan/radv_device.c | 4 ++-- src/amd/vulkan/radv_private.h | 7 +++++-- src/amd/vulkan/radv_query.c | 9 ++++++--- src/amd/vulkan/si_cmd_buffer.c | 26 +++++++++++++++++++++----- 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 1e50334f62..12041f04be 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) } if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { + unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends; + unsigned eop_bug_offset; void *fence_ptr; + radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, &cmd_buffer->gfx9_fence_offset, &fence_ptr); cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo; + + /* Allocate a buffer for the EOP bug on GFX9. */ + radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0, + &eop_bug_offset, &fence_ptr); + cmd_buffer->gfx9_eop_bug_va = + radv_buffer_get_va(cmd_buffer->upload.upload_bo); + cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; } cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; @@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->device->physical_device->rad_info.chip_class, ptr, va, radv_cmd_buffer_uses_mec(cmd_buffer), - flags); + flags, cmd_buffer->gfx9_eop_bug_va); } if (unlikely(cmd_buffer->device->trace_bo)) @@ -4100,7 +4110,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->device->physical_device->rad_info.chip_class, radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, - 1, va, 2, value); + 1, va, 2, value, + cmd_buffer->gfx9_eop_bug_va); assert(cmd_buffer->cs->cdw <= cdw_max); } diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index c49ab58275..bfd6f96536 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2181,7 +2181,7 @@ radv_get_preamble_cs(struct radv_queue *queue, RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); + RADV_CMD_FLAG_INV_GLOBAL_L2, 0); } else if (i == 1) { si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, @@ -2191,7 +2191,7 @@ radv_get_preamble_cs(struct radv_queue *queue, RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); + RADV_CMD_FLAG_INV_GLOBAL_L2, 0); } if (!queue->device->ws->cs_finalize(cs)) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 3b4c80e025..3f997d348e 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1037,6 +1037,7 @@ struct radv_cmd_buffer { uint32_t gfx9_fence_offset; struct radeon_winsys_bo *gfx9_fence_bo; uint32_t gfx9_fence_idx; + uint64_t gfx9_eop_bug_va; /** * Whether a query pool has been resetted and we have to flush caches. @@ -1069,7 +1070,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs, unsigned data_sel, uint64_t va, uint32_t old_fence, - uint32_t new_fence); + uint32_t new_fence, + uint64_t gfx9_eop_bug_va); void si_emit_wait_fence(struct radeon_winsys_cs *cs, bool predicated, @@ -1079,7 +1081,8 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, enum chip_class chip_class, uint32_t *fence_ptr, uint64_t va, bool is_mec, - enum radv_cmd_flush_bits flush_bits); + enum radv_cmd_flush_bits flush_bits, + uint64_t gfx9_eop_bug_va); void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va); void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 9a930e85ff..dccdee3611 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1169,7 +1169,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->device->physical_device->rad_info.chip_class, radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, - 1, avail_va, 0, 1); + 1, avail_va, 0, 1, + cmd_buffer->gfx9_eop_bug_va); break; default: unreachable("ending unhandled query type"); @@ -1292,13 +1293,15 @@ void radv_CmdWriteTimestamp( cmd_buffer->device->physical_device->rad_info.chip_class, mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, - 3, query_va, 0, 0); + 3, query_va, 0, 0, + cmd_buffer->gfx9_eop_bug_va); si_cs_emit_write_event_eop(cs, false, cmd_buffer->device->physical_device->rad_info.chip_class, mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, - 1, avail_va, 0, 1); + 1, avail_va, 0, 1, + cmd_buffer->gfx9_eop_bug_va); break; } query_va += pool->stride; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index d4459092d0..7cd863e389 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -852,7 +852,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs, unsigned data_sel, uint64_t va, uint32_t old_fence, - uint32_t new_fence) + uint32_t new_fence, + uint64_t gfx9_eop_bug_va) { unsigned op = EVENT_TYPE(event) | EVENT_INDEX(5) | @@ -860,6 +861,17 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs, unsigned is_gfx8_mec = is_mec && chip_class < GFX9; if (chip_class >= GFX9 || is_gfx8_mec) { + /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion + * counters) must immediately precede every timestamp event to + * prevent a GPU hang on GFX9. + */ + if (chip_class == GFX9) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); + radeon_emit(cs, gfx9_eop_bug_va); + radeon_emit(cs, gfx9_eop_bug_va >> 32); + } + radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, predicated)); radeon_emit(cs, op); radeon_emit(cs, EOP_DATA_SEL(data_sel)); @@ -941,7 +953,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, - enum radv_cmd_flush_bits flush_bits) + enum radv_cmd_flush_bits flush_bits, + uint64_t gfx9_eop_bug_va) { unsigned cp_coher_cntl = 0; uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | @@ -971,7 +984,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, chip_class, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, - 0, 0, 0, 0, 0); + 0, 0, 0, 0, 0, + gfx9_eop_bug_va); } } if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { @@ -1057,7 +1071,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, uint32_t old_fence = (*flush_cnt)++; si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1, - flush_va, old_fence, *flush_cnt); + flush_va, old_fence, *flush_cnt, + gfx9_eop_bug_va); si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff); } @@ -1149,7 +1164,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->device->physical_device->rad_info.chip_class, ptr, va, radv_cmd_buffer_uses_mec(cmd_buffer), - cmd_buffer->state.flush_bits); + cmd_buffer->state.flush_bits, + cmd_buffer->gfx9_eop_bug_va); if (unlikely(cmd_buffer->device->trace_bo)) _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
