Because WAIT_REG_MEM can only wait for a 32-bit value, it's not safe to use it for timestamp queries. If we only wait on the low 32 bits of a timestamp query we could be unlucky and the GPU might hang.
One possible fix is to emit a full end of pipe event and wait on a 32-bit value which is actually an availability bit. This bit is allocated at creation time and always cleared before emitting the EOP event. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108925 Fixes: 5d6a560a29 ("radv: do not use the availability bit for timestamp queries") Signed-off-by: Samuel Pitoiset <[email protected]> --- src/amd/vulkan/radv_query.c | 49 +++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 550abe307a1..9bb6b660add 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1056,8 +1056,15 @@ VkResult radv_CreateQueryPool( pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics; pool->availability_offset = pool->stride * pCreateInfo->queryCount; pool->size = pool->availability_offset; - if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS) + if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS) { pool->size += 4 * pCreateInfo->queryCount; + } else if (pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP) { + /* Allocate one DWORD for the availability bit which is needed + * for vkCmdCopyQueryPoolResults() because we can't perform a + * WAIT_REG_MEM on a 64-bit value. + */ + pool->size += 4; + } pool->bo = device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING); @@ -1328,19 +1335,45 @@ void radv_CmdCopyQueryPoolResults( pool->availability_offset + 4 * firstQuery); break; case VK_QUERY_TYPE_TIMESTAMP: + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + /* Emit a full end of pipe event because we can't + * perform a WAIT_REG_MEM on a 64-bit value. If we only + * do a WAIT_REG_MEM on the low 32 bits of a timestamp + * query we could be unlucky and the GPU might hang. + */ + enum chip_class chip = cmd_buffer->device->physical_device->rad_info.chip_class; + bool is_mec = radv_cmd_buffer_uses_mec(cmd_buffer); + uint64_t avail_va = va + pool->availability_offset; + + /* Clear the availability bit before waiting on the end + * of pipe event. + */ + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, avail_va); + radeon_emit(cs, avail_va >> 32); + radeon_emit(cs, 0xdeadbeef); + + /* Wait for all prior GPU work. */ + si_cs_emit_write_event_eop(cs, chip, is_mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DATA_SEL_VALUE_32BIT, + avail_va, 0, 1, + cmd_buffer->gfx9_eop_bug_va); + + /* Wait on the timestamp value. */ + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, + 1, 0xffffffff); + } + for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) { unsigned query = firstQuery + i; uint64_t local_src_va = va + query * pool->stride; MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 19); - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, - local_src_va, - TIMESTAMP_NOT_READY >> 32, - 0xffffffff); - } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { uint64_t avail_dest_va = dest_va + elem_size; -- 2.19.2 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
