On Feb 9, 2017 8:25 PM, "Dave Airlie" <airl...@gmail.com> wrote:
> From: Dave Airlie <airl...@redhat.com>
>
> If a buffer is just full of flushes we flush things on command buffer
> submission, so don't bother submitting these.
>
> This will reduce some CPU overhead on dota2, which submits a fair few
> command streams that don't end up drawing anything.

I wrote basically the same patch for our driver earlier this year when I was preparing for our GDC Dota 2 demo. I noticed an improvement at the time but I'm pretty sure it was just because of the stalls we had due to relocations. Now that those stalls are gone, I'm not convinced it would do much. Did you actually measure a performance improvement or was this just a little CPU usage reduction?

> Signed-off-by: Dave Airlie <airl...@redhat.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  |  3 +++
>  src/amd/vulkan/radv_device.c      | 14 +++++++++-----
>  src/amd/vulkan/radv_meta_buffer.c |  1 +
>  src/amd/vulkan/radv_private.h     |  2 ++
>  src/amd/vulkan/si_cmd_buffer.c    |  2 +-
>  5 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index f281f33..25b1bd6 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1277,6 +1277,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
>  	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
>  							   cmd_buffer->cs, 4096);
>  
> +	cmd_buffer->no_draws = false;
>  	if ((cmd_buffer->state.vertex_descriptors_dirty || cmd_buffer->state.vb_dirty) &&
>  	    cmd_buffer->state.pipeline->num_vertex_attribs) {
>  		unsigned vb_offset;
> @@ -1592,6 +1593,7 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
>  
>  	cmd_buffer->record_fail = false;
>  	cmd_buffer->ring_offsets_idx = -1;
> +	cmd_buffer->no_draws = true;
>  }
>  
>  VkResult radv_ResetCommandBuffer(
> @@ -2423,6 +2425,7 @@ void radv_CmdDrawIndexedIndirectCountAMD(
>  
>  static void radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer)
>  {
> +	cmd_buffer->no_draws = false;
>  	radv_emit_compute_pipeline(cmd_buffer);
>  	radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline,
>  			       VK_SHADER_STAGE_COMPUTE_BIT);
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 8a54a2a..fddada4 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1448,21 +1448,25 @@ VkResult radv_QueueSubmit(
>  		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
>  				  pSubmits[i].commandBufferCount);
>  
> +		int draw_cmds_count = 0;
>  		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
>  			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
>  					 pSubmits[i].pCommandBuffers[j]);
>  			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
> -
> -			cs_array[j] = cmd_buffer->cs;
> +			if (cmd_buffer->no_draws == true) {
> +				continue;
> +			}
> +			cs_array[draw_cmds_count] = cmd_buffer->cs;
> +			draw_cmds_count++;
>  			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
>  				can_patch = false;
>  		}
>  
> -		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
> +		for (uint32_t j = 0; j < draw_cmds_count; j += advance) {
>  			advance = MIN2(max_cs_submission,
> -				       pSubmits[i].commandBufferCount - j);
> +				       draw_cmds_count - j);
>  			bool b = j == 0;
> -			bool e = j + advance == pSubmits[i].commandBufferCount;
> +			bool e = j + advance == draw_cmds_count;
>  
>  			if (queue->device->trace_bo)
>  				*queue->device->trace_id_ptr = 0;
> diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
> index cd2973f..4857d3d 100644
> --- a/src/amd/vulkan/radv_meta_buffer.c
> +++ b/src/amd/vulkan/radv_meta_buffer.c
> @@ -523,6 +523,7 @@ void radv_CmdUpdateBuffer(
>  	assert(!(dataSize & 3));
>  	assert(!(va & 3));
>  
> +	cmd_buffer->no_draws = false;
>  	if (dataSize < 4096) {
>  		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);
>  
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 25ed5de..9a88ce0 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -750,6 +750,8 @@ struct radv_cmd_buffer {
>  	uint32_t gsvs_ring_size_needed;
>  
>  	int ring_offsets_idx; /* just used for verification */
> +
> +	bool no_draws;
>  };
>  
>  struct radv_image;
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 1c99b22..b94c1f1 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -828,7 +828,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
>  static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
>  			      uint64_t remaining_size, unsigned *flags)
>  {
> -
> +	cmd_buffer->no_draws = false;
>  	/* Flush the caches for the first copy only.
>  	 * Also wait for the previous CP DMA operations.
>  	 */
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev