From: Dave Airlie <airl...@redhat.com>

If a command buffer contains nothing but flushes, don't bother
submitting it: we already flush on command buffer submission anyway.

This reduces CPU overhead in Dota 2, which submits quite a few
command streams that never end up drawing anything.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/amd/vulkan/radv_cmd_buffer.c  |  3 +++
 src/amd/vulkan/radv_device.c      | 14 +++++++++-----
 src/amd/vulkan/radv_meta_buffer.c |  1 +
 src/amd/vulkan/radv_private.h     |  2 ++
 src/amd/vulkan/si_cmd_buffer.c    |  2 +-
 5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f281f33..25b1bd6 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1277,6 +1277,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer 
*cmd_buffer)
        MAYBE_UNUSED unsigned cdw_max = 
radeon_check_space(cmd_buffer->device->ws,
                                                           cmd_buffer->cs, 
4096);
 
+       cmd_buffer->no_draws = false;
        if ((cmd_buffer->state.vertex_descriptors_dirty || 
cmd_buffer->state.vb_dirty) &&
            cmd_buffer->state.pipeline->num_vertex_attribs) {
                unsigned vb_offset;
@@ -1592,6 +1593,7 @@ static void  radv_reset_cmd_buffer(struct radv_cmd_buffer 
*cmd_buffer)
        cmd_buffer->record_fail = false;
 
        cmd_buffer->ring_offsets_idx = -1;
+       cmd_buffer->no_draws = true;
 }
 
 VkResult radv_ResetCommandBuffer(
@@ -2423,6 +2425,7 @@ void radv_CmdDrawIndexedIndirectCountAMD(
 static void
 radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer)
 {
+       cmd_buffer->no_draws = false;
        radv_emit_compute_pipeline(cmd_buffer);
        radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline,
                               VK_SHADER_STAGE_COMPUTE_BIT);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 8a54a2a..fddada4 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1448,21 +1448,25 @@ VkResult radv_QueueSubmit(
                cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
                                                pSubmits[i].commandBufferCount);
 
+               int draw_cmds_count = 0;
                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                                         pSubmits[i].pCommandBuffers[j]);
                        assert(cmd_buffer->level == 
VK_COMMAND_BUFFER_LEVEL_PRIMARY);
-
-                       cs_array[j] = cmd_buffer->cs;
+                       if (cmd_buffer->no_draws == true) {
+                               continue;
+                       }
+                       cs_array[draw_cmds_count] = cmd_buffer->cs;
+                       draw_cmds_count++;
                        if ((cmd_buffer->usage_flags & 
VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
                                can_patch = false;
                }
 
-               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += 
advance) {
+               for (uint32_t j = 0; j < draw_cmds_count; j += advance) {
                        advance = MIN2(max_cs_submission,
-                                      pSubmits[i].commandBufferCount - j);
+                                      draw_cmds_count - j);
                        bool b = j == 0;
-                       bool e = j + advance == pSubmits[i].commandBufferCount;
+                       bool e = j + advance == draw_cmds_count;
 
                        if (queue->device->trace_bo)
                                *queue->device->trace_id_ptr = 0;
diff --git a/src/amd/vulkan/radv_meta_buffer.c 
b/src/amd/vulkan/radv_meta_buffer.c
index cd2973f..4857d3d 100644
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -523,6 +523,7 @@ void radv_CmdUpdateBuffer(
        assert(!(dataSize & 3));
        assert(!(va & 3));
 
+       cmd_buffer->no_draws = false;
        if (dataSize < 4096) {
                cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, 
dst_buffer->bo, 8);
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 25ed5de..9a88ce0 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -750,6 +750,8 @@ struct radv_cmd_buffer {
        uint32_t gsvs_ring_size_needed;
 
        int ring_offsets_idx; /* just used for verification */
+
+       bool no_draws;
 };
 
 struct radv_image;
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 1c99b22..b94c1f1 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -828,7 +828,7 @@ static void si_emit_cp_dma_clear_buffer(struct 
radv_cmd_buffer *cmd_buffer,
 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t 
byte_count,
                              uint64_t remaining_size, unsigned *flags)
 {
-
+       cmd_buffer->no_draws = false;
        /* Flush the caches for the first copy only.
         * Also wait for the previous CP DMA operations.
         */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to