Module: Mesa Branch: main Commit: 6aa518ea869e3fb8e36347dc7fc60cf584fce2d6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6aa518ea869e3fb8e36347dc7fc60cf584fce2d6
Author: Timur Kristóf <[email protected]> Date: Thu Mar 30 18:54:35 2023 +0200 radv: Chain cmd buffers in queue code when possible, not in winsys. Signed-off-by: Timur Kristóf <[email protected]> Reviewed-by: Bas Nieuwenhuizen <[email protected]> Reviewed-by: Samuel Pitoiset <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22220> --- src/amd/vulkan/radv_queue.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index 221c94a2ffb..8b6e034ec4c 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -1582,7 +1582,6 @@ static VkResult radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submission) { struct radeon_winsys_ctx *ctx = queue->hw_ctx; - bool can_patch = true; bool use_ace = false; bool use_perf_counters = false; VkResult result; @@ -1596,9 +1595,6 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi return result; if (use_ace) { - /* TODO: chaining with gang submit. */ - can_patch = false; - result = radv_update_gang_preambles(queue); if (result != VK_SUCCESS) return result; @@ -1652,9 +1648,6 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi /* RADV only supports perf counters on the GFX queue currently. */ assert(radv_queue_ring(queue) == AMD_IP_GFX); - /* Disallow chaining the lock/unlock CS. */ - can_patch = false; - if (!perf_ctr_lock_cs || !perf_ctr_unlock_cs) { result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail; @@ -1694,6 +1687,9 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi .uses_shadow_regs = queue->state.uses_shadow_regs, }; + const bool chaining_en = !(queue->device->instance->debug_flags & RADV_DEBUG_NO_IBS) && + queue->device->physical_device->rad_info.gfx_level >= GFX7; + for (uint32_t j = 0, advance; j < cmd_buffer_count; j += advance) { advance = MIN2(max_cs_submission, cmd_buffer_count - j); const bool last_submit = j + advance == cmd_buffer_count; @@ -1708,22 +1704,34 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi if (use_perf_counters) cs_array[num_submitted_cs++] = perf_ctr_lock_cs; + struct radeon_cmdbuf *chainable = NULL; + struct radeon_cmdbuf *chainable_ace = NULL; + /* Add CS from submitted command buffers. */ for (unsigned c = 0; c < advance; ++c) { struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j + c]; assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + const bool can_chain_next = chaining_en && !(cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT); /* ACE needs to be first because the last CS must match the queue's IP type. */ if (radv_cmd_buffer_needs_ace(cmd_buffer)) { queue->device->ws->cs_unchain(cmd_buffer->ace_internal.cs); - cs_array[num_submitted_cs++] = cmd_buffer->ace_internal.cs; + if (!chainable_ace || + !queue->device->ws->cs_chain(chainable_ace, cmd_buffer->ace_internal.cs, false)) + cs_array[num_submitted_cs++] = cmd_buffer->ace_internal.cs; + + chainable_ace = can_chain_next ? cmd_buffer->ace_internal.cs : NULL; submit_ace = true; } - can_patch &= !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT); queue->device->ws->cs_unchain(cmd_buffer->cs); - cs_array[num_submitted_cs++] = cmd_buffer->cs; + if (!chainable || + !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) + cs_array[num_submitted_cs++] = cmd_buffer->cs; + + chainable = can_chain_next ? cmd_buffer->cs : NULL; cs_idx = num_submitted_cs - 1; } @@ -1740,9 +1748,9 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi submit.cs_count = num_submitted_cs; submit.preamble_count = submit_ace ? num_preambles : num_1q_preambles; - result = queue->device->ws->cs_submit( - ctx, &submit, j == 0 ? wait_count : 0, waits, - last_submit ? submission->signal_count : 0, submission->signals, can_patch); + result = queue->device->ws->cs_submit(ctx, &submit, j == 0 ? wait_count : 0, waits, + last_submit ? submission->signal_count : 0, + submission->signals, false); if (result != VK_SUCCESS) goto fail;
