Module: Mesa Branch: main Commit: 22267360336e51c3c690492efef878d082266a67 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=22267360336e51c3c690492efef878d082266a67
Author: Timur Kristóf <[email protected]> Date: Thu May 5 21:43:09 2022 +0200 radv/amdgpu: Group queue submit info into a structure. This prepares RADV for submitting to multiple queues at the same time. Signed-off-by: Timur Kristóf <[email protected]> Reviewed-By: Tatsuyuki Ishi <[email protected]> Reviewed-by: Samuel Pitoiset <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16361> --- src/amd/vulkan/radv_device.c | 46 +++++++++++++++++++-------- src/amd/vulkan/radv_radeon_winsys.h | 15 ++++++--- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 20 ++++++++---- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index cfbd49320fe..b850485b45c 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -4511,10 +4511,12 @@ static VkResult radv_queue_submit_empty(struct radv_queue *queue, struct vk_queue_submit *submission) { struct radeon_winsys_ctx *ctx = queue->hw_ctx; - enum amd_ip_type ring = radv_queue_ring(queue); + struct radv_winsys_submit_info submit = { + .ip_type = radv_queue_ring(queue), + .queue_index = queue->vk.index_in_family, + }; - return queue->device->ws->cs_submit(ctx, ring, queue->vk.index_in_family, NULL, 0, NULL, NULL, - submission->wait_count, submission->waits, + return queue->device->ws->cs_submit(ctx, 1, &submit, submission->wait_count, submission->waits, submission->signal_count, submission->signals, false); } @@ -4522,7 +4524,6 @@ static VkResult radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submission) { struct radeon_winsys_ctx *ctx = queue->hw_ctx; - enum amd_ip_type ring = radv_queue_ring(queue); uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; bool can_patch = true; uint32_t advance; @@ -4552,21 +4553,31 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING; } + /* For fences on the same queue/vm amdgpu doesn't wait till all processing is finished + * before starting the next cmdbuffer, so we need to do it here. */ + bool need_wait = submission->wait_count > 0; + + struct radv_winsys_submit_info submit = { + .ip_type = radv_queue_ring(queue), + .queue_index = queue->vk.index_in_family, + .cs_array = cs_array, + .cs_count = 0, + .initial_preamble_cs = + need_wait ? queue->initial_full_flush_preamble_cs : queue->initial_preamble_cs, + .continue_preamble_cs = queue->continue_preamble_cs, + }; + for (uint32_t j = 0; j < submission->command_buffer_count; j += advance) { - /* For fences on the same queue/vm amdgpu doesn't wait till all processing is finished - * before starting the next cmdbuffer, so we need to do it here. */ - bool need_wait = !j && submission->wait_count > 0; - struct radeon_cmdbuf *initial_preamble = - need_wait ? queue->initial_full_flush_preamble_cs : queue->initial_preamble_cs; advance = MIN2(max_cs_submission, submission->command_buffer_count - j); bool last_submit = j + advance == submission->command_buffer_count; if (queue->device->trace_bo) *queue->device->trace_id_ptr = 0; + submit.cs_count = advance; + result = queue->device->ws->cs_submit( - ctx, ring, queue->vk.index_in_family, cs_array + j, advance, initial_preamble, - queue->continue_preamble_cs, j == 0 ? submission->wait_count : 0, submission->waits, + ctx, 1, &submit, j == 0 ? submission->wait_count : 0, submission->waits, last_submit ? submission->signal_count : 0, submission->signals, can_patch); if (result != VK_SUCCESS) @@ -4579,6 +4590,9 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi if (queue->device->tma_bo) { radv_check_trap_handler(queue); } + + submit.cs_array += advance; + submit.initial_preamble_cs = queue->initial_preamble_cs; } fail: @@ -4626,10 +4640,14 @@ bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs) { struct radeon_winsys_ctx *ctx = queue->hw_ctx; + struct radv_winsys_submit_info submit = { + .ip_type = radv_queue_ring(queue), + .queue_index = queue->vk.index_in_family, + .cs_array = &cs, + .cs_count = 1, + }; - VkResult result = - queue->device->ws->cs_submit(ctx, radv_queue_ring(queue), queue->vk.index_in_family, - &cs, 1, NULL, NULL, 0, NULL, 0, NULL, false); + VkResult result = queue->device->ws->cs_submit(ctx, 1, &submit, 0, NULL, 0, NULL, false); if (result != VK_SUCCESS) return false; diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 759e2c904bb..dd7a80e0192 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -186,6 +186,15 @@ struct radv_winsys_bo_list { unsigned count; }; +struct radv_winsys_submit_info { + enum amd_ip_type ip_type; + int queue_index; + unsigned cs_count; + struct radeon_cmdbuf **cs_array; + struct radeon_cmdbuf *initial_preamble_cs; + struct radeon_cmdbuf *continue_preamble_cs; +}; + /* Kernel effectively allows 0-31. This sets some priorities for fixed * functionality buffers */ enum { @@ -272,10 +281,8 @@ struct radeon_winsys { void (*cs_grow)(struct radeon_cmdbuf *cs, size_t min_size); - VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, enum amd_ip_type amd_ip_type, int queue_index, - struct radeon_cmdbuf **cs_array, unsigned cs_count, - struct radeon_cmdbuf *initial_preamble_cs, - struct radeon_cmdbuf *continue_preamble_cs, uint32_t wait_count, + VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, uint32_t submit_count, + const struct radv_winsys_submit_info *submits, uint32_t wait_count, const struct vk_sync_wait *waits, uint32_t signal_count, const struct vk_sync_signal *signals, bool can_patch); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index beb2714927b..9a05d0e2749 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -1341,10 +1341,8 @@ radv_amdgpu_winsys_cs_submit_internal(struct radeon_winsys_ctx *_ctx, enum amd_i } static VkResult -radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, enum amd_ip_type ip_type, - int queue_idx, struct radeon_cmdbuf **cs_array, unsigned cs_count, - struct radeon_cmdbuf *initial_preamble_cs, - struct radeon_cmdbuf *continue_preamble_cs, uint32_t wait_count, +radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, uint32_t submit_count, + const struct radv_winsys_submit_info *submits, uint32_t wait_count, const struct vk_sync_wait *waits, uint32_t signal_count, const struct vk_sync_signal *signals, bool can_patch) { @@ -1401,9 +1399,17 @@ radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, enum amd_ip_type ip sem_info.signal.syncobj_count = signal_idx - sem_info.signal.timeline_syncobj_count; sem_info.cs_emit_signal = true; - result = radv_amdgpu_winsys_cs_submit_internal(_ctx, ip_type, queue_idx, cs_array, cs_count, - initial_preamble_cs, continue_preamble_cs, - &sem_info, can_patch); + /* Should submit to at least 1 queue. */ + assert(submit_count); + + if (submit_count == 1) { + result = radv_amdgpu_winsys_cs_submit_internal( + _ctx, submits[0].ip_type, submits[0].queue_index, submits[0].cs_array, + submits[0].cs_count, submits[0].initial_preamble_cs, submits[0].continue_preamble_cs, + &sem_info, can_patch); + } else { + unreachable("submitting to multiple queues at the same time is not supported yet."); + } out: STACK_ARRAY_FINISH(wait_points);
