Module: Mesa Branch: main Commit: 4d79c2d280f23908ea6cfef6b77bd994a629bd3a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4d79c2d280f23908ea6cfef6b77bd994a629bd3a
Author: Sagar Ghuge <sagar.gh...@intel.com> Date: Mon Jun 5 09:59:54 2023 -0700 anv: Execute an empty batch to sync main and companion RCS batch We need to synchronize the main (CCS/BCS) batch and the companion RCS batch, so let's create an empty batch and make both the main (CCS/BCS) batch and the companion RCS batch wait on the empty sync batch and signal the fence. The reason to execute the empty batch is that we need to make sure the companion RCS batch finishes as soon as the CCS/BCS batch finishes. Preemption could prevent the companion RCS batch execution and we might end up destroying the CCS/BCS batch before the companion RCS batch finishes. Signed-off-by: Sagar Ghuge <sagar.gh...@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> Reviewed-by: José Roberto de Souza <jose.so...@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23661> --- src/intel/vulkan/anv_batch_chain.c | 47 +++++++++++++++++++++++++++++---- src/intel/vulkan/anv_private.h | 23 ++++++++++++++++ src/intel/vulkan/anv_queue.c | 29 ++++++++++++++------ src/intel/vulkan/i915/anv_batch_chain.c | 8 ++++++ src/intel/vulkan/xe/anv_batch_chain.c | 10 +++++-- 5 files changed, 102 insertions(+), 15 deletions(-) diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 0f86685cbb0..12ceba62e86 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1281,11 +1281,48 @@ anv_queue_exec_locked(struct anv_queue *queue, uint32_t perf_query_pass) { struct anv_device *device = queue->device; - return device->kmd_backend->queue_exec_locked(queue, wait_count, waits, - cmd_buffer_count, - cmd_buffers, signal_count, - signals, perf_query_pool, - perf_query_pass); + VkResult result = VK_SUCCESS; + + /* We only need to synchronize the main & companion command buffers if we + * have a companion command buffer somewhere in the list of command + * buffers. 
+ */ + bool needs_companion_sync = false; + for (uint32_t i = 0; i < cmd_buffer_count; i++) { + if (cmd_buffers[i]->companion_rcs_cmd_buffer != NULL) { + needs_companion_sync = true; + break; + } + } + + result = + device->kmd_backend->queue_exec_locked( + queue, + wait_count, waits, + cmd_buffer_count, cmd_buffers, + needs_companion_sync ? 0 : signal_count, signals, + perf_query_pool, + perf_query_pass); + if (result != VK_SUCCESS) + return result; + + if (needs_companion_sync) { + struct vk_sync_wait companion_sync = { + .sync = queue->companion_sync, + }; + /* If any of the command buffer had a companion batch, the submission + * backend will signal queue->companion_sync, so to ensure completion, + * we just need to wait on that fence. + */ + result = + device->kmd_backend->queue_exec_locked(queue, + 1, &companion_sync, + 0, NULL, + signal_count, signals, + NULL, 0); + } + + return result; } static inline bool diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 21718b0f2ee..2d6acce6361 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1081,6 +1081,29 @@ struct anv_queue { /** Synchronization object for debug purposes (DEBUG_SYNC) */ struct vk_sync *sync; + /** Companion synchronization object + * + * Vulkan command buffers can be destroyed as soon as their lifecycle moved + * from the Pending state to the Invalid/Executable state. This transition + * happens when the VkFence/VkSemaphore associated with the completion of + * the command buffer work is signaled. + * + * When we're using a companion command buffer to execute part of another + * command buffer, we need to tie the 2 work submissions together to ensure + * when the associated VkFence/VkSemaphore is signaled, both command + * buffers are actually unused by the HW. 
To do this, we run an empty batch + * buffer that we use to signal after both submissions : + * + * CCS --> main ---> empty_batch (with wait on companion) --> signal + * RCS --> companion -| + * + * When companion batch completes, it signals companion_sync and allow + * empty_batch to execute. Since empty_batch is running on the main engine, + * we're guaranteed that upon completion both main & companion command + * buffers are not used by HW anymore. + */ + struct vk_sync *companion_sync; + struct intel_ds_queue ds; }; diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index 9178145894a..1989016f6b2 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -68,6 +68,9 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue, uint32_t index_in_family) { struct anv_physical_device *pdevice = device->physical; + assert(queue->vk.queue_family_index < pdevice->queue.family_count); + struct anv_queue_family *queue_family = + &device->physical->queue.families[pCreateInfo->queueFamilyIndex]; VkResult result; result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, @@ -75,6 +78,11 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue, if (result != VK_SUCCESS) return result; + queue->vk.driver_submit = anv_queue_submit; + queue->device = device; + queue->family = queue_family; + queue->decoder = &device->decoder[queue->vk.queue_family_index]; + result = anv_create_engine(device, queue, pCreateInfo); if (result != VK_SUCCESS) { vk_queue_finish(&queue->vk); @@ -91,14 +99,16 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue, } } - queue->vk.driver_submit = anv_queue_submit; - - queue->device = device; - - assert(queue->vk.queue_family_index < pdevice->queue.family_count); - queue->family = &pdevice->queue.families[queue->vk.queue_family_index]; - - queue->decoder = &device->decoder[queue->vk.queue_family_index]; + if (queue_family->engine_class == INTEL_ENGINE_CLASS_COPY || + 
queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { + result = vk_sync_create(&device->vk, + &device->physical->sync_syncobj_type, + 0, 0, &queue->companion_sync); + if (result != VK_SUCCESS) { + anv_queue_finish(queue); + return result; + } + } return VK_SUCCESS; } @@ -109,6 +119,9 @@ anv_queue_finish(struct anv_queue *queue) if (queue->sync) vk_sync_destroy(&queue->device->vk, queue->sync); + if (queue->companion_sync) + vk_sync_destroy(&queue->device->vk, queue->companion_sync); + anv_destroy_engine(queue); vk_queue_finish(&queue->vk); } diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index 3ded767a631..c95efb2d173 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -677,6 +677,14 @@ i915_companion_rcs_queue_exec_locked(struct anv_queue *queue, goto error; } + if (queue->companion_sync) { + result = anv_execbuf_add_sync(device, &execbuf, + queue->companion_sync, + true /* is_signal */, 0); + if (result != VK_SUCCESS) + goto error; + } + result = setup_execbuf_for_cmd_buffers(&execbuf, queue, cmd_buffers, cmd_buffer_count, true /* is_companion_rcs_cmd_buffer */); diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c index 5fadf847e19..76a8a4fc2ec 100644 --- a/src/intel/vulkan/xe/anv_batch_chain.c +++ b/src/intel/vulkan/xe/anv_batch_chain.c @@ -216,10 +216,16 @@ xe_companion_rcs_queue_exec_locked(struct anv_queue *queue, struct anv_device *device = queue->device; VkResult result; + struct vk_sync_signal companion_sync = { + .sync = queue->companion_sync, + }; struct drm_xe_sync *xe_syncs = NULL; uint32_t xe_syncs_count = 0; - result = xe_exec_process_syncs(queue, wait_count, waits, 0, NULL, NULL, - true, /* is_companion_rcs_queue */ + result = xe_exec_process_syncs(queue, + wait_count, waits, + 1, &companion_sync, + NULL /* utrace_submit */, + true /* is_companion_rcs_queue */, &xe_syncs, &xe_syncs_count); if (result != 
VK_SUCCESS)