On Fri, Jul 21, 2017 at 9:33 AM, Dave Airlie <[email protected]> wrote: > From: Dave Airlie <[email protected]> > > This adds support for sharing semaphores using kernel syncobjects. > > Syncobj backed semaphores are used for any semaphore which is > created with external flags, and when a semaphore is imported, > otherwise we use the current non-kernel semaphores. > > Temporary imports from syncobj fd are also available, these > just override the current user until the next wait, when the > temp syncobj is dropped. > > v2: allocate more chunks upfront, fix off by one after > previous refactor of syncobj setup, remove unnecessary null > check. > > Signed-off-by: Dave Airlie <[email protected]> > --- > src/amd/vulkan/radv_device.c | 248 > +++++++++++++++++++++++--- > src/amd/vulkan/radv_entrypoints_gen.py | 3 + > src/amd/vulkan/radv_private.h | 16 +- > src/amd/vulkan/radv_radeon_winsys.h | 21 ++- > src/amd/vulkan/radv_wsi.c | 30 +++- > src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 110 ++++++++---- > 6 files changed, 357 insertions(+), 71 deletions(-) > > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index d87be66..44bee5c 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -102,6 +102,10 @@ static const VkExtensionProperties instance_extensions[] > = { > .extensionName = > VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, > .specVersion = 1, > }, > + { > + .extensionName = > VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, > + .specVersion = 1, > + }, > }; > > static const VkExtensionProperties common_device_extensions[] = { > @@ -162,6 +166,16 @@ static const VkExtensionProperties > common_device_extensions[] = { > .specVersion = 1, > }, > }; > +static const VkExtensionProperties ext_sema_device_extensions[] = { > + { > + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, > + .specVersion = 1, > + }, > + { > + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, > + 
.specVersion = 1, > + }, > +}; > > static VkResult > radv_extensions_register(struct radv_instance *instance, > @@ -312,6 +326,15 @@ radv_physical_device_init(struct radv_physical_device > *device, > if (result != VK_SUCCESS) > goto fail; > > + if (device->rad_info.has_syncobj) { > + result = radv_extensions_register(instance, > + &device->extensions, > + ext_sema_device_extensions, > + > ARRAY_SIZE(ext_sema_device_extensions)); > + if (result != VK_SUCCESS) > + goto fail; > + } > + > fprintf(stderr, "WARNING: radv is not a conformant vulkan > implementation, testing use only.\n"); > device->name = get_chip_name(device->rad_info.family); > > @@ -1885,6 +1908,87 @@ fail: > return VK_ERROR_OUT_OF_DEVICE_MEMORY; > } > > +static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts, > + int num_sems, > + const VkSemaphore *sems, > + bool reset_temp) > +{ > + int syncobj_idx = 0, sem_idx = 0; > + > + if (num_sems == 0) > + return VK_SUCCESS; > + for (uint32_t i = 0; i < num_sems; i++) { > + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); > + > + if (sem->temp_syncobj || sem->syncobj) > + counts->syncobj_count++; > + else > + counts->sem_count++; > + } > + > + if (counts->syncobj_count) { > + counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * > counts->syncobj_count); > + if (!counts->syncobj) > + return VK_ERROR_OUT_OF_HOST_MEMORY; > + } > + > + if (counts->sem_count) { > + counts->sem = (struct radeon_winsys_sem > **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count); > + if (!counts->sem) > + return VK_ERROR_OUT_OF_HOST_MEMORY;
Should counts->syncobj be freed here? Otherwise looks reasonable to me. Reviewed-by: Bas Nieuwenhuizen <[email protected]> for all 3. > + } > + > + for (uint32_t i = 0; i < num_sems; i++) { > + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); > + > + if (sem->temp_syncobj) { > + counts->syncobj[syncobj_idx++] = sem->temp_syncobj; > + if (reset_temp) { > + /* after we wait on a temp import - drop it */ > + sem->temp_syncobj = 0; > + } > + } > + else if (sem->syncobj) > + counts->syncobj[syncobj_idx++] = sem->syncobj; > + else { > + assert(sem->sem); > + counts->sem[sem_idx++] = sem->sem; > + } > + } > + > + return VK_SUCCESS; > +} > + > +void radv_free_sem_info(struct radv_winsys_sem_info *sem_info) > +{ > + free(sem_info->wait.syncobj); > + free(sem_info->wait.sem); > + free(sem_info->signal.syncobj); > + free(sem_info->signal.sem); > +} > + > +VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info, > + int num_wait_sems, > + const VkSemaphore *wait_sems, > + int num_signal_sems, > + const VkSemaphore *signal_sems) > +{ > + VkResult ret; > + memset(sem_info, 0, sizeof(*sem_info)); > + > + ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, > wait_sems, true); > + if (ret) > + return ret; > + ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, > signal_sems, false); > + if (ret) > + radv_free_sem_info(sem_info); > + > + /* caller can override these */ > + sem_info->cs_emit_wait = true; > + sem_info->cs_emit_signal = true; > + return ret; > +} > + > VkResult radv_QueueSubmit( > VkQueue _queue, > uint32_t submitCount, > @@ -1935,16 +2039,22 @@ VkResult radv_QueueSubmit( > bool do_flush = !i || pSubmits[i].pWaitDstStageMask; > bool can_patch = !do_flush; > uint32_t advance; > + struct radv_winsys_sem_info sem_info; > + > + result = radv_alloc_sem_info(&sem_info, > + pSubmits[i].waitSemaphoreCount, > + pSubmits[i].pWaitSemaphores, > + pSubmits[i].signalSemaphoreCount, > + pSubmits[i].pSignalSemaphores); > + if (result != VK_SUCCESS) > + return result; > > 
if (!pSubmits[i].commandBufferCount) { > if (pSubmits[i].waitSemaphoreCount || > pSubmits[i].signalSemaphoreCount) { > ret = queue->device->ws->cs_submit(ctx, > queue->queue_idx, > > &queue->device->empty_cs[queue->queue_family_index], > 1, NULL, > NULL, > - (struct > radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, > - > pSubmits[i].waitSemaphoreCount, > - (struct > radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, > - > pSubmits[i].signalSemaphoreCount, > + &sem_info, > false, > base_fence); > if (ret) { > radv_loge("failed to submit CS %d\n", > i); > @@ -1952,6 +2062,7 @@ VkResult radv_QueueSubmit( > } > fence_emitted = true; > } > + radv_free_sem_info(&sem_info); > continue; > } > > @@ -1976,18 +2087,16 @@ VkResult radv_QueueSubmit( > for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + > do_flush; j += advance) { > advance = MIN2(max_cs_submission, > pSubmits[i].commandBufferCount + > do_flush - j); > - bool b = j == 0; > - bool e = j + advance == > pSubmits[i].commandBufferCount + do_flush; > > if (queue->device->trace_bo) > *queue->device->trace_id_ptr = 0; > > + sem_info.cs_emit_wait = j == 0; > + sem_info.cs_emit_signal = j + advance == > pSubmits[i].commandBufferCount + do_flush; > + > ret = queue->device->ws->cs_submit(ctx, > queue->queue_idx, cs_array + j, > advance, > initial_preamble_cs, continue_preamble_cs, > - (struct > radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, > - b ? > pSubmits[i].waitSemaphoreCount : 0, > - (struct > radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, > - e ? 
> pSubmits[i].signalSemaphoreCount : 0, > + &sem_info, > can_patch, > base_fence); > > if (ret) { > @@ -2008,16 +2117,19 @@ VkResult radv_QueueSubmit( > } > } > } > + > + radv_free_sem_info(&sem_info); > free(cs_array); > } > > if (fence) { > - if (!fence_emitted) > + if (!fence_emitted) { > + struct radv_winsys_sem_info sem_info = {0}; > ret = queue->device->ws->cs_submit(ctx, > queue->queue_idx, > > &queue->device->empty_cs[queue->queue_family_index], > - 1, NULL, NULL, > NULL, 0, NULL, 0, > + 1, NULL, NULL, > &sem_info, > false, base_fence); > - > + } > fence->submitted = true; > } > > @@ -2445,6 +2557,7 @@ radv_sparse_image_opaque_bind_memory(struct radv_device > *device, > bool fence_emitted = false; > > for (uint32_t i = 0; i < bindInfoCount; ++i) { > + struct radv_winsys_sem_info sem_info; > for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) { > radv_sparse_buffer_bind_memory(queue->device, > > pBindInfo[i].pBufferBinds + j); > @@ -2455,19 +2568,28 @@ radv_sparse_image_opaque_bind_memory(struct > radv_device *device, > > pBindInfo[i].pImageOpaqueBinds + j); > } > > + VkResult result; > + result = radv_alloc_sem_info(&sem_info, > + pBindInfo[i].waitSemaphoreCount, > + pBindInfo[i].pWaitSemaphores, > + > pBindInfo[i].signalSemaphoreCount, > + pBindInfo[i].pSignalSemaphores); > + if (result != VK_SUCCESS) > + return result; > + > if (pBindInfo[i].waitSemaphoreCount || > pBindInfo[i].signalSemaphoreCount) { > queue->device->ws->cs_submit(queue->hw_ctx, > queue->queue_idx, > > &queue->device->empty_cs[queue->queue_family_index], > 1, NULL, NULL, > - (struct > radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores, > - > pBindInfo[i].waitSemaphoreCount, > - (struct > radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores, > - > pBindInfo[i].signalSemaphoreCount, > + &sem_info, > false, base_fence); > fence_emitted = true; > if (fence) > fence->submitted = true; > } > + > + radv_free_sem_info(&sem_info); > + > } > > if (fence && !fence_emitted) { > @@ -2604,13 
+2726,38 @@ VkResult radv_CreateSemaphore( > VkSemaphore* pSemaphore) > { > RADV_FROM_HANDLE(radv_device, device, _device); > - struct radeon_winsys_sem *sem; > + const VkExportSemaphoreCreateInfoKHR *export = > + vk_find_struct_const(pCreateInfo->pNext, > EXPORT_SEMAPHORE_CREATE_INFO_KHR); > + VkExternalSemaphoreHandleTypeFlagsKHR handleTypes = > + export ? export->handleTypes : 0; > > - sem = device->ws->create_sem(device->ws); > + struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator, > + sizeof(*sem), 8, > + > VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); > if (!sem) > return VK_ERROR_OUT_OF_HOST_MEMORY; > > - *pSemaphore = radeon_winsys_sem_to_handle(sem); > + sem->temp_syncobj = 0; > + /* create a syncobject if we are going to export this semaphore */ > + if (handleTypes) { > + assert (device->physical_device->rad_info.has_syncobj); > + assert (handleTypes == > VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); > + int ret = device->ws->create_syncobj(device->ws, > &sem->syncobj); > + if (ret) { > + vk_free2(&device->alloc, pAllocator, sem); > + return VK_ERROR_OUT_OF_HOST_MEMORY; > + } > + sem->sem = NULL; > + } else { > + sem->sem = device->ws->create_sem(device->ws); > + if (!sem->sem) { > + vk_free2(&device->alloc, pAllocator, sem); > + return VK_ERROR_OUT_OF_HOST_MEMORY; > + } > + sem->syncobj = 0; > + } > + > + *pSemaphore = radv_semaphore_to_handle(sem); > return VK_SUCCESS; > } > > @@ -2620,11 +2767,15 @@ void radv_DestroySemaphore( > const VkAllocationCallbacks* pAllocator) > { > RADV_FROM_HANDLE(radv_device, device, _device); > - RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore); > + RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore); > if (!_semaphore) > return; > > - device->ws->destroy_sem(sem); > + if (sem->syncobj) > + device->ws->destroy_syncobj(device->ws, sem->syncobj); > + else > + device->ws->destroy_sem(sem->sem); > + vk_free2(&device->alloc, pAllocator, sem); > } > > VkResult radv_CreateEvent( > @@ -3409,3 +3560,56 @@ VkResult 
radv_GetMemoryFdPropertiesKHR(VkDevice > _device, > */ > return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; > } > + > +VkResult radv_ImportSemaphoreFdKHR(VkDevice _device, > + const VkImportSemaphoreFdInfoKHR > *pImportSemaphoreFdInfo) > +{ > + RADV_FROM_HANDLE(radv_device, device, _device); > + RADV_FROM_HANDLE(radv_semaphore, sem, > pImportSemaphoreFdInfo->semaphore); > + uint32_t syncobj_handle = 0; > + assert(pImportSemaphoreFdInfo->handleType == > VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); > + > + int ret = device->ws->import_syncobj(device->ws, > pImportSemaphoreFdInfo->fd, &syncobj_handle); > + if (ret != 0) > + return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; > + > + if (pImportSemaphoreFdInfo->flags & > VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) { > + sem->temp_syncobj = syncobj_handle; > + } else { > + sem->syncobj = syncobj_handle; > + } > + close(pImportSemaphoreFdInfo->fd); > + return VK_SUCCESS; > +} > + > +VkResult radv_GetSemaphoreFdKHR(VkDevice _device, > + const VkSemaphoreGetFdInfoKHR *pGetFdInfo, > + int *pFd) > +{ > + RADV_FROM_HANDLE(radv_device, device, _device); > + RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore); > + int ret; > + uint32_t syncobj_handle; > + > + assert(pGetFdInfo->handleType == > VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); > + if (sem->temp_syncobj) > + syncobj_handle = sem->temp_syncobj; > + else > + syncobj_handle = sem->syncobj; > + ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); > + if (ret) > + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); > + return VK_SUCCESS; > +} > + > +void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR( > + VkPhysicalDevice physicalDevice, > + const VkPhysicalDeviceExternalSemaphoreInfoKHR* > pExternalSemaphoreInfo, > + VkExternalSemaphorePropertiesKHR* > pExternalSemaphoreProperties) > +{ > + pExternalSemaphoreProperties->exportFromImportedHandleTypes = > VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; > + 
pExternalSemaphoreProperties->compatibleHandleTypes = > VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; > + pExternalSemaphoreProperties->externalSemaphoreFeatures = > VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR | > + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; > + > +} > diff --git a/src/amd/vulkan/radv_entrypoints_gen.py > b/src/amd/vulkan/radv_entrypoints_gen.py > index 61b2328..9f5a4f3 100644 > --- a/src/amd/vulkan/radv_entrypoints_gen.py > +++ b/src/amd/vulkan/radv_entrypoints_gen.py > @@ -49,6 +49,9 @@ supported_extensions = [ > 'VK_KHR_external_memory_fd', > 'VK_KHR_storage_buffer_storage_class', > 'VK_KHR_variable_pointers', > + 'VK_KHR_external_semaphore_capabilities', > + 'VK_KHR_external_semaphore', > + 'VK_KHR_external_semaphore_fd' > ] > > # We generate a static hash table for entry point lookup > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index 891b34e..8cd5ec0 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -1470,6 +1470,20 @@ struct radv_query_pool { > uint32_t pipeline_stats_mask; > }; > > +struct radv_semaphore { > + /* use a winsys sem for non-exportable */ > + struct radeon_winsys_sem *sem; > + uint32_t syncobj; > + uint32_t temp_syncobj; > +}; > + > +VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info, > + int num_wait_sems, > + const VkSemaphore *wait_sems, > + int num_signal_sems, > + const VkSemaphore *signal_sems); > +void radv_free_sem_info(struct radv_winsys_sem_info *sem_info); > + > void > radv_update_descriptor_sets(struct radv_device *device, > struct radv_cmd_buffer *cmd_buffer, > @@ -1563,6 +1577,6 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, > VkQueryPool) > RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass) > RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler) > RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule) > -RADV_DEFINE_NONDISP_HANDLE_CASTS(radeon_winsys_sem, VkSemaphore) > 
+RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_semaphore, VkSemaphore) > > #endif /* RADV_PRIVATE_H */ > diff --git a/src/amd/vulkan/radv_radeon_winsys.h > b/src/amd/vulkan/radv_radeon_winsys.h > index 2f3990c..215ef0b 100644 > --- a/src/amd/vulkan/radv_radeon_winsys.h > +++ b/src/amd/vulkan/radv_radeon_winsys.h > @@ -131,9 +131,23 @@ struct radeon_bo_metadata { > uint32_t metadata[64]; > }; > > +uint32_t syncobj_handle; > struct radeon_winsys_bo; > struct radeon_winsys_fence; > -struct radeon_winsys_sem; > + > +struct radv_winsys_sem_counts { > + uint32_t syncobj_count; > + uint32_t sem_count; > + uint32_t *syncobj; > + struct radeon_winsys_sem **sem; > +}; > + > +struct radv_winsys_sem_info { > + bool cs_emit_signal; > + bool cs_emit_wait; > + struct radv_winsys_sem_counts wait; > + struct radv_winsys_sem_counts signal; > +}; > > struct radeon_winsys { > void (*destroy)(struct radeon_winsys *ws); > @@ -191,10 +205,7 @@ struct radeon_winsys { > unsigned cs_count, > struct radeon_winsys_cs *initial_preamble_cs, > struct radeon_winsys_cs *continue_preamble_cs, > - struct radeon_winsys_sem **wait_sem, > - unsigned wait_sem_count, > - struct radeon_winsys_sem **signal_sem, > - unsigned signal_sem_count, > + struct radv_winsys_sem_info *sem_info, > bool can_patch, > struct radeon_winsys_fence *fence); > > diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c > index ab3dcd6..adc4311 100644 > --- a/src/amd/vulkan/radv_wsi.c > +++ b/src/amd/vulkan/radv_wsi.c > @@ -442,7 +442,6 @@ VkResult radv_AcquireNextImageKHR( > fence->submitted = true; > fence->signalled = true; > } > - > return result; > } > > @@ -452,7 +451,6 @@ VkResult radv_QueuePresentKHR( > { > RADV_FROM_HANDLE(radv_queue, queue, _queue); > VkResult result = VK_SUCCESS; > - > const VkPresentRegionsKHR *regions = > vk_find_struct_const(pPresentInfo->pNext, > PRESENT_REGIONS_KHR); > > @@ -461,6 +459,20 @@ VkResult radv_QueuePresentKHR( > struct radeon_winsys_cs *cs; > const VkPresentRegionKHR *region = 
NULL; > VkResult item_result; > + struct radv_winsys_sem_info sem_info; > + > + item_result = radv_alloc_sem_info(&sem_info, > + > pPresentInfo->waitSemaphoreCount, > + > pPresentInfo->pWaitSemaphores, > + 0, > + NULL); > + if (pPresentInfo->pResults != NULL) > + pPresentInfo->pResults[i] = item_result; > + result = result == VK_SUCCESS ? item_result : result; > + if (item_result != VK_SUCCESS) { > + radv_free_sem_info(&sem_info); > + continue; > + } > > assert(radv_device_from_handle(swapchain->device) == > queue->device); > if (swapchain->fences[0] == VK_NULL_HANDLE) { > @@ -472,8 +484,10 @@ VkResult radv_QueuePresentKHR( > if (pPresentInfo->pResults != NULL) > pPresentInfo->pResults[i] = item_result; > result = result == VK_SUCCESS ? item_result : result; > - if (item_result != VK_SUCCESS) > + if (item_result != VK_SUCCESS) { > + radv_free_sem_info(&sem_info); > continue; > + } > } else { > radv_ResetFences(radv_device_to_handle(queue->device), > 1, &swapchain->fences[0]); > @@ -487,11 +501,12 @@ VkResult radv_QueuePresentKHR( > RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]); > struct radeon_winsys_fence *base_fence = fence->fence; > struct radeon_winsys_ctx *ctx = queue->hw_ctx; > + > queue->device->ws->cs_submit(ctx, queue->queue_idx, > &cs, > 1, NULL, NULL, > - (struct radeon_winsys_sem > **)pPresentInfo->pWaitSemaphores, > - > pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence); > + &sem_info, > + false, base_fence); > fence->submitted = true; > > if (regions && regions->pRegions) > @@ -504,8 +519,10 @@ VkResult radv_QueuePresentKHR( > if (pPresentInfo->pResults != NULL) > pPresentInfo->pResults[i] = item_result; > result = result == VK_SUCCESS ? 
item_result : result; > - if (item_result != VK_SUCCESS) > + if (item_result != VK_SUCCESS) { > + radv_free_sem_info(&sem_info); > continue; > + } > > VkFence last = swapchain->fences[2]; > swapchain->fences[2] = swapchain->fences[1]; > @@ -517,6 +534,7 @@ VkResult radv_QueuePresentKHR( > 1, &last, true, 1); > } > > + radv_free_sem_info(&sem_info); > } > > return VK_SUCCESS; > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > index 6ed8f32..bc4d460 100644 > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > @@ -75,13 +75,6 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base) > return (struct radv_amdgpu_cs*)base; > } > > -struct radv_amdgpu_sem_info { > - int wait_sem_count; > - struct radeon_winsys_sem **wait_sems; > - int signal_sem_count; > - struct radeon_winsys_sem **signal_sems; > -}; > - > static int ring_to_hw_ip(enum ring_type ring) > { > switch (ring) { > @@ -99,10 +92,10 @@ static int ring_to_hw_ip(enum ring_type ring) > static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx, > uint32_t ip_type, > uint32_t ring, > - struct radv_amdgpu_sem_info *sem_info); > + struct radv_winsys_sem_info *sem_info); > static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, > struct amdgpu_cs_request *request, > - struct radv_amdgpu_sem_info *sem_info); > + struct radv_winsys_sem_info *sem_info); > > static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx, > struct radv_amdgpu_fence *fence, > @@ -662,7 +655,7 @@ static void radv_assign_last_submit(struct > radv_amdgpu_ctx *ctx, > > static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx > *_ctx, > int queue_idx, > - struct radv_amdgpu_sem_info > *sem_info, > + struct radv_winsys_sem_info > *sem_info, > struct radeon_winsys_cs > **cs_array, > unsigned cs_count, > struct radeon_winsys_cs > *initial_preamble_cs, > @@ -740,7 +733,7 @@ static int 
radv_amdgpu_winsys_cs_submit_chained(struct > radeon_winsys_ctx *_ctx, > > static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx > *_ctx, > int queue_idx, > - struct radv_amdgpu_sem_info > *sem_info, > + struct radv_winsys_sem_info > *sem_info, > struct radeon_winsys_cs > **cs_array, > unsigned cs_count, > struct radeon_winsys_cs > *initial_preamble_cs, > @@ -752,7 +745,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct > radeon_winsys_ctx *_ctx, > struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; > amdgpu_bo_list_handle bo_list; > struct amdgpu_cs_request request; > - > + bool emit_signal_sem = sem_info->cs_emit_signal; > assert(cs_count); > > for (unsigned i = 0; i < cs_count;) { > @@ -792,6 +785,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct > radeon_winsys_ctx *_ctx, > } > } > > + sem_info->cs_emit_signal = (i == cs_count - cnt) ? > emit_signal_sem : false; > r = radv_amdgpu_cs_submit(ctx, &request, sem_info); > if (r) { > if (r == -ENOMEM) > @@ -818,7 +812,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct > radeon_winsys_ctx *_ctx, > > static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx > *_ctx, > int queue_idx, > - struct radv_amdgpu_sem_info > *sem_info, > + struct radv_winsys_sem_info > *sem_info, > struct radeon_winsys_cs > **cs_array, > unsigned cs_count, > struct radeon_winsys_cs > *initial_preamble_cs, > @@ -833,6 +827,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct > radeon_winsys_ctx *_ctx, > amdgpu_bo_list_handle bo_list; > struct amdgpu_cs_request request; > uint32_t pad_word = 0xffff1000U; > + bool emit_signal_sem = sem_info->cs_emit_signal; > > if (radv_amdgpu_winsys(ws)->info.chip_class == SI) > pad_word = 0x80000000; > @@ -898,6 +893,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct > radeon_winsys_ctx *_ctx, > request.ibs = &ib; > request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, > queue_idx); > > + sem_info->cs_emit_signal = 
(i == cs_count - cnt) ? > emit_signal_sem : false; > r = radv_amdgpu_cs_submit(ctx, &request, sem_info); > if (r) { > if (r == -ENOMEM) > @@ -929,35 +925,27 @@ static int radv_amdgpu_winsys_cs_submit(struct > radeon_winsys_ctx *_ctx, > unsigned cs_count, > struct radeon_winsys_cs > *initial_preamble_cs, > struct radeon_winsys_cs > *continue_preamble_cs, > - struct radeon_winsys_sem **wait_sem, > - unsigned wait_sem_count, > - struct radeon_winsys_sem **signal_sem, > - unsigned signal_sem_count, > + struct radv_winsys_sem_info *sem_info, > bool can_patch, > struct radeon_winsys_fence *_fence) > { > struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]); > struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); > int ret; > - struct radv_amdgpu_sem_info sem_info = {0}; > - > - sem_info.wait_sems = wait_sem; > - sem_info.wait_sem_count = wait_sem_count; > - sem_info.signal_sems = signal_sem; > - sem_info.signal_sem_count = signal_sem_count; > > + assert(sem_info); > if (!cs->ws->use_ib_bos) { > - ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, > &sem_info, cs_array, > + ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, > sem_info, cs_array, > cs_count, > initial_preamble_cs, continue_preamble_cs, _fence); > } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && > cs->ws->batchchain) { > - ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, > &sem_info, cs_array, > + ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, > sem_info, cs_array, > cs_count, > initial_preamble_cs, continue_preamble_cs, _fence); > } else { > - ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, > &sem_info, cs_array, > + ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, > sem_info, cs_array, > cs_count, > initial_preamble_cs, continue_preamble_cs, _fence); > } > > - radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, &sem_info); > + radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, sem_info); > return ret; > } > > @@ -1072,10 
+1060,10 @@ static void radv_amdgpu_destroy_sem(struct > radeon_winsys_sem *_sem) > static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx, > uint32_t ip_type, > uint32_t ring, > - struct radv_amdgpu_sem_info *sem_info) > + struct radv_winsys_sem_info *sem_info) > { > - for (unsigned i = 0; i < sem_info->signal_sem_count; i++) { > - struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence > *)sem_info->signal_sems[i]; > + for (unsigned i = 0; i < sem_info->signal.sem_count; i++) { > + struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence > *)(sem_info->signal.sem)[i]; > > if (sem->context) > return -EINVAL; > @@ -1085,9 +1073,27 @@ static int radv_amdgpu_signal_sems(struct > radv_amdgpu_ctx *ctx, > return 0; > } > > +static struct drm_amdgpu_cs_chunk_sem > *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts, > + > struct drm_amdgpu_cs_chunk *chunk, int chunk_id) > +{ > + struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct > drm_amdgpu_cs_chunk_sem) * counts->syncobj_count); > + if (!syncobj) > + return NULL; > + > + for (unsigned i = 0; i < counts->syncobj_count; i++) { > + struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i]; > + sem->handle = counts->syncobj[i]; > + } > + > + chunk->chunk_id = chunk_id; > + chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * > counts->syncobj_count; > + chunk->chunk_data = (uint64_t)(uintptr_t)syncobj; > + return syncobj; > +} > + > static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, > struct amdgpu_cs_request *request, > - struct radv_amdgpu_sem_info *sem_info) > + struct radv_winsys_sem_info *sem_info) > { > int r; > int num_chunks; > @@ -1096,10 +1102,12 @@ static int radv_amdgpu_cs_submit(struct > radv_amdgpu_ctx *ctx, > struct drm_amdgpu_cs_chunk *chunks; > struct drm_amdgpu_cs_chunk_data *chunk_data; > struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL; > + struct drm_amdgpu_cs_chunk_sem *wait_syncobj = NULL, *signal_syncobj > = NULL; > int i; > struct 
amdgpu_cs_fence *sem; > + > user_fence = (request->fence_info.handle != NULL); > - size = request->number_of_ibs + (user_fence ? 2 : 1) + 1; > + size = request->number_of_ibs + (user_fence ? 2 : 1) + 3; > > chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size); > > @@ -1136,15 +1144,30 @@ static int radv_amdgpu_cs_submit(struct > radv_amdgpu_ctx *ctx, > &chunk_data[i]); > } > > - if (sem_info->wait_sem_count) { > - sem_dependencies = malloc(sizeof(struct > drm_amdgpu_cs_chunk_dep) * sem_info->wait_sem_count); > + if (sem_info->wait.syncobj_count && sem_info->cs_emit_wait) { > + wait_syncobj = > radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait, > + > &chunks[num_chunks], > + > AMDGPU_CHUNK_ID_SYNCOBJ_IN); > + if (!wait_syncobj) { > + r = -ENOMEM; > + goto error_out; > + } > + num_chunks++; > + > + if (sem_info->wait.sem_count == 0) > + sem_info->cs_emit_wait = false; > + > + } > + > + if (sem_info->wait.sem_count && sem_info->cs_emit_wait) { > + sem_dependencies = malloc(sizeof(struct > drm_amdgpu_cs_chunk_dep) * sem_info->wait.sem_count); > if (!sem_dependencies) { > r = -ENOMEM; > goto error_out; > } > int sem_count = 0; > - for (unsigned j = 0; j < sem_info->wait_sem_count; j++) { > - sem = (struct amdgpu_cs_fence > *)sem_info->wait_sems[j]; > + for (unsigned j = 0; j < sem_info->wait.sem_count; j++) { > + sem = (struct amdgpu_cs_fence *)sem_info->wait.sem[j]; > if (!sem->context) > continue; > struct drm_amdgpu_cs_chunk_dep *dep = > &sem_dependencies[sem_count++]; > @@ -1158,7 +1181,18 @@ static int radv_amdgpu_cs_submit(struct > radv_amdgpu_ctx *ctx, > chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) > / 4 * sem_count; > chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies; > > - sem_info->wait_sem_count = 0; > + sem_info->cs_emit_wait = false; > + } > + > + if (sem_info->signal.syncobj_count && sem_info->cs_emit_signal) { > + signal_syncobj = > radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal, > + > &chunks[num_chunks], > + > 
AMDGPU_CHUNK_ID_SYNCOBJ_OUT); > + if (!signal_syncobj) { > + r = -ENOMEM; > + goto error_out; > + } > + num_chunks++; > } > > r = amdgpu_cs_submit_raw(ctx->ws->dev, > @@ -1169,6 +1203,8 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx > *ctx, > &request->seq_no); > error_out: > free(sem_dependencies); > + free(wait_syncobj); > + free(signal_syncobj); > return r; > } > > -- > 2.9.4 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
