Module: Mesa Branch: main Commit: 55e5c4e089a405016199964fa1ec3ce2670ac7b0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=55e5c4e089a405016199964fa1ec3ce2670ac7b0
Author: Timur Kristóf <timur.kris...@gmail.com> Date: Fri Oct 13 23:21:52 2023 +0200 radv: Expose transfer queues, hidden behind a perftest flag. This is highly experimental and only recommended for users who know what they are doing. To fully support the spec we are going to need gang submissions which are going to be implemented later. Signed-off-by: Timur Kristóf <timur.kris...@gmail.com> Reviewed-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26913> --- docs/envvars.rst | 4 +++- src/amd/vulkan/radv_debug.h | 1 + src/amd/vulkan/radv_instance.c | 1 + src/amd/vulkan/radv_physical_device.c | 41 ++++++++++++++++++++++++++++++----- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index 084e37fa1fd..d466755d0f0 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -995,7 +995,7 @@ Clover environment variables allows specifying additional linker options. Specified options are appended after the options set by the OpenCL program in ``clLinkProgram``. - + .. _rusticl-env-var: .. envvar:: IRIS_ENABLE_CLOVER @@ -1339,6 +1339,8 @@ RADV driver environment variables enable optimizations to move more driver internal objects to VRAM. ``rtwave64`` enable wave64 for ray tracing shaders (GFX10+) + ``transfer_queue`` + enable experimental transfer queue support (GFX9+, not yet spec compliant) ``video_decode`` enable experimental video decoding support ``gsfastlaunch2`` diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index d28c2de8043..73ccead2a67 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -89,6 +89,7 @@ enum { RADV_PERFTEST_VIDEO_DECODE = 1u << 11, RADV_PERFTEST_DMA_SHADERS = 1u << 12, RADV_PERFTEST_GS_FAST_LAUNCH_2 = 1u << 13, + RADV_PERFTEST_TRANSFER_QUEUE = 1u << 14, }; bool radv_init_trace(struct radv_device *device); diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index 762b4b3383d..c0877f53ce1 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -100,6 +100,7 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P {"video_decode", RADV_PERFTEST_VIDEO_DECODE}, {"dmashaders", RADV_PERFTEST_DMA_SHADERS}, {"gsfastlaunch2", RADV_PERFTEST_GS_FAST_LAUNCH_2}, + {"transfer_queue", RADV_PERFTEST_TRANSFER_QUEUE}, {NULL, 0}}; const char * diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index fe3ec275ef5..b797bc61eb6 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -71,6 +71,17 @@ radv_taskmesh_enabled(const struct radv_physical_device *pdevice) !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdevice->rad_info.has_gang_submit; } +static bool +radv_transfer_queue_enabled(const struct radv_physical_device *pdevice) +{ + /* Check if the GPU has SDMA support and transfer queues are allowed. */ + if (pdevice->rad_info.sdma_ip_version == SDMA_UNKNOWN || !pdevice->rad_info.ip[AMD_IP_SDMA].num_queues || + !(pdevice->instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE)) + return false; + + return pdevice->rad_info.gfx_level >= GFX9; +} + static bool radv_vrs_attachment_enabled(const struct radv_physical_device *pdevice) { @@ -199,6 +210,11 @@ radv_physical_device_init_queue_table(struct radv_physical_device *pdevice) } } + if (radv_transfer_queue_enabled(pdevice)) { + pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER; + idx++; + } + pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE; pdevice->num_queues = idx; @@ -2119,6 +2135,10 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd num_queue_families++; } + if (radv_transfer_queue_enabled(pdevice)) { + num_queue_families++; + } + if (pQueueFamilyProperties == NULL) { *pCount = num_queue_families; return; @@ -2171,6 +2191,18 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd } } + if (radv_transfer_queue_enabled(pdevice)) { + if (*pCount > idx) { + *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ + .queueFlags = VK_QUEUE_TRANSFER_BIT, + .queueCount = pdevice->rad_info.ip[AMD_IP_SDMA].num_queues, + .timestampValidBits = 64, + .minImageTransferGranularity = (VkExtent3D){16, 16, 8}, + }; + idx++; + } + } + if (*pCount > idx) { *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT, @@ -2201,13 +2233,12 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui return; } VkQueueFamilyProperties *properties[] = { - &pQueueFamilyProperties[0].queueFamilyProperties, - &pQueueFamilyProperties[1].queueFamilyProperties, - &pQueueFamilyProperties[2].queueFamilyProperties, - &pQueueFamilyProperties[3].queueFamilyProperties, + &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties, + &pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties, + &pQueueFamilyProperties[4].queueFamilyProperties, }; radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); - assert(*pCount <= 4); + assert(*pCount <= 5); for (uint32_t i = 0; i < *pCount; i++) { vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {