Module: Mesa Branch: main Commit: 725fc0ec03c135acd3e37649955abea0b2eabe11 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=725fc0ec03c135acd3e37649955abea0b2eabe11
Author: Samuel Pitoiset <samuel.pitoi...@gmail.com> Date: Wed Nov 29 16:43:04 2023 +0100 radv: switch to on-demand PS epilogs for GPL RADV currently has two paths for PS epilogs: - the first one is mostly used by GPL to compile fragment shader epilogs as part of the graphics pipeline. It's supposed to be optimal because fragment shader epilogs are compiled in the pipeline and eventually cached. - the second one (the "on-demand" path) is required when some dynamic states are used because otherwise it's just impossible to compile the fragment shader. These epilogs are compiled during cmdbuf recording when all the needed info is known; they are also cached in memory. This is the main path for Zink. Having two different paths isn't ideal for maintenance but there is another problem. On RDNA3, alpha to coverage needs to be exported as part of MRTZ when either depth/stencil/samplemask are exported. The problem being that with GPL, the PSO multisample state can be NULL when the frag shader lib is created, which means that we can't know if atc needs to be exported or not, even if it's static. The solution seems to be to always use on-demand fragment shader epilogs for GPL on RDNA3. So far, I think that switching to on-demand PS epilogs unconditionally for GPL shouldn't hurt performance and that will simplify a lot of things. 
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26398> --- src/amd/vulkan/radv_device.c | 4 +++- src/amd/vulkan/radv_pipeline_graphics.c | 27 +++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 5bda530d369..ad347e309f9 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -823,8 +823,10 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT: { const VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT *features = (const void *)ext; - if (features->graphicsPipelineLibrary) + if (features->graphicsPipelineLibrary) { vs_prologs = true; + ps_epilogs = true; + } break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: { diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index bbc94d9d3d7..2e2fa477d95 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -261,21 +261,32 @@ radv_format_meta_fs_key(struct radv_device *device, VkFormat format) } static bool -radv_pipeline_needs_dynamic_ps_epilog(const struct radv_graphics_pipeline *pipeline) +radv_pipeline_needs_dynamic_ps_epilog(const struct radv_graphics_pipeline *pipeline, + VkGraphicsPipelineLibraryFlagBitsEXT lib_flags) { + /* Use a PS epilog when the fragment shader is compiled without the fragment output interface. */ + if ((pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && + (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && + !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) + return true; + /* These dynamic states need to compile PS epilogs on-demand. 
*/ - return !!(pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK | - RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION)); + if (pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK | + RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION)) + return true; + + return false; } static struct radv_blend_state -radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) +radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state, + VkGraphicsPipelineLibraryFlagBitsEXT lib_flags) { const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; struct radv_blend_state blend = {0}; unsigned spi_shader_col_format = 0; - if (radv_pipeline_needs_dynamic_ps_epilog(pipeline)) + if (radv_pipeline_needs_dynamic_ps_epilog(pipeline, lib_flags)) return blend; if (ps) { @@ -1920,7 +1931,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const stru if (device->primitives_generated_query) key.primitives_generated_query = true; - if (radv_pipeline_needs_dynamic_ps_epilog(pipeline)) + if (radv_pipeline_needs_dynamic_ps_epilog(pipeline, lib_flags)) key.ps.dynamic_ps_epilog = true; /* The fragment shader needs an epilog when both: @@ -2450,7 +2461,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const stru return false; /* Do not skip when the PS epilog needs to be compiled. 
*/ - if (!radv_pipeline_needs_dynamic_ps_epilog(pipeline) && pipeline->base.shaders[MESA_SHADER_FRAGMENT] && + if (!radv_pipeline_needs_dynamic_ps_epilog(pipeline, lib_flags) && pipeline->base.shaders[MESA_SHADER_FRAGMENT] && pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.has_epilog && !pipeline->ps_epilog) return false; @@ -3991,7 +4002,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv radv_pipeline_init_input_assembly_state(device, pipeline); radv_pipeline_init_dynamic_state(device, pipeline, &state, pCreateInfo); - struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, &state); + struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, &state, needed_lib_flags); /* Copy the non-compacted SPI_SHADER_COL_FORMAT which is used to emit RBPLUS state. */ pipeline->col_format_non_compacted = blend.spi_shader_col_format;