Module: Mesa Branch: main Commit: 60a7115b4e904ad171a33154209693e0fecade8f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=60a7115b4e904ad171a33154209693e0fecade8f
Author: Samuel Pitoiset <[email protected]> Date: Fri Aug 26 14:18:33 2022 +0200 radv: cleanup computing the workgroup size for all stages Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18278> --- src/amd/vulkan/radv_pipeline.c | 35 --------------- src/amd/vulkan/radv_shader_info.c | 90 ++++++++++++++++++++++++++------------- 2 files changed, 60 insertions(+), 65 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 402674a7df7..0905506d267 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2892,41 +2892,6 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, } radv_nir_shader_info_link(device, pipeline_key, stages, pipeline_has_ngg, last_vgt_api_stage); - - if (stages[MESA_SHADER_TESS_CTRL].nir) { - for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s) { - stages[s].info.workgroup_size = - ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, s, - stages[MESA_SHADER_TESS_CTRL].info.num_tess_patches, - pipeline_key->tcs.tess_input_vertices, - stages[MESA_SHADER_TESS_CTRL].info.tcs.tcs_vertices_out); - } - } - - /* PS always operates without workgroups. */ - if (stages[MESA_SHADER_FRAGMENT].nir) - stages[MESA_SHADER_FRAGMENT].info.workgroup_size = stages[MESA_SHADER_FRAGMENT].info.wave_size; - - if (stages[MESA_SHADER_COMPUTE].nir) { - /* Variable workgroup size is not supported by Vulkan. */ - assert(!stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size_variable); - - stages[MESA_SHADER_COMPUTE].info.workgroup_size = - ac_compute_cs_workgroup_size( - stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size, false, UINT32_MAX); - } - - if (stages[MESA_SHADER_TASK].nir) { - stages[MESA_SHADER_TASK].info.workgroup_size = - ac_compute_cs_workgroup_size( - stages[MESA_SHADER_TASK].nir->info.workgroup_size, false, UINT32_MAX); - } - - if (!pipeline_has_ngg && !stages[MESA_SHADER_GEOMETRY].nir) { - gl_shader_stage hw_vs_api_stage = - stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; - stages[hw_vs_api_stage].info.workgroup_size = stages[hw_vs_api_stage].info.wave_size; - } } static void diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index e69b2ce5641..383b4894edd 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -796,6 +796,23 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n info->wave_size = radv_get_wave_size(device, nir->info.stage, info); info->ballot_bit_size = radv_get_ballot_bit_size(device, nir->info.stage, info); + + switch (nir->info.stage) { + case MESA_SHADER_COMPUTE: + case MESA_SHADER_TASK: + info->workgroup_size = + ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); + break; + case MESA_SHADER_MESH: + /* Already computed in gather_shader_info_mesh(). */ + break; + default: + /* FS always operates without workgroups. Other stages are computed during linking but assume + * no workgroups by default. + */ + info->workgroup_size = info->wave_size; + break; + } } static void @@ -1273,39 +1290,52 @@ radv_link_shaders_info(struct radv_device *device, } } - if (producer->stage == MESA_SHADER_VERTEX && consumer->stage == MESA_SHADER_TESS_CTRL && - !radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) { + if (producer->stage == MESA_SHADER_VERTEX && consumer->stage == MESA_SHADER_TESS_CTRL) { struct radv_pipeline_stage *vs_stage = producer; struct radv_pipeline_stage *tcs_stage = consumer; - /* When the number of TCS input and output vertices are the same (typically 3): - * - There is an equal amount of LS and HS invocations - * - In case of merged LSHS shaders, the LS and HS halves of the shader always process the - * exact same vertex. We can use this knowledge to optimize them. - * - * We don't set tcs_in_out_eq if the float controls differ because that might involve - * different float modes for the same block and our optimizer doesn't handle a instruction - * dominating another with a different mode. - */ - vs_stage->info.vs.tcs_in_out_eq = - device->physical_device->rad_info.gfx_level >= GFX9 && - pipeline_key->tcs.tess_input_vertices == tcs_stage->info.tcs.tcs_vertices_out && - vs_stage->nir->info.float_controls_execution_mode == - tcs_stage->nir->info.float_controls_execution_mode; - - if (vs_stage->info.vs.tcs_in_out_eq) - vs_stage->info.vs.tcs_temp_only_input_mask = - tcs_stage->nir->info.inputs_read & - vs_stage->nir->info.outputs_written & - ~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read & - ~tcs_stage->nir->info.inputs_read_indirectly & - ~vs_stage->nir->info.outputs_accessed_indirectly; - - /* Copy data to TCS so it can be accessed by the backend if they are merged. */ - tcs_stage->info.vs.tcs_in_out_eq = - vs_stage->info.vs.tcs_in_out_eq; - tcs_stage->info.vs.tcs_temp_only_input_mask = - vs_stage->info.vs.tcs_temp_only_input_mask; + vs_stage->info.workgroup_size = + ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, + MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches, + pipeline_key->tcs.tess_input_vertices, + tcs_stage->info.tcs.tcs_vertices_out); + + tcs_stage->info.workgroup_size = + ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, + MESA_SHADER_TESS_CTRL, tcs_stage->info.num_tess_patches, + pipeline_key->tcs.tess_input_vertices, + tcs_stage->info.tcs.tcs_vertices_out); + + if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) { + /* When the number of TCS input and output vertices are the same (typically 3): + * - There is an equal amount of LS and HS invocations + * - In case of merged LSHS shaders, the LS and HS halves of the shader always process the + * exact same vertex. We can use this knowledge to optimize them. + * + * We don't set tcs_in_out_eq if the float controls differ because that might involve + * different float modes for the same block and our optimizer doesn't handle a instruction + * dominating another with a different mode. + */ + vs_stage->info.vs.tcs_in_out_eq = + device->physical_device->rad_info.gfx_level >= GFX9 && + pipeline_key->tcs.tess_input_vertices == tcs_stage->info.tcs.tcs_vertices_out && + vs_stage->nir->info.float_controls_execution_mode == + tcs_stage->nir->info.float_controls_execution_mode; + + if (vs_stage->info.vs.tcs_in_out_eq) + vs_stage->info.vs.tcs_temp_only_input_mask = + tcs_stage->nir->info.inputs_read & + vs_stage->nir->info.outputs_written & + ~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read & + ~tcs_stage->nir->info.inputs_read_indirectly & + ~vs_stage->nir->info.outputs_accessed_indirectly; + + /* Copy data to TCS so it can be accessed by the backend if they are merged. */ + tcs_stage->info.vs.tcs_in_out_eq = + vs_stage->info.vs.tcs_in_out_eq; + tcs_stage->info.vs.tcs_temp_only_input_mask = + vs_stage->info.vs.tcs_temp_only_input_mask; + } } /* Copy shader info between TCS<->TES. */
