Module: Mesa Branch: main Commit: 773d35d25e589cbd1c786559140dee42c5fe23a2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=773d35d25e589cbd1c786559140dee42c5fe23a2
Author: Rhys Perry <[email protected]> Date: Thu Oct 19 19:27:07 2023 +0100 ac/nir: implement mesh shader multi-row export Unlike AMDVLK, this has separate loops for attribute stores and exports, so that the stores from different rows can overlap. Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25040> --- src/amd/common/ac_nir_lower_ngg.c | 50 +++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index deedb4c08e5..eaa675bf13e 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -198,6 +198,8 @@ typedef struct { enum amd_gfx_level gfx_level; bool fast_launch_2; + bool vert_multirow_export; + bool prim_multirow_export; ms_out_mem_layout layout; uint64_t per_vertex_outputs; @@ -4502,12 +4504,48 @@ emit_ms_outputs(nir_builder *b, nir_def *invocation_index, nir_def *row_start, uint64_t, lower_ngg_ms_state *), lower_ngg_ms_state *s) { - nir_def *has_output = nir_ilt(b, invocation_index, count); - nir_if *if_has_output = nir_push_if(b, has_output); - { - cb(b, invocation_index, row_start, exports, parameters, mask, s); + if (cb == &emit_ms_primitive ? s->prim_multirow_export : s->vert_multirow_export) { + assert(s->hw_workgroup_size % s->wave_size == 0); + const unsigned num_waves = s->hw_workgroup_size / s->wave_size; + + nir_loop *row_loop = nir_push_loop(b); + { + nir_block *preheader = nir_cf_node_as_block(nir_cf_node_prev(&row_loop->cf_node)); + + nir_phi_instr *index = nir_phi_instr_create(b->shader); + nir_phi_instr *row = nir_phi_instr_create(b->shader); + nir_def_init(&index->instr, &index->def, 1, 32); + nir_def_init(&row->instr, &row->def, 1, 32); + + nir_phi_instr_add_src(index, preheader, invocation_index); + nir_phi_instr_add_src(row, preheader, row_start); + + nir_if *if_break = nir_push_if(b, nir_uge(b, &index->def, count)); + { + nir_jump(b, nir_jump_break); + } + nir_pop_if(b, if_break); + + cb(b, &index->def, &row->def, exports, parameters, mask, s); + + nir_block *body = nir_cursor_current_block(b->cursor); + nir_phi_instr_add_src(index, body, + nir_iadd_imm(b, &index->def, s->hw_workgroup_size)); + nir_phi_instr_add_src(row, body, + nir_iadd_imm(b, &row->def, num_waves)); + + nir_instr_insert_before_cf_list(&row_loop->body, &row->instr); + nir_instr_insert_before_cf_list(&row_loop->body, &index->instr); + } + nir_pop_loop(b, row_loop); + } else { + nir_def *has_output = nir_ilt(b, invocation_index, count); + nir_if *if_has_output = nir_push_if(b, has_output); + { + cb(b, invocation_index, row_start, exports, parameters, mask, s); + } + nir_pop_if(b, if_has_output); } - nir_pop_if(b, if_has_output); } static void @@ -4920,6 +4958,8 @@ ac_nir_lower_ngg_ms(nir_shader *shader, .uses_cull_flags = uses_cull, .gfx_level = gfx_level, .fast_launch_2 = fast_launch_2, + .vert_multirow_export = fast_launch_2 && max_vertices > hw_workgroup_size, + .prim_multirow_export = fast_launch_2 && max_primitives > hw_workgroup_size, .clipdist_enable_mask = clipdist_enable_mask, .vs_output_param_offset = vs_output_param_offset, .has_param_exports = has_param_exports,
