Module: Mesa Branch: main Commit: 3fe8f881244601b01a9a05251cc6b3c152ae573d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3fe8f881244601b01a9a05251cc6b3c152ae573d
Author: Qiang Yu <[email protected]> Date: Tue Jul 26 13:57:45 2022 +0800 ac/nir/ngg: support multi stream per output slot for gs radeonsi may pack multi stream output to same slot. Reviewed-by: Timur Kristóf <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17654> --- src/amd/common/ac_nir_lower_ngg.c | 45 +++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 91b60855a24..c7af6517a53 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -88,10 +88,10 @@ typedef struct { /* store output base (driver location) */ uint8_t base; + /* output stream index, 2 bit per component */ + uint8_t stream; /* Bitmask of components used: 4 bits per slot, 1 bit per component. */ uint8_t components_mask : 4; - /* output stream index */ - uint8_t stream : 2; } gs_output_info; typedef struct @@ -2139,13 +2139,19 @@ lower_ngg_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg if (!(b->shader->info.gs.active_stream_mask & (1 << stream))) continue; - /* The same output should always belong to the same stream and base. */ - assert(!info->components_mask || (info->stream == stream && info->base == base_index)); + unsigned component = component_offset + comp; + + /* The same output should always belong to the same base. */ + assert(!info->components_mask || info->base == base_index); + /* The same output component should always belong to the same stream. */ + assert(!(info->components_mask & (1 << component)) || + ((info->stream >> (component * 2)) & 3) == stream); + info->base = base_index; - info->stream = stream; - info->components_mask |= BITFIELD_BIT(component_offset + comp); + /* Components of the same output slot may belong to different streams. */ + info->stream |= stream << (component * 2); + info->components_mask |= BITFIELD_BIT(component); - unsigned component = component_offset + comp; nir_variable *var = s->output_vars[location][component]; if (!var) { var = nir_local_variable_create( @@ -2161,6 +2167,22 @@ lower_ngg_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg return true; } +static unsigned +gs_output_component_mask_with_stream(gs_output_info *info, unsigned stream) +{ + unsigned mask = info->components_mask; + if (!mask) + return 0; + + /* clear component when not requested stream */ + for (int i = 0; i < 4; i++) { + if (((info->stream >> (i * 2)) & 3) != stream) + mask &= ~(1 << i); + } + + return mask; +} + static bool lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg_gs_state *s) { @@ -2179,10 +2201,11 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri for (unsigned slot = 0; slot < VARYING_SLOT_MAX; ++slot) { unsigned packed_location = util_bitcount64((b->shader->info.outputs_written & BITFIELD64_MASK(slot))); gs_output_info *info = &s->output_info[slot]; - if (info->stream != stream || !info->components_mask) + + unsigned mask = gs_output_component_mask_with_stream(info, stream); + if (!mask) continue; - unsigned mask = info->components_mask; while (mask) { int start, count; u_bit_scan_consecutive_range(&mask, &start, &count); @@ -2356,13 +2379,13 @@ ngg_gs_export_vertices(nir_builder *b, nir_ssa_def *max_num_out_vtx, nir_ssa_def continue; gs_output_info *info = &s->output_info[slot]; - if (!info->components_mask || info->stream != 0) + unsigned mask = gs_output_component_mask_with_stream(info, 0); + if (!mask) continue; unsigned packed_location = util_bitcount64((b->shader->info.outputs_written & BITFIELD64_MASK(slot))); nir_io_semantics io_sem = { .location = slot, .num_slots = 1 }; - unsigned mask = info->components_mask; while (mask) { int start, count; u_bit_scan_consecutive_range(&mask, &start, &count);
