Module: Mesa Branch: main Commit: 8ddd89ffa561456418550c57203ff035668da2c3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ddd89ffa561456418550c57203ff035668da2c3
Author: Alyssa Rosenzweig <aly...@rosenzweig.io> Date: Sun Nov 26 21:14:47 2023 -0400 nir,zink: Redefine flat_mask in terms of I/O locations Robust against separable shaders, and still makes sense for lowered I/O drivers, whereas just counting FS variables and expecting them to match with the VS is... questionable. Signed-off-by: Alyssa Rosenzweig <aly...@rosenzweig.io> Signed-off-by: antonino <antonino.manisca...@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26888> --- src/compiler/nir/nir_intrinsics.py | 8 ++++---- src/compiler/nir/nir_passthrough_gs.c | 4 ++-- src/gallium/drivers/zink/zink_compiler.c | 26 +++++++++++++++++++------- src/gallium/drivers/zink/zink_compiler.h | 2 +- src/gallium/drivers/zink/zink_program.c | 4 +++- src/gallium/drivers/zink/zink_types.h | 2 +- 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index fcbb11ca182..60a7fa3732b 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1210,10 +1210,10 @@ load("mesh_view_indices", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) load("preamble", [], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER]) store("preamble", [], indices=[BASE]) -# A 32 bits bitfield storing 1 in bits corresponding to varyings -# that have the flat interpolation specifier in the fragment shader -# and 0 otherwise -system_value("flat_mask", 1) +# A 64-bit bitfield indexed by I/O location storing 1 in bits corresponding to +# varyings that have the flat interpolation specifier in the fragment shader and +# 0 otherwise +system_value("flat_mask", 1, bit_sizes=[64]) # Whether provoking vertex mode is last system_value("provoking_last", 1) diff --git a/src/compiler/nir/nir_passthrough_gs.c b/src/compiler/nir/nir_passthrough_gs.c index e0c5cf03d4d..cf4edf8e2aa 100644 --- a/src/compiler/nir/nir_passthrough_gs.c +++ b/src/compiler/nir/nir_passthrough_gs.c @@ -226,7 +226,7 @@ nir_create_passthrough_gs(const nir_shader_compiler_options *options, for (unsigned i = start_vert; i < end_vert || needs_closing; i += vert_step) { int idx = i < end_vert ? i : start_vert; /* Copy inputs to outputs. */ - for (unsigned j = 0, oj = 0, of = 0; j < num_inputs; ++j) { + for (unsigned j = 0, oj = 0; j < num_inputs; ++j) { if (in_vars[j]->data.location == VARYING_SLOT_EDGE) { continue; } @@ -235,7 +235,7 @@ nir_create_passthrough_gs(const nir_shader_compiler_options *options, if (in_vars[j]->data.location == VARYING_SLOT_POS || !handle_flat) index = nir_imm_int(&b, idx); else { - unsigned mask = 1u << (of++); + uint64_t mask = BITFIELD64_BIT(in_vars[j]->data.location); index = nir_bcsel(&b, nir_ieq_imm(&b, nir_iand_imm(&b, flat_interp_mask_def, mask), 0), nir_imm_int(&b, idx), pv_vert_index); } nir_deref_instr *value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), index); diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 630dbb4b78c..c13106f516a 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -1165,10 +1165,22 @@ lower_system_values_to_inlined_uniforms_instr(nir_builder *b, } b->cursor = nir_before_instr(&intrin->instr); - nir_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0), - nir_imm_int(b, inlined_uniform_offset), - .align_mul = 4, .align_offset = 0, - .range_base = 0, .range = ~0); + assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64); + /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore + * anything with a different bit_size) so we need to split the load. */ + int num_dwords = intrin->def.bit_size / 32; + nir_def *dwords[2] = {NULL}; + for (unsigned i = 0; i < num_dwords; i++) + dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0), + nir_imm_int(b, inlined_uniform_offset + i), + .align_mul = intrin->def.bit_size / 8, + .align_offset = 0, + .range_base = 0, .range = ~0); + nir_def *new_dest_def; + if (intrin->def.bit_size == 32) + new_dest_def = dwords[0]; + else + new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]); nir_def_rewrite_uses(&intrin->def, new_dest_def); nir_instr_remove(&intrin->instr); return true; @@ -4943,13 +4955,13 @@ fixup_io_locations(nir_shader *nir) return true; } -static uint32_t +static uint64_t zink_flat_flags(struct nir_shader *shader) { - uint32_t flat_flags = 0, c = 0; + uint64_t flat_flags = 0; nir_foreach_shader_in_variable(var, shader) { if (var->data.interpolation == INTERP_MODE_FLAT) - flat_flags |= 1u << (c++); + flat_flags |= BITFIELD64_BIT(var->data.location); } return flat_flags; diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index 1319193f83c..e901ee45f7b 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -31,7 +31,7 @@ #define ZINK_WORKGROUP_SIZE_Z 3 #define ZINK_VARIABLE_SHARED_MEM 4 #define ZINK_INLINE_VAL_FLAT_MASK 0 -#define ZINK_INLINE_VAL_PV_LAST_VERT 1 +#define ZINK_INLINE_VAL_PV_LAST_VERT 2 /* stop inlining shaders if they have >limit ssa vals after inlining: * recompile time isn't worth the inline diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index e50ad9c9c91..0832772c038 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -2512,6 +2512,7 @@ zink_set_primitive_emulation_keys(struct zink_context *ctx) zink_lower_system_values_to_inlined_uniforms(nir); zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK); + zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1); zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT); ralloc_free(prev_stage); struct zink_shader *shader = zink_shader_create(screen, nir); @@ -2528,8 +2529,9 @@ zink_set_primitive_emulation_keys(struct zink_context *ctx) ctx->is_generated_gs_bound = true; } - ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 2, + ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3, (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags, + ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32, ctx->gfx_pipeline_state.dyn_state3.pv_last}); } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] && ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated) diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 9ed132ddebc..41eaa21215c 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -817,7 +817,7 @@ struct zink_shader { unsigned num_texel_buffers; uint32_t ubos_used; // bitfield of which ubo indices are used uint32_t ssbos_used; // bitfield of which ssbo indices are used - uint32_t flat_flags; + uint64_t flat_flags; bool bindless; bool can_inline; bool has_uniforms;