Module: Mesa
Branch: main
Commit: 8ddd89ffa561456418550c57203ff035668da2c3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ddd89ffa561456418550c57203ff035668da2c3

Author: Alyssa Rosenzweig <aly...@rosenzweig.io>
Date:   Sun Nov 26 21:14:47 2023 -0400

nir,zink: Redefine flat_mask in terms of I/O locations

Robust against separable shaders, and still makes sense for lowered I/O drivers,
whereas just counting FS variables and expecting them to match with the VS is...
questionable.

Signed-off-by: Alyssa Rosenzweig <aly...@rosenzweig.io>
Signed-off-by: antonino <antonino.manisca...@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26888>

---

 src/compiler/nir/nir_intrinsics.py       |  8 ++++----
 src/compiler/nir/nir_passthrough_gs.c    |  4 ++--
 src/gallium/drivers/zink/zink_compiler.c | 26 +++++++++++++++++++-------
 src/gallium/drivers/zink/zink_compiler.h |  2 +-
 src/gallium/drivers/zink/zink_program.c  |  4 +++-
 src/gallium/drivers/zink/zink_types.h    |  2 +-
 6 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index fcbb11ca182..60a7fa3732b 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1210,10 +1210,10 @@ load("mesh_view_indices", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
 load("preamble", [], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
 store("preamble", [], indices=[BASE])
 
-# A 32 bits bitfield storing 1 in bits corresponding to varyings
-# that have the flat interpolation specifier in the fragment shader
-# and 0 otherwise
-system_value("flat_mask", 1)
+# A 64-bit bitfield indexed by I/O location storing 1 in bits corresponding to
+# varyings that have the flat interpolation specifier in the fragment shader and
+# 0 otherwise
+system_value("flat_mask", 1, bit_sizes=[64])
 
 # Whether provoking vertex mode is last
 system_value("provoking_last", 1)
diff --git a/src/compiler/nir/nir_passthrough_gs.c b/src/compiler/nir/nir_passthrough_gs.c
index e0c5cf03d4d..cf4edf8e2aa 100644
--- a/src/compiler/nir/nir_passthrough_gs.c
+++ b/src/compiler/nir/nir_passthrough_gs.c
@@ -226,7 +226,7 @@ nir_create_passthrough_gs(const nir_shader_compiler_options *options,
    for (unsigned i = start_vert; i < end_vert || needs_closing; i += vert_step) {
       int idx = i < end_vert ? i : start_vert;
       /* Copy inputs to outputs. */
-      for (unsigned j = 0, oj = 0, of = 0; j < num_inputs; ++j) {
+      for (unsigned j = 0, oj = 0; j < num_inputs; ++j) {
          if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
             continue;
          }
@@ -235,7 +235,7 @@ nir_create_passthrough_gs(const nir_shader_compiler_options *options,
          if (in_vars[j]->data.location == VARYING_SLOT_POS || !handle_flat)
             index = nir_imm_int(&b, idx);
          else {
-            unsigned mask = 1u << (of++);
+            uint64_t mask = BITFIELD64_BIT(in_vars[j]->data.location);
             index = nir_bcsel(&b, nir_ieq_imm(&b, nir_iand_imm(&b, flat_interp_mask_def, mask), 0), nir_imm_int(&b, idx), pv_vert_index);
          }
          nir_deref_instr *value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), index);
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 630dbb4b78c..c13106f516a 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -1165,10 +1165,22 @@ lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
    }
 
    b->cursor = nir_before_instr(&intrin->instr);
-   nir_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
-                                            nir_imm_int(b, inlined_uniform_offset),
-                                            .align_mul = 4, .align_offset = 0,
-                                            .range_base = 0, .range = ~0);
+   assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
+   /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
+    * anything with a different bit_size) so we need to split the load. */
+   int num_dwords = intrin->def.bit_size / 32;
+   nir_def *dwords[2] = {NULL};
+   for (unsigned i = 0; i < num_dwords; i++)
+      dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
+                                   nir_imm_int(b, inlined_uniform_offset + i),
+                                   .align_mul = intrin->def.bit_size / 8,
+                                   .align_offset = 0,
+                                   .range_base = 0, .range = ~0);
+   nir_def *new_dest_def;
+   if (intrin->def.bit_size == 32)
+      new_dest_def = dwords[0];
+   else
+      new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
    nir_def_rewrite_uses(&intrin->def, new_dest_def);
    nir_instr_remove(&intrin->instr);
    return true;
@@ -4943,13 +4955,13 @@ fixup_io_locations(nir_shader *nir)
    return true;
 }
 
-static uint32_t
+static uint64_t
 zink_flat_flags(struct nir_shader *shader)
 {
-   uint32_t flat_flags = 0, c = 0;
+   uint64_t flat_flags = 0;
    nir_foreach_shader_in_variable(var, shader) {
       if (var->data.interpolation == INTERP_MODE_FLAT)
-         flat_flags |= 1u << (c++);
+         flat_flags |= BITFIELD64_BIT(var->data.location);
    }
 
    return flat_flags;
diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h
index 1319193f83c..e901ee45f7b 100644
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@@ -31,7 +31,7 @@
 #define ZINK_WORKGROUP_SIZE_Z 3
 #define ZINK_VARIABLE_SHARED_MEM 4
 #define ZINK_INLINE_VAL_FLAT_MASK 0
-#define ZINK_INLINE_VAL_PV_LAST_VERT 1
+#define ZINK_INLINE_VAL_PV_LAST_VERT 2
 
 /* stop inlining shaders if they have >limit ssa vals after inlining:
  * recompile time isn't worth the inline
diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c
index e50ad9c9c91..0832772c038 100644
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@@ -2512,6 +2512,7 @@ zink_set_primitive_emulation_keys(struct zink_context *ctx)
             zink_lower_system_values_to_inlined_uniforms(nir);
 
             zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
+            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1);
             zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
             ralloc_free(prev_stage);
             struct zink_shader *shader = zink_shader_create(screen, nir);
@@ -2528,8 +2529,9 @@ zink_set_primitive_emulation_keys(struct zink_context *ctx)
          ctx->is_generated_gs_bound = true;
       }
 
-      ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 2,
+      ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3,
                                         (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags,
+                                                      ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32,
                                                       ctx->gfx_pipeline_state.dyn_state3.pv_last});
    } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
               ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h
index 9ed132ddebc..41eaa21215c 100644
--- a/src/gallium/drivers/zink/zink_types.h
+++ b/src/gallium/drivers/zink/zink_types.h
@@ -817,7 +817,7 @@ struct zink_shader {
    unsigned num_texel_buffers;
    uint32_t ubos_used; // bitfield of which ubo indices are used
    uint32_t ssbos_used; // bitfield of which ssbo indices are used
-   uint32_t flat_flags;
+   uint64_t flat_flags;
    bool bindless;
    bool can_inline;
    bool has_uniforms;

Reply via email to