Module: Mesa
Branch: main
Commit: 459b49a1749ecdee1726a793d1ca62534ad95932
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=459b49a1749ecdee1726a793d1ca62534ad95932

Author: Mike Blumenkrantz <[email protected]>
Date:   Thu Sep  7 12:46:01 2023 -0400

zink: add a new linker pass to handle mismatched i/o components

This is the inverse of rewrite_read_as_0: for a given location it tests for
mismatched component I/O between producer and consumer, and rewrites the
consumer's inputs to zero for any component the producer shader didn't write.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24950>

---

 src/gallium/drivers/zink/zink_compiler.c | 110 +++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/src/gallium/drivers/zink/zink_compiler.c 
b/src/gallium/drivers/zink/zink_compiler.c
index 56789271219..4616c0505d7 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -2638,6 +2638,105 @@ delete_psiz_store(nir_shader *nir)
                                      nir_metadata_dominance, NULL);
 }
 
+struct write_components { /* state passed as the callback data of the two intrinsic passes below */
+   unsigned slot; /* location under inspection; compared against each intrinsic's io_semantics location range */
+   uint32_t component_mask; /* OR of the (component-shifted) write masks gathered by find_max_write_components */
+};
+
+static bool
+fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{ /* per-intrinsic callback: patch input accesses of wc->slot so components absent from wc->component_mask read a default value; returns true only when a rewrite was emitted */
+   struct write_components *wc = data; /* slot and written-component mask supplied by the caller */
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) /* helper defined elsewhere; presumably classifies I/O intrinsics -- confirm */
+      return false;
+   if (!is_input) /* only input accesses are candidates for rewriting */
+      return false;
+   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+   if (wc->slot < s.location || wc->slot >= s.location + s.num_slots) /* wc->slot lies outside [location, location + num_slots) */
+      return false;
+   unsigned num_components = intr->num_components;
+   unsigned c = nir_intrinsic_component(intr); /* starting component of the access */
+   if (intr->def.bit_size == 64)
+      num_components *= 2; /* 64-bit values are counted as two components each */
+   nir_src *src_offset = nir_get_io_offset_src(intr);
+   if (nir_src_is_const(*src_offset)) {
+      unsigned slot_offset = nir_src_as_uint(*src_offset);
+      if (s.location + slot_offset != wc->slot) /* constant offset: must address exactly wc->slot */
+         return false;
+   } else if (s.location > wc->slot || s.location + s.num_slots <= wc->slot) { /* non-constant offset: same range test as above */
+      return false;
+   }
+   uint32_t readmask = BITFIELD_MASK(intr->num_components) << c; /* one bit per component read, positioned at the starting component */
+   if (intr->def.bit_size == 64)
+      readmask |= readmask << (intr->num_components + c); /* 64-bit: also set a second, shifted copy of the mask */
+   /* handle dvec3/dvec4 */
+   if (num_components + c > 4)
+      readmask >>= 4; /* access extends past component 3: discard the low four bits */
+   if ((wc->component_mask & readmask) == readmask) /* every component read is present in the written mask: nothing to fix */
+      return false;
+   uint32_t rewrite_mask = readmask & ~wc->component_mask; /* components read but missing from the written mask */
+   if (!rewrite_mask)
+      return false;
+   b->cursor = nir_after_instr(&intr->instr); /* new instructions are emitted right after the access */
+   nir_def *zero = nir_imm_zero(b, intr->def.num_components, 
intr->def.bit_size);
+   if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
+      switch (wc->slot) {
+      case VARYING_SLOT_COL0:
+      case VARYING_SLOT_COL1:
+      case VARYING_SLOT_BFC0:
+      case VARYING_SLOT_BFC1:
+         /* default color is 0,0,0,1 */
+         if (intr->def.num_components == 4)
+            zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3); /* channel 3 of the default vector becomes 1.0 */
+         break;
+      default:
+         break;
+      }
+   }
+   rewrite_mask >>= c; /* rebase bits so index 0 is the first channel of this def */
+   nir_def *dest = &intr->def;
+   u_foreach_bit(component, rewrite_mask)
+      dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), 
component);
+   nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr); /* later uses now see the patched vector */
+   return true;
+}
+
+static bool
+find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void 
*data)
+{ /* per-intrinsic callback: accumulate into wc->component_mask the write masks of accesses covering wc->slot; always returns false */
+   struct write_components *wc = data; /* wc->slot selects the location; wc->component_mask receives the result */
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) /* helper defined elsewhere; presumably classifies I/O intrinsics -- confirm */
+      return false;
+   if (is_input || is_load) /* skip anything flagged as an input or a load */
+      return false;
+   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+   if (wc->slot < s.location || wc->slot >= s.location + s.num_slots) /* wc->slot lies outside [location, location + num_slots) */
+      return false;
+   unsigned location = s.location;
+   unsigned c = nir_intrinsic_component(intr); /* starting component of the access */
+   uint32_t wrmask = nir_intrinsic_write_mask(intr) << c; /* write mask positioned at the starting component */
+   if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) { /* 64-bit source type */
+      unsigned num_components = intr->num_components * 2; /* 64-bit values are counted as two components each */
+      nir_src *src_offset = nir_get_io_offset_src(intr);
+      if (nir_src_is_const(*src_offset)) {
+         if (location + nir_src_as_uint(*src_offset) != wc->slot && 
num_components + c < 4)
+            return false;
+      }
+      wrmask |= wrmask << intr->num_components; /* also set a second, shifted copy of the mask */
+      /* handle dvec3/dvec4 */
+      if (num_components + c > 4)
+         wrmask >>= 4; /* access extends past component 3: discard the low four bits */
+   }
+   wc->component_mask |= wrmask; /* merge into the running mask for this slot */
+   return false; /* this callback never modifies the shader */
+}
+
 void
 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, 
nir_shader *consumer)
 {
@@ -2679,6 +2778,16 @@ zink_compiler_assign_io(struct zink_screen *screen, 
nir_shader *producer, nir_sh
       if (consumer->info.stage == MESA_SHADER_FRAGMENT && 
screen->driver_workarounds.needs_sanitised_layer)
          do_fixup |= clamp_layer_output(producer, consumer, &reserved);
    }
+   nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
+   if (producer->info.io_lowered && consumer->info.io_lowered) {
+      u_foreach_bit64(slot, producer->info.outputs_written & 
BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
+         struct write_components wc = {slot, 0};
+         nir_shader_intrinsics_pass(producer, find_max_write_components, 
nir_metadata_all, &wc);
+         assert(wc.component_mask);
+         if (wc.component_mask != BITFIELD_MASK(4))
+            do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, 
nir_metadata_dominance, &wc);
+      }
+   }
    if (!do_fixup)
       return;
    nir_fixup_deref_modes(nir);
@@ -5294,6 +5403,7 @@ zink_shader_create(struct zink_screen *screen, struct 
nir_shader *nir)
    if (nir->info.stage == MESA_SHADER_VERTEX)
       lower_io_flags |= nir_lower_io_lower_64bit_to_32;
    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, zink_type_size, 
lower_io_flags);
+   nir->info.io_lowered = true;
    optimize_nir(nir, NULL);
    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | 
nir_var_shader_out) {
       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {

Reply via email to