Currently, i965 interpolates all FS inputs at the top of the program.
This has advantages and disadvantages, but I'd like to keep that policy
while reworking this code.  We can consider changing it independently.

The next patch will make the compiler generate PLN instructions "on the
fly", when it encounters an input load intrinsic, rather than doing it
for all inputs at the start of the program.

To emulate this behavior, we introduce an ugly pass to move all NIR
load_interpolated_input and payload-based (not interpolator message)
load_barycentric_* intrinsics to the shader's start block.

This helps avoid regressions in shader-db for cases such as:

   if (...) {
      ...load some input...
   } else {
      ...load that same input...
   }

which CSE can't handle, because there's no dominance relationship
between the two loads.  Because the start block dominates all others,
we can CSE all inputs and emit PLNs exactly once, as we did before.

Ideally, global value numbering would eliminate these redundant loads,
while not forcing them all the way to the start block.  When that lands,
we should consider dropping this hacky pass.

Again, this pass currently does nothing, as i965 doesn't generate these
intrinsics yet.  But it will shortly, and I figured I'd separate this
code as it's relatively self-contained.

Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 78 ++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ea6616b..94127bc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6400,6 +6400,83 @@ computed_depth_mode(const nir_shader *shader)
 }
 
 /**
+ * Move load_interpolated_input with simple (payload-based) barycentric modes
+ * to the top of the program so we don't emit multiple PLNs for the same input.
+ *
+ * This works around CSE not being able to handle non-dominating cases
+ * such as:
+ *
+ *    if (...) {
+ *       interpolate input
+ *    } else {
+ *       interpolate the same exact input
+ *    }
+ *
+ * This should be replaced by global value numbering someday.
+ */
+void
+move_interpolation_to_top(nir_shader *nir)
+{
+   nir_foreach_function(f, nir) {
+      if (!f->impl)
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, f->impl);
+      b.cursor = nir_before_block(nir_start_block(f->impl));
+
+      nir_foreach_block(block, f->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+            if (load->intrinsic != nir_intrinsic_load_interpolated_input)
+               continue;
+
+            nir_intrinsic_instr *bary =
+               nir_instr_as_intrinsic(load->src[0].ssa->parent_instr);
+
+            /* Leave interpolateAtSample/Offset() where it is. */
+            if (bary->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
+                bary->intrinsic == nir_intrinsic_load_barycentric_at_offset)
+               continue;
+
+            /* Make a new load_barycentric_* intrinsic at the top */
+            nir_ssa_def *top_bary =
+               nir_load_barycentric(&b, bary->intrinsic,
+                                    nir_intrinsic_interp_mode(bary));
+
+            /* Make a new load_intrinsic_input at the top */
+            nir_intrinsic_instr *top_load = nir_intrinsic_instr_create(nir,
+               nir_intrinsic_load_interpolated_input);
+            top_load->num_components = load->num_components;
+            top_load->src[0] = nir_src_for_ssa(top_bary);
+            /* We don't support indirects today - otherwise we might not
+             * be able to move this to the top. add_const_offset_to_base
+             * guarantees the offset will be 0.
+             */
+            assert(nir_src_as_const_value(load->src[1]) &&
+                   nir_src_as_const_value(load->src[1])->u32[0] == 0);
+            top_load->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0));
+            top_load->const_index[0] = load->const_index[0];
+            top_load->const_index[1] = load->const_index[1];
+            nir_ssa_dest_init(&top_load->instr, &top_load->dest,
+                              load->dest.ssa.num_components,
+                              load->dest.ssa.bit_size, NULL);
+
+            nir_ssa_def_rewrite_uses(&load->dest.ssa,
+                                     nir_src_for_ssa(&top_load->dest.ssa));
+            nir_builder_instr_insert(&b, &top_load->instr);
+         }
+      }
+      nir_metadata_preserve(f->impl, (nir_metadata)
+                            ((unsigned) nir_metadata_block_index |
+                             (unsigned) nir_metadata_dominance));
+   }
+}
+
+/**
  * Apply default interpolation settings to FS inputs which don't specify any.
  */
 static void
@@ -6506,6 +6583,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
    brw_nir_lower_fs_outputs(shader);
    if (!key->multisample_fbo)
       NIR_PASS_V(shader, demote_sample_qualifiers);
+   NIR_PASS_V(shader, move_interpolation_to_top);
    shader = brw_postprocess_nir(shader, compiler->devinfo, true);
 
    /* key->alpha_test_func means simulating alpha testing via discards,
-- 
2.9.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to