Module: Mesa
Branch: main
Commit: cac6f633b21799bd1ecc35471d73a0bd190ccada
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=cac6f633b21799bd1ecc35471d73a0bd190ccada

Author: Emma Anholt <[email protected]>
Date:   Thu Dec 23 17:28:40 2021 -0800

nir/opt_offsets: Use nir_ssa_scalar to chase offset additions.

For nir_to_tgsi, I want to be able to fold into the base from a vector
load_const, which the ad-hoc scalar chasing couldn't handle.

r300:
total instructions in shared programs: 1278731 -> 1256502 (-1.74%)
instructions in affected programs: 457909 -> 435680 (-4.85%)
total flowcontrol in shared programs: 8316 -> 8313 (-0.04%)
flowcontrol in affected programs: 5 -> 2 (-60.00%)
total temps in shared programs: 213687 -> 213774 (0.04%)
temps in affected programs: 13140 -> 13227 (0.66%)
total consts in shared programs: 952850 -> 949929 (-0.31%)
consts in affected programs: 386352 -> 383431 (-0.76%)

Fixes: #5781
Reviewed-by: Timur Kristóf <[email protected]>
Acked-by: Matt Turner <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14309>

---

 src/compiler/nir/nir_opt_offsets.c | 69 +++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/compiler/nir/nir_opt_offsets.c b/src/compiler/nir/nir_opt_offsets.c
index f9f54789693..2c235224c0c 100644
--- a/src/compiler/nir/nir_opt_offsets.c
+++ b/src/compiler/nir/nir_opt_offsets.c
@@ -34,17 +34,26 @@ typedef struct
    const nir_opt_offsets_options *options;
 } opt_offsets_state;
 
-static nir_ssa_def *
-try_extract_const_addition(nir_builder *b, nir_instr *instr, opt_offsets_state 
*state, unsigned *out_const, uint32_t max)
+static nir_ssa_scalar
+try_extract_const_addition(nir_builder *b, nir_ssa_scalar val, 
opt_offsets_state *state, unsigned *out_const, uint32_t max)
 {
-   if (instr->type != nir_instr_type_alu)
-      return NULL;
+   val = nir_ssa_scalar_chase_movs(val);
 
-   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   if (!nir_ssa_scalar_is_alu(val))
+      return val;
+
+   nir_alu_instr *alu = nir_instr_as_alu(val.def->parent_instr);
    if (alu->op != nir_op_iadd ||
-       !nir_alu_src_is_trivial_ssa(alu, 0) ||
-       !nir_alu_src_is_trivial_ssa(alu, 1))
-      return NULL;
+       !alu->src[0].src.is_ssa ||
+       !alu->src[1].src.is_ssa ||
+       alu->src[0].negate || alu->src[0].abs ||
+       alu->src[1].negate || alu->src[1].abs)
+      return val;
+
+   nir_ssa_scalar src[2] = {
+      {alu->src[0].src.ssa, alu->src[0].swizzle[val.comp]},
+      {alu->src[1].src.ssa, alu->src[1].swizzle[val.comp]},
+   };
 
    /* Make sure that we aren't taking out an addition that could trigger
     * unsigned wrapping in a way that would change the semantics of the load.
@@ -58,39 +67,38 @@ try_extract_const_addition(nir_builder *b, nir_instr 
*instr, opt_offsets_state *
       }
 
       /* Check if there can really be an unsigned wrap. */
-      nir_ssa_scalar src0 = {alu->src[0].src.ssa, 0};
-      nir_ssa_scalar src1 = {alu->src[1].src.ssa, 0};
-      uint32_t ub0 = nir_unsigned_upper_bound(b->shader, state->range_ht, 
src0, NULL);
-      uint32_t ub1 = nir_unsigned_upper_bound(b->shader, state->range_ht, 
src1, NULL);
+      uint32_t ub0 = nir_unsigned_upper_bound(b->shader, state->range_ht, 
src[0], NULL);
+      uint32_t ub1 = nir_unsigned_upper_bound(b->shader, state->range_ht, 
src[1], NULL);
 
       if ((UINT32_MAX - ub0) < ub1)
-         return NULL;
+         return val;
 
       /* We proved that unsigned wrap won't be possible, so we can set the 
flag too. */
       alu->no_unsigned_wrap = true;
    }
 
    for (unsigned i = 0; i < 2; ++i) {
-      if (nir_src_is_const(alu->src[i].src)) {
-         uint32_t offset = nir_src_as_uint(alu->src[i].src);
+      src[i] = nir_ssa_scalar_chase_movs(src[i]);
+      if (nir_ssa_scalar_is_const(src[i])) {
+         uint32_t offset = nir_ssa_scalar_as_uint(src[i]);
          if (offset + *out_const <= max) {
             *out_const += offset;
-            nir_ssa_def *replace_src =
-                try_extract_const_addition(b, alu->src[1 - 
i].src.ssa->parent_instr, state, out_const, max);
-            return replace_src ? replace_src : alu->src[1 - i].src.ssa;
+            return try_extract_const_addition(b, src[1 - i], state, out_const, 
max);
          }
       }
    }
 
-   nir_ssa_def *replace_src0 = try_extract_const_addition(b, 
alu->src[0].src.ssa->parent_instr, state, out_const, max);
-   nir_ssa_def *replace_src1 = try_extract_const_addition(b, 
alu->src[1].src.ssa->parent_instr, state, out_const, max);
-   if (!replace_src0 && !replace_src1)
-      return NULL;
+   uint32_t orig_offset = *out_const;
+   src[0] = try_extract_const_addition(b, src[0], state, out_const, max);
+   src[1] = try_extract_const_addition(b, src[1], state, out_const, max);
+   if (*out_const == orig_offset)
+      return val;
 
    b->cursor = nir_before_instr(&alu->instr);
-   replace_src0 = replace_src0 ? replace_src0 : nir_ssa_for_alu_src(b, alu, 0);
-   replace_src1 = replace_src1 ? replace_src1 : nir_ssa_for_alu_src(b, alu, 1);
-   return nir_iadd(b, replace_src0, replace_src1);
+   nir_ssa_def *r =
+          nir_iadd(b, nir_channel(b, src[0].def, src[0].comp),
+                   nir_channel(b, src[1].def, src[1].comp));
+   return (nir_ssa_scalar){r, 0};
 }
 
 static bool
@@ -113,8 +121,15 @@ try_fold_load_store(nir_builder *b,
       return false;
 
    if (!nir_src_is_const(*off_src)) {
-      replace_src = try_extract_const_addition(b, off_src->ssa->parent_instr, 
state, &off_const, max);
-   } else if (nir_src_as_uint(*off_src) && nir_src_as_uint(*off_src) < max) {
+      uint32_t add_offset = 0;
+      nir_ssa_scalar val = {.def = off_src->ssa, .comp = 0};
+      val = try_extract_const_addition(b, val, state, &add_offset, max);
+      if (add_offset == 0)
+         return false;
+      off_const += add_offset;
+      b->cursor = nir_before_instr(&intrin->instr);
+      replace_src = nir_channel(b, val.def, val.comp);
+   } else if (nir_src_as_uint(*off_src) && off_const + 
nir_src_as_uint(*off_src) <= max) {
       off_const += nir_src_as_uint(*off_src);
       b->cursor = nir_before_instr(&intrin->instr);
       replace_src = nir_imm_zero(b, off_src->ssa->num_components, 
off_src->ssa->bit_size);

Reply via email to