This allows some loops to be unrolled where they are guaranteed to
exit after the first iteration. For example:

        loop {
                block block_1:
                /* preds: block_0 block_13 */
                vec1 32 ssa_85 = load_const (0x00000002 /* 0.000000 */)
                vec1 32 ssa_86 = ieq ssa_48, ssa_85
                vec1 32 ssa_87 = load_const (0x00000001 /* 0.000000 */)
                vec1 32 ssa_88 = ieq ssa_48, ssa_87
                vec1 32 ssa_89 = ior ssa_86, ssa_88
                vec1 32 ssa_90 = ieq ssa_48, ssa_0
                vec1 32 ssa_91 = ior ssa_89, ssa_90

                /* succs: block_2 block_3 */
                if ssa_86 {
                        block block_2:
                        /* preds: block_1 */
                         ...
                        break
                        /* succs: block_14 */
                } else {
                        block block_3:
                        /* preds: block_1 */
                        /* succs: block_4 */
                }
                block block_4:
                /* preds: block_3 */
                /* succs: block_5 block_6 */
                if ssa_88 {
                        block block_5:
                        /* preds: block_4 */
                         ...
                        break
                        /* succs: block_14 */
                } else {
                        block block_6:
                        /* preds: block_4 */
                        /* succs: block_7 */
                }
                block block_7:
                /* preds: block_6 */
                /* succs: block_8 block_9 */
                if ssa_90 {
                        block block_8:
                        /* preds: block_7 */
                         ...
                        break
                        /* succs: block_14 */
                } else {
                        block block_9:
                        /* preds: block_7 */
                        /* succs: block_10 */
                }
                block block_10:
                /* preds: block_9 */
                vec1 32 ssa_107 = inot ssa_91
                /* succs: block_11 block_12 */
                if ssa_107 {
                        block block_11:
                        /* preds: block_10 */
                        break
                        /* succs: block_14 */
                } else {
                        block block_12:
                        /* preds: block_10 */
                        /* succs: block_13 */
                }
        }

These loops have been seen in Bethesda games running over
DXVK. There is a slight increase in VGPR use but removing
the loops allows us to further optimise the code in the
future. For example, many of the unrolled if-statements
could now be merged as they appear in the shaders multiple
times.

vkpipeline results RADV (from a db of only 3 games):

Totals from affected shaders:
SGPRS: 10920 -> 10440 (-4.40 %)
VGPRS: 6120 -> 6264 (2.35 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 369952 -> 356608 (-3.61 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 2040 -> 2040 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_opt_if.c | 38 +++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 7b8085452ce..b3403f70a4e 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -525,6 +525,44 @@ opt_if_evaluate_condition_use_loop_terminator(nir_if *nif, nir_loop *loop,
                                            after_loop->index, NIR_TRUE,
                                            or_use, mem_ctx, true);
          }
+      } else if (nir_boolean == NIR_FALSE &&
+                 parent_instr->type == nir_instr_type_alu &&
+                 nir_instr_as_alu(parent_instr)->op == nir_op_ior) {
+
+         nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
+
+         nir_src *other_or_src = NULL;
+         for (unsigned i = 0; i < 2; i++) {
+            if (alu->src[i].src.ssa != use_src->ssa) {
+               other_or_src = &alu->src[i].src;
+               break;
+            }
+         }
+         assert(other_or_src);
+
+         nir_foreach_use_safe(or_use, &alu->dest.dest.ssa) {
+            if (prev_block->index < or_use->parent_instr->block->index &&
+               after_loop->index > or_use->parent_instr->block->index) {
+
+               nir_instr_rewrite_src(or_use->parent_instr, or_use,
+                                     *other_or_src);
+               progress = true;
+            }
+         }
+
+         nir_foreach_if_use_safe(or_use, &alu->dest.dest.ssa) {
+            if (or_use->parent_if != nif) {
+               unsigned blk_idx_before_if =
+                  nir_cf_node_as_block(nir_cf_node_prev(
+                     &or_use->parent_if->cf_node))->index;
+
+               if (prev_block->index <= blk_idx_before_if &&
+                   after_loop->index > blk_idx_before_if) {
+                  nir_if_rewrite_condition(or_use->parent_if, *other_or_src);
+                  progress = true;
+               }
+            }
+         }
       } else {
          progress =
             evaluate_term_condition_use(prev_block->index, after_loop->index,
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to