This allows some loops to be unrolled where they are guaranteed to exit after the first iteration. For example:
loop { block block_1: /* preds: block_0 block_13 */ vec1 32 ssa_85 = load_const (0x00000002 /* 0.000000 */) vec1 32 ssa_86 = ieq ssa_48, ssa_85 vec1 32 ssa_87 = load_const (0x00000001 /* 0.000000 */) vec1 32 ssa_88 = ieq ssa_48, ssa_87 vec1 32 ssa_89 = ior ssa_86, ssa_88 vec1 32 ssa_90 = ieq ssa_48, ssa_0 vec1 32 ssa_91 = ior ssa_89, ssa_90 /* succs: block_2 block_3 */ if ssa_86 { block block_2: /* preds: block_1 */ ... break /* succs: block_14 */ } else { block block_3: /* preds: block_1 */ /* succs: block_4 */ } block block_4: /* preds: block_3 */ /* succs: block_5 block_6 */ if ssa_88 { block block_5: /* preds: block_4 */ ... break /* succs: block_14 */ } else { block block_6: /* preds: block_4 */ /* succs: block_7 */ } block block_7: /* preds: block_6 */ /* succs: block_8 block_9 */ if ssa_90 { block block_8: /* preds: block_7 */ ... break /* succs: block_14 */ } else { block block_9: /* preds: block_7 */ /* succs: block_10 */ } block block_10: /* preds: block_9 */ vec1 32 ssa_107 = inot ssa_91 /* succs: block_11 block_12 */ if ssa_107 { block block_11: /* preds: block_10 */ break /* succs: block_14 */ } else { block block_12: /* preds: block_10 */ /* succs: block_13 */ } } These loops have been seen in Bethesda games running over DXVK. There is a slight increase in VGPR use, but removing the loops allows us to further optimise the code in future. For example, many of the unrolled if-statements could now be merged as they appear in the shaders multiple times. 
vkpipeline results RADV (from a db of only 3 games): Totals from affected shaders: SGPRS: 10920 -> 10440 (-4.40 %) VGPRS: 6120 -> 6264 (2.35 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 369952 -> 356608 (-3.61 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 2040 -> 2040 (0.00 %) Wait states: 0 -> 0 (0.00 %) --- src/compiler/nir/nir_opt_if.c | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index 7b8085452ce..b3403f70a4e 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -525,6 +525,44 @@ opt_if_evaluate_condition_use_loop_terminator(nir_if *nif, nir_loop *loop, after_loop->index, NIR_TRUE, or_use, mem_ctx, true); } + } else if (nir_boolean == NIR_FALSE && + parent_instr->type == nir_instr_type_alu && + nir_instr_as_alu(parent_instr)->op == nir_op_ior) { + + nir_alu_instr *alu = nir_instr_as_alu(parent_instr); + + nir_src *other_or_src = NULL; + for (unsigned i = 0; i < 2; i++) { + if (alu->src[i].src.ssa != use_src->ssa) { + other_or_src = &alu->src[i].src; + break; + } + } + assert(other_or_src); + + nir_foreach_use_safe(or_use, &alu->dest.dest.ssa) { + if (prev_block->index < or_use->parent_instr->block->index && + after_loop->index > or_use->parent_instr->block->index) { + + nir_instr_rewrite_src(or_use->parent_instr, or_use, + *other_or_src); + progress = true; + } + } + + nir_foreach_if_use_safe(or_use, &alu->dest.dest.ssa) { + if (or_use->parent_if != nif) { + unsigned blk_idx_before_if = + nir_cf_node_as_block(nir_cf_node_prev( + &or_use->parent_if->cf_node))->index; + + if (prev_block->index <= blk_idx_before_if && + after_loop->index > blk_idx_before_if) { + nir_if_rewrite_condition(or_use->parent_if, *other_or_src); + progress = true; + } + } + } } else { progress = evaluate_term_condition_use(prev_block->index, 
after_loop->index, -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev