Module: Mesa Branch: main Commit: e43007af564fb10085119ddb5a6f5ec1f24f5546 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e43007af564fb10085119ddb5a6f5ec1f24f5546
Author: Rhys Perry <[email protected]> Date: Wed Aug 18 19:08:54 2021 +0100 nir/opt_if: add opt_if_rewrite_uniform_uses Turns: if (a == (b=readfirstlane(a))) use(a) into: if (a == (b=readfirstlane(a))) use(b) Improves divergence analysis and lets us scalarize use(a). Improves Cyberpunk 2077 performance. fossil-db (Sienna Cichlid, Cyberpunk 2077): Totals from 57 (10.56% of 540) affected shaders: VGPRs: 4904 -> 4040 (-17.62%) CodeSize: 624360 -> 626828 (+0.40%); split: -0.06%, +0.46% MaxWaves: 656 -> 824 (+25.61%) Instrs: 119770 -> 119447 (-0.27%); split: -0.49%, +0.22% Latency: 1950256 -> 1633110 (-16.26%); split: -16.26%, +0.00% InvThroughput: 364852 -> 292089 (-19.94%) VClause: 1512 -> 1008 (-33.33%) SClause: 2693 -> 3196 (+18.68%) Copies: 10050 -> 9955 (-0.95%); split: -3.34%, +2.40% Branches: 3476 -> 3547 (+2.04%) PreSGPRs: 4003 -> 5076 (+26.80%) PreVGPRs: 4709 -> 3810 (-19.09%) Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12472> --- src/compiler/nir/nir_opt_if.c | 107 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index b6c69bfd163..10336069972 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -1200,6 +1200,111 @@ opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif) return progress; } +static bool +rewrite_comp_uses_within_if(nir_builder *b, nir_if *nif, bool invert, + nir_ssa_scalar scalar, nir_ssa_scalar new_scalar) +{ + bool progress = false; + + nir_block *first = invert ? nir_if_first_else_block(nif) : nir_if_first_then_block(nif); + nir_block *last = invert ? nir_if_last_else_block(nif) : nir_if_last_then_block(nif); + + nir_ssa_def *new_ssa = NULL; + nir_foreach_use_safe(use, scalar.def) { + if (use->parent_instr->block->index < first->index || + use->parent_instr->block->index > last->index) + continue; + + /* Only rewrite users which use only the new component. This is to avoid a + * situation where copy propagation will undo the rewrite and we risk an infinite + * loop. + * + * We could rewrite users which use a mix of the old and new components, but if + * nir_src_components_read() is incomplete, then we risk the new component actually being + * unused and some optimization later undoing the rewrite. + */ + if (nir_src_components_read(use) != BITFIELD64_BIT(scalar.comp)) + continue; + + if (!new_ssa) { + b->cursor = nir_before_cf_node(&nif->cf_node); + new_ssa = nir_channel(b, new_scalar.def, new_scalar.comp); + if (scalar.def->num_components > 1) { + nir_ssa_def *vec = nir_ssa_undef(b, scalar.def->num_components, scalar.def->bit_size); + new_ssa = nir_vector_insert_imm(b, vec, new_ssa, scalar.comp); + } + } + + nir_instr_rewrite_src_ssa(use->parent_instr, use, new_ssa); + progress = true; + } + + return progress; +} + +/* + * This optimization turns: + * + * if (a == (b=readfirstlane(a))) + * use(a) + * if (c == (d=load_const)) + * use(c) + * + * into: + * + * if (a == (b=readfirstlane(a))) + * use(b) + * if (c == (d=load_const)) + * use(d) +*/ +static bool +opt_if_rewrite_uniform_uses(nir_builder *b, nir_if *nif, nir_ssa_scalar cond, bool accept_ine) +{ + bool progress = false; + + if (!nir_ssa_scalar_is_alu(cond)) + return false; + + nir_op op = nir_ssa_scalar_alu_op(cond); + if (op == nir_op_iand) { + progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 0), false); + progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 1), false); + return progress; + } + + if (op != nir_op_ieq && (op != nir_op_ine || !accept_ine)) + return false; + + for (unsigned i = 0; i < 2; i++) { + nir_ssa_scalar src_uni = nir_ssa_scalar_chase_alu_src(cond, i); + nir_ssa_scalar src_div = nir_ssa_scalar_chase_alu_src(cond, !i); + + if (src_uni.def->parent_instr->type == nir_instr_type_load_const && src_div.def != src_uni.def) + return rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, src_div, src_uni); + + if (src_uni.def->parent_instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src_uni.def->parent_instr); + if (intrin->intrinsic != nir_intrinsic_read_first_invocation && + (intrin->intrinsic != nir_intrinsic_reduce || nir_intrinsic_cluster_size(intrin))) + continue; + + nir_ssa_scalar intrin_src = {intrin->src[0].ssa, src_uni.comp}; + nir_ssa_scalar resolved_intrin_src = nir_ssa_scalar_resolved(intrin_src.def, intrin_src.comp); + + if (resolved_intrin_src.comp != src_div.comp || resolved_intrin_src.def != src_div.def) + continue; + + progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, resolved_intrin_src, src_uni); + if (intrin_src.comp != resolved_intrin_src.comp || intrin_src.def != resolved_intrin_src.def) + progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, intrin_src, src_uni); + + return progress; + } + + return false; +} + static void simple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then, bool src_if_then) @@ -1387,6 +1492,8 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list) progress |= opt_if_safe_cf_list(b, &nif->then_list); progress |= opt_if_safe_cf_list(b, &nif->else_list); progress |= opt_if_evaluate_condition_use(b, nif); + nir_ssa_scalar cond = nir_ssa_scalar_resolved(nif->condition.ssa, 0); + progress |= opt_if_rewrite_uniform_uses(b, nif, cond, true); break; }
