Module: Mesa
Branch: main
Commit: 1804c21fb570285d90709010b746ed6812b42aa6
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1804c21fb570285d90709010b746ed6812b42aa6

Author: Rhys Perry <[email protected]>
Date: Fri Jan 28 13:47:16 2022 +0000

aco: optimize abs(mul(a, b))

fossil-db (Sienna Cichlid):
Totals from 18 (0.01% of 134913) affected shaders:
CodeSize: 173924 -> 173852 (-0.04%)
Instrs: 33864 -> 33846 (-0.05%)
Latency: 122233 -> 122211 (-0.02%)
InvThroughput: 22482 -> 22462 (-0.09%)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14773>

---

 src/amd/compiler/aco_optimizer.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 6f08dbfe930..537f3d4b1b9 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3482,8 +3482,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
     * The various comparison optimizations also currently only work with 32-bit
     * floats. */
 
-   /* neg(mul(a, b)) -> mul(neg(a), b) */
-   if (ctx.info[instr->definitions[0].tempId()].is_neg() &&
+   /* neg(mul(a, b)) -> mul(neg(a), b), abs(mul(a, b)) -> mul(abs(a), abs(b)) */
+   if ((ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)) &&
        ctx.uses[instr->operands[1].tempId()] == 1) {
       Temp val = ctx.info[instr->definitions[0].tempId()].temp;
 
@@ -3502,10 +3502,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           ctx.fp_mode.preserve_signed_zero_inf_nan32)
          return;
 
-      /* convert to mul(neg(a), b) */
+      /* convert to mul(neg(a), b), mul(abs(a), abs(b)) or mul(neg(abs(a)), abs(b)) */
       ctx.uses[mul_instr->definitions[0].tempId()]--;
       Definition def = instr->definitions[0];
-      /* neg(abs(mul(a, b))) -> mul(neg(abs(a)), abs(b)) */
+      bool is_neg = ctx.info[instr->definitions[0].tempId()].is_neg();
       bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
       instr.reset(
          create_instruction<VOP3_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
@@ -3525,7 +3525,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
          new_mul.neg[0] = new_mul.neg[1] = false;
          new_mul.abs[0] = new_mul.abs[1] = true;
       }
-      new_mul.neg[0] ^= true;
+      new_mul.neg[0] ^= is_neg;
       new_mul.clamp = false;
 
       ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
