Module: Mesa Branch: main Commit: 72ac6a5251f123b9df6d779b002ad206c6aab564 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=72ac6a5251f123b9df6d779b002ad206c6aab564
Author: Georg Lehmann <dadschoo...@gmail.com> Date: Sun Jan 7 11:46:30 2024 +0100 aco: clean up fneg/fabs combining This technically fixes some bugs with fneg(fneg(a)) and fabs(fneg(a)), but those shouldn't be present in the input NIR. Reviewed-by: Daniel Schürmann <dan...@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26919> --- src/amd/compiler/aco_optimizer.cpp | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index daa0acaf824..83648c0172a 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1419,34 +1419,34 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) /* for instructions other than v_cndmask_b32, the size of the instruction should match the * operand size */ - unsigned can_use_mod = + bool can_use_mod = instr->opcode != aco_opcode::v_cndmask_b32 || instr->operands[i].getTemp().bytes() == 4; - can_use_mod = - can_use_mod && can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, i); + can_use_mod &= can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, i); if (instr->isSDWA()) - can_use_mod = can_use_mod && instr->sdwa().sel[i].size() == 4; + can_use_mod &= instr->sdwa().sel[i].size() == 4; else - can_use_mod = can_use_mod && (instr->isDPP16() || can_use_VOP3(ctx, instr)); + can_use_mod &= instr->isDPP16() || can_use_VOP3(ctx, instr); unsigned bits = get_operand_size(instr, i); - bool mod_bitsize_compat = instr->operands[i].bytes() * 8 == bits; + can_use_mod &= instr->operands[i].bytes() * 8 == bits; - if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32 && mod_bitsize_compat) { - instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32; - instr->operands[i].setTemp(info.temp); - } else if (info.is_neg() && instr->opcode == aco_opcode::v_add_f16 && mod_bitsize_compat) { - instr->opcode = i ? aco_opcode::v_sub_f16 : aco_opcode::v_subrev_f16; - instr->operands[i].setTemp(info.temp); - } else if (info.is_neg() && can_use_mod && mod_bitsize_compat && - can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) { - if (!instr->isDPP16() && can_use_VOP3(ctx, instr)) - instr->format = asVOP3(instr->format); + if (info.is_neg() && can_use_mod && + can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) { instr->operands[i].setTemp(info.temp); - if (!instr->valu().abs[i]) - instr->valu().neg[i] = true; + if (instr->valu().abs[i]) { + /* fabs(fneg(a)) -> fabs(a) */ + } else if (instr->opcode == aco_opcode::v_add_f32) { + instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32; + } else if (instr->opcode == aco_opcode::v_add_f16) { + instr->opcode = i ? aco_opcode::v_sub_f16 : aco_opcode::v_subrev_f16; + } else { + if (!instr->isDPP16() && can_use_VOP3(ctx, instr)) + instr->format = asVOP3(instr->format); + instr->valu().neg[i] ^= true; + } } - if (info.is_abs() && can_use_mod && mod_bitsize_compat && + if (info.is_abs() && can_use_mod && can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) { if (!instr->isDPP16() && can_use_VOP3(ctx, instr)) instr->format = asVOP3(instr->format);