Module: Mesa
Branch: main
Commit: fddd866b2731c2b035e6f535f298a03961504923
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fddd866b2731c2b035e6f535f298a03961504923
Author: Georg Lehmann <dadschoo...@gmail.com> Date: Sun Jan 7 11:56:12 2024 +0100 aco: apply fneg/fabs to VOP3P Reviewed-by: Daniel Schürmann <dan...@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26919> --- src/amd/compiler/aco_optimizer.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 83648c0172a..5ec76c4c32e 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1423,8 +1423,14 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) instr->opcode != aco_opcode::v_cndmask_b32 || instr->operands[i].getTemp().bytes() == 4; can_use_mod &= can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, i); + bool packed_math = instr->isVOP3P() && instr->opcode != aco_opcode::v_fma_mix_f32 && + instr->opcode != aco_opcode::v_fma_mixlo_f16 && + instr->opcode != aco_opcode::v_fma_mixhi_f16; + if (instr->isSDWA()) can_use_mod &= instr->sdwa().sel[i].size() == 4; + else if (instr->isVOP3P()) + can_use_mod &= !packed_math || !info.is_abs(); else can_use_mod &= instr->isDPP16() || can_use_VOP3(ctx, instr); @@ -1434,12 +1440,17 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) if (info.is_neg() && can_use_mod && can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) { instr->operands[i].setTemp(info.temp); - if (instr->valu().abs[i]) { + if (!packed_math && instr->valu().abs[i]) { /* fabs(fneg(a)) -> fabs(a) */ } else if (instr->opcode == aco_opcode::v_add_f32) { instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32; } else if (instr->opcode == aco_opcode::v_add_f16) { instr->opcode = i ? aco_opcode::v_sub_f16 : aco_opcode::v_subrev_f16; + } else if (packed_math) { + /* Bit size compat should ensure this. 
*/ + assert(!instr->valu().opsel_lo[i] && !instr->valu().opsel_hi[i]); + instr->valu().neg_lo[i] ^= true; + instr->valu().neg_hi[i] ^= true; } else { if (!instr->isDPP16() && can_use_VOP3(ctx, instr)) instr->format = asVOP3(instr->format);