Module: Mesa
Branch: main
Commit: 62bcfcd0a80153668a8545069f9a33c7e2be5ebb
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=62bcfcd0a80153668a8545069f9a33c7e2be5ebb
Author: Daniel Schürmann <[email protected]> Date: Thu Nov 4 18:40:44 2021 +0100 aco: change fneg for VOP3P to use fmul with +1.0 This will be useful to be able to also apply fneg_lo and fneg_hi. Reviewed-by: Rhys Perry <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13688> --- src/amd/compiler/aco_instruction_selection.cpp | 7 +++++-- src/amd/compiler/aco_optimizer.cpp | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0ccfc150e66..649bf1923fc 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2230,8 +2230,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) case nir_op_fneg: { if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) { Temp src = get_alu_src_vop3p(ctx, instr->src[0]); - bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0xBC00), - instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1); + Instruction* vop3p = + bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00), + instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1); + vop3p->vop3p().neg_lo[0] = true; + vop3p->vop3p().neg_hi[0] = true; emit_split_vector(ctx, dst, 2); break; } diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 8c92f530ba9..0336d5f66a8 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -3158,7 +3158,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr) ssa_info& info = ctx.info[op.tempId()]; if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 && - info.instr->operands[1].constantEquals(0xBC00)) { + info.instr->operands[1].constantEquals(0x3C00)) { Operand ops[3]; for (unsigned j = 0; j < instr->operands.size(); j++) ops[j] = instr->operands[j]; @@ -3177,8 +3177,8 @@ 
combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr) */ bool opsel_lo = (vop3p->opsel_lo >> i) & 1; bool opsel_hi = (vop3p->opsel_hi >> i) & 1; - bool neg_lo = true ^ fneg->neg_lo[0] ^ fneg->neg_lo[1]; - bool neg_hi = true ^ fneg->neg_hi[0] ^ fneg->neg_hi[1]; + bool neg_lo = fneg->neg_lo[0] ^ fneg->neg_lo[1]; + bool neg_hi = fneg->neg_hi[0] ^ fneg->neg_hi[1]; vop3p->neg_lo[i] ^= opsel_lo ? neg_hi : neg_lo; vop3p->neg_hi[i] ^= opsel_hi ? neg_hi : neg_lo; vop3p->opsel_lo ^= ((opsel_lo ? ~fneg->opsel_hi : fneg->opsel_lo) & 1) << i;
