Module: Mesa Branch: main Commit: 3bd5b583f9df727808a9587f40c9fc619ebeef94 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3bd5b583f9df727808a9587f40c9fc619ebeef94
Author: Georg Lehmann <[email protected]> Date: Wed Feb 8 14:18:15 2023 +0100 aco: combine a ^ ~b and ~(a ^ b) to v_xnor_b32 Foz-DB Navi21: Totals from 13 (0.01% of 134913) affected shaders: CodeSize: 225432 -> 225180 (-0.11%) Instrs: 41973 -> 41908 (-0.15%) Latency: 297464 -> 297326 (-0.05%) InvThroughput: 82536 -> 82467 (-0.08%) Copies: 2452 -> 2440 (-0.49%) Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21410> --- src/amd/compiler/aco_optimizer.cpp | 53 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index a0ff60e366d..4557fd0ff47 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2066,6 +2066,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) case aco_opcode::v_bcnt_u32_b32: case aco_opcode::v_and_b32: case aco_opcode::v_xor_b32: + case aco_opcode::v_not_b32: ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get()); break; case aco_opcode::v_min_f32: @@ -2812,6 +2813,55 @@ combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr) return false; } +/* v_xor(a, s_not(b)) -> v_xnor(a, b) + * v_xor(a, v_not(b)) -> v_xnor(a, b) + */ +bool +combine_xor_not(opt_ctx& ctx, aco_ptr<Instruction>& instr) +{ + if (instr->usesModifiers()) + return false; + + for (unsigned i = 0; i < 2; i++) { + Instruction* op_instr = follow_operand(ctx, instr->operands[i], true); + if (!op_instr || + (op_instr->opcode != aco_opcode::v_not_b32 && + op_instr->opcode != aco_opcode::s_not_b32) || + op_instr->usesModifiers() || op_instr->operands[0].isLiteral()) + continue; + + instr->opcode = aco_opcode::v_xnor_b32; + instr->operands[i] = copy_operand(ctx, op_instr->operands[0]); + decrease_uses(ctx, op_instr); + if (instr->operands[0].isOfType(RegType::vgpr)) + std::swap(instr->operands[0], instr->operands[1]); + if (!instr->operands[1].isOfType(RegType::vgpr)) + to_VOP3(ctx, instr); + + return true; + } + + return false; +} + +/* v_not(v_xor(a, b)) -> v_xnor(a, b) */ +bool +combine_not_xor(opt_ctx& ctx, aco_ptr<Instruction>& instr) +{ + if (instr->usesModifiers()) + return false; + + Instruction* op_instr = follow_operand(ctx, instr->operands[0]); + if (!op_instr || op_instr->opcode != aco_opcode::v_xor_b32 || op_instr->isSDWA()) + return false; + + ctx.uses[instr->operands[0].tempId()]--; + std::swap(instr->definitions[0], op_instr->definitions[0]); + op_instr->opcode = aco_opcode::v_xnor_b32; + + return true; +} + bool combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposite, aco_opcode op3src, aco_opcode minmax) @@ -4467,7 +4517,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) 1 | 2)) { } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, "012", 1 | 2)) { + } else if (combine_xor_not(ctx, instr)) { } + } else if (instr->opcode == aco_opcode::v_not_b32 && ctx.program->gfx_level >= GFX10) { + combine_not_xor(ctx, instr); } else if (instr->opcode == aco_opcode::v_add_u16) { combine_three_valu_op( ctx, instr, aco_opcode::v_mul_lo_u16,
