Module: Mesa
Branch: main
Commit: 3bd5b583f9df727808a9587f40c9fc619ebeef94
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3bd5b583f9df727808a9587f40c9fc619ebeef94

Author: Georg Lehmann <[email protected]>
Date:   Wed Feb  8 14:18:15 2023 +0100

aco: combine a ^ ~b and ~(a ^ b) to v_xnor_b32

Foz-DB Navi21:
Totals from 13 (0.01% of 134913) affected shaders:
CodeSize: 225432 -> 225180 (-0.11%)
Instrs: 41973 -> 41908 (-0.15%)
Latency: 297464 -> 297326 (-0.05%)
InvThroughput: 82536 -> 82467 (-0.08%)
Copies: 2452 -> 2440 (-0.49%)

Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21410>

---

 src/amd/compiler/aco_optimizer.cpp | 53 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index a0ff60e366d..4557fd0ff47 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2066,6 +2066,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& 
instr)
    case aco_opcode::v_bcnt_u32_b32:
    case aco_opcode::v_and_b32:
    case aco_opcode::v_xor_b32:
+   case aco_opcode::v_not_b32:
       ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
       break;
    case aco_opcode::v_min_f32:
@@ -2812,6 +2813,55 @@ combine_add_or_then_and_lshl(opt_ctx& ctx, 
aco_ptr<Instruction>& instr)
    return false;
 }
 
+/* v_xor(a, s_not(b)) -> v_xnor(a, b)
+ * v_xor(a, v_not(b)) -> v_xnor(a, b)
+ * Rewrites instr in place; returns true if a NOT feeding either source was folded. */
+bool
+combine_xor_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
+{
+   if (instr->usesModifiers())
+      return false;
+
+   for (unsigned i = 0; i < 2; i++) { /* try both sources; the first match wins */
+      Instruction* op_instr = follow_operand(ctx, instr->operands[i], true);
+      if (!op_instr ||
+          (op_instr->opcode != aco_opcode::v_not_b32 &&
+           op_instr->opcode != aco_opcode::s_not_b32) ||
+          op_instr->usesModifiers() || op_instr->operands[0].isLiteral())
+         continue; /* not a plain NOT, or the NOT's input is a literal: skip this source */
+
+      instr->opcode = aco_opcode::v_xnor_b32;
+      instr->operands[i] = copy_operand(ctx, op_instr->operands[0]); /* use the NOT's input */
+      decrease_uses(ctx, op_instr); /* presumably releases instr's use of the NOT - confirm */
+      if (instr->operands[0].isOfType(RegType::vgpr)) /* move a VGPR out of operand 0 */
+         std::swap(instr->operands[0], instr->operands[1]);
+      if (!instr->operands[1].isOfType(RegType::vgpr))
+         to_VOP3(ctx, instr); /* presumably VOP2 needs a VGPR src1 - TODO confirm */
+
+      return true;
+   }
+
+   return false;
+}
+
+/* v_not(v_xor(a, b)) -> v_xnor(a, b). Returns true if the XOR feeding instr was converted. */
+bool
+combine_not_xor(opt_ctx& ctx, aco_ptr<Instruction>& instr)
+{
+   if (instr->usesModifiers())
+      return false;
+
+   Instruction* op_instr = follow_operand(ctx, instr->operands[0]);
+   if (!op_instr || op_instr->opcode != aco_opcode::v_xor_b32 || 
op_instr->isSDWA())
+      return false;
+
+   ctx.uses[instr->operands[0].tempId()]--; /* one fewer use of the temp that instr reads */
+   std::swap(instr->definitions[0], op_instr->definitions[0]); /* XOR takes over instr's def */
+   op_instr->opcode = aco_opcode::v_xnor_b32; /* instr itself is not removed here */
+
+   return true;
+}
+
 bool
 combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposite, 
aco_opcode op3src,
                aco_opcode minmax)
@@ -4467,7 +4517,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& 
instr)
                                 1 | 2)) {
       } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, 
aco_opcode::v_xor3_b32,
                                        "012", 1 | 2)) {
+      } else if (combine_xor_not(ctx, instr)) {
       }
+   } else if (instr->opcode == aco_opcode::v_not_b32 && ctx.program->gfx_level 
>= GFX10) {
+      combine_not_xor(ctx, instr);
    } else if (instr->opcode == aco_opcode::v_add_u16) {
       combine_three_valu_op(
          ctx, instr, aco_opcode::v_mul_lo_u16,

Reply via email to