Module: Mesa
Branch: staging/20.3
Commit: 162a0678ac3e9ac8e90cf6fff8aef77d1311ea14
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=162a0678ac3e9ac8e90cf6fff8aef77d1311ea14

Author: Samuel Pitoiset <[email protected]>
Date:   Mon Nov  9 19:42:22 2020 +0100

aco: fix combining add/sub to b2i if a new dest needs to be allocated

The uses vector needs to be expanded to avoid out of bounds access
and to make sure the number of uses is initialized to 0.

This allows more v_and(a, v_subbrev_co_u32) patterns to be combined.

fossil-db (Vega10):
Totals from 4574 (3.28% of 139517) affected shaders:
SGPRs: 291625 -> 292217 (+0.20%); split: -0.01%, +0.21%
VGPRs: 276368 -> 276188 (-0.07%); split: -0.07%, +0.01%
SpillSGPRs: 455 -> 533 (+17.14%)
SpillVGPRs: 76 -> 78 (+2.63%)
CodeSize: 23327500 -> 23304152 (-0.10%); split: -0.17%, +0.07%
MaxWaves: 22044 -> 22066 (+0.10%)
Instrs: 4583064 -> 4576301 (-0.15%); split: -0.15%, +0.01%
Cycles: 47925276 -> 47871968 (-0.11%); split: -0.13%, +0.01%
VMEM: 1599363 -> 1597473 (-0.12%); split: +0.08%, -0.19%
SMEM: 331461 -> 331126 (-0.10%); split: +0.08%, -0.18%
VClause: 80639 -> 80696 (+0.07%); split: -0.02%, +0.09%
SClause: 155992 -> 155993 (+0.00%); split: -0.02%, +0.02%
Copies: 333482 -> 333318 (-0.05%); split: -0.12%, +0.07%
Branches: 70967 -> 70968 (+0.00%)
PreSGPRs: 187078 -> 187711 (+0.34%); split: -0.01%, +0.35%
PreVGPRs: 244918 -> 244785 (-0.05%)

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7513>
(cherry picked from commit ec347ee9bc41f99dc8e398c652d873cc192bc99c)

---

 .pick_status.json                         |  2 +-
 src/amd/compiler/aco_optimizer.cpp        | 13 ++++++++++---
 src/amd/compiler/tests/test_optimizer.cpp |  6 ++++++
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index c62c3a1c840..28987b29698 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -4756,7 +4756,7 @@
         "description": "aco: fix combining add/sub to b2i if a new dest needs 
to be allocated",
         "nominated": false,
         "nomination_type": null,
-        "resolution": 4,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/compiler/aco_optimizer.cpp 
b/src/amd/compiler/aco_optimizer.cpp
index 2f582d69b12..8fcabf2bc6d 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2278,9 +2278,16 @@ bool combine_add_sub_b2i(opt_ctx& ctx, 
aco_ptr<Instruction>& instr, aco_opcode n
          }
          ctx.uses[instr->operands[i].tempId()]--;
          new_instr->definitions[0] = instr->definitions[0];
-         new_instr->definitions[1] =
-            instr->definitions.size() == 2 ? instr->definitions[1] :
-            Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
+         if (instr->definitions.size() == 2) {
+            new_instr->definitions[1] = instr->definitions[1];
+         } else {
+            new_instr->definitions[1] =
+               Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
+            /* Make sure the uses vector is large enough and the number of
+             * uses properly initialized to 0.
+             */
+            ctx.uses.push_back(0);
+         }
          new_instr->definitions[1].setHint(vcc);
          new_instr->operands[0] = Operand(0u);
          new_instr->operands[1] = instr->operands[!i];
diff --git a/src/amd/compiler/tests/test_optimizer.cpp 
b/src/amd/compiler/tests/test_optimizer.cpp
index 7c6c98ddbab..c6bff377ca3 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -119,6 +119,12 @@ BEGIN_TEST(optimize.cndmask)
       Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], 
subbrev);
       writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, 
subbrev));
 
+      //! v1: %res4 = v_cndmask_b32 0, %a, %c
+      //! p_unit_test 4, %res4
+      Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), 
Operand(0u), Operand(1u), Operand(inputs[2]));
+      Temp sub = bld.vsub32(bld.def(v1), Operand(0u), cndmask);
+      writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), 
Operand(inputs[0]), sub));
+
       finish_opt_test();
    }
 END_TEST

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to