Module: Mesa
Branch: main
Commit: 09413ff7458e12c5cbce20633d16bfd618cb5512
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=09413ff7458e12c5cbce20633d16bfd618cb5512

Author: Daniel Schürmann <dan...@schuermann.dev>
Date:   Sat Jan  6 16:46:54 2024 +0100

aco/insert_exec_mask: only create loop phis for exec mask if necessary

Totals from 195 (0.25% of 79242) affected shaders: (GFX11)

Instrs: 476457 -> 476031 (-0.09%); split: -0.23%, +0.14%
CodeSize: 2453964 -> 2452108 (-0.08%); split: -0.23%, +0.16%
SpillSGPRs: 944 -> 913 (-3.28%); split: -3.39%, +0.11%
SpillVGPRs: 838 -> 835 (-0.36%); split: -0.95%, +0.60%
Latency: 10811026 -> 10810125 (-0.01%); split: -0.08%, +0.07%
InvThroughput: 2276677 -> 2276698 (+0.00%); split: -0.12%, +0.12%
VClause: 9223 -> 9233 (+0.11%); split: -0.10%, +0.21%
SClause: 9025 -> 9005 (-0.22%); split: -0.38%, +0.16%
Copies: 67419 -> 67382 (-0.05%); split: -0.97%, +0.92%
PreSGPRs: 10830 -> 10668 (-1.50%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26937>

---

 src/amd/compiler/aco_insert_exec_mask.cpp | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index c3a8f09f018..21d38b81cea 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -238,7 +238,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
       assert(ctx.info[idx].exec.size() == info.num_exec_masks);
 
       /* create ssa names for outer exec masks */
-      if (info.has_discard) {
+      if (info.has_discard && preds.size() > 1) {
          aco_ptr<Pseudo_instruction> phi;
          for (int i = 0; i < info.num_exec_masks - 1; i++) {
             
phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
@@ -249,25 +249,21 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
          }
       }
 
-      /* create ssa name for loop active mask */
-      aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
-         aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
-      if (info.has_divergent_continue)
-         phi->definitions[0] = bld.def(bld.lm);
-      else
-         phi->definitions[0] = Definition(exec, bld.lm);
-      phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
-      ctx.info[idx].exec.back().first = bld.insert(std::move(phi));
       ctx.info[idx].exec.back().second |= mask_type_loop;
 
-      /* create a parallelcopy to move the active mask to exec */
       if (info.has_divergent_continue) {
+         /* create ssa name for loop active mask */
+         aco_ptr<Pseudo_instruction> 
phi{create_instruction<Pseudo_instruction>(
+            aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
+         phi->definitions[0] = bld.def(bld.lm);
+         phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
+         ctx.info[idx].exec.back().first = bld.insert(std::move(phi));
+
          restore_exec = true;
          uint8_t mask_type = ctx.info[idx].exec.back().second & (mask_type_wqm 
| mask_type_exact);
          ctx.info[idx].exec.emplace_back(ctx.info[idx].exec.back().first, 
mask_type);
       }
 
-   /* loop exit block */
    } else if (block->kind & block_kind_loop_exit) {
       Block* header = ctx.loop.back().loop_header;
       loop_info& info = ctx.loop.back();
@@ -278,7 +274,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
       /* fill the loop header phis */
       std::vector<unsigned>& header_preds = header->linear_preds;
       int instr_idx = 0;
-      if (info.has_discard) {
+      if (info.has_discard && header_preds.size() > 1) {
          while (instr_idx < info.num_exec_masks - 1) {
             aco_ptr<Instruction>& phi = header->instructions[instr_idx];
             assert(phi->opcode == aco_opcode::p_linear_phi);
@@ -288,7 +284,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
          }
       }
 
-      {
+      if (info.has_divergent_continue) {
          aco_ptr<Instruction>& phi = header->instructions[instr_idx++];
          assert(phi->opcode == aco_opcode::p_linear_phi);
          for (unsigned i = 1; i < phi->operands.size(); i++)

Reply via email to