Module: Mesa
Branch: main
Commit: 09413ff7458e12c5cbce20633d16bfd618cb5512
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09413ff7458e12c5cbce20633d16bfd618cb5512
Author: Daniel Schürmann <dan...@schuermann.dev>
Date: Sat Jan 6 16:46:54 2024 +0100

aco/insert_exec_mask: only create loop phis for exec mask if necessary

Totals from 195 (0.25% of 79242) affected shaders: (GFX11)

Instrs: 476457 -> 476031 (-0.09%); split: -0.23%, +0.14%
CodeSize: 2453964 -> 2452108 (-0.08%); split: -0.23%, +0.16%
SpillSGPRs: 944 -> 913 (-3.28%); split: -3.39%, +0.11%
SpillVGPRs: 838 -> 835 (-0.36%); split: -0.95%, +0.60%
Latency: 10811026 -> 10810125 (-0.01%); split: -0.08%, +0.07%
InvThroughput: 2276677 -> 2276698 (+0.00%); split: -0.12%, +0.12%
VClause: 9223 -> 9233 (+0.11%); split: -0.10%, +0.21%
SClause: 9025 -> 9005 (-0.22%); split: -0.38%, +0.16%
Copies: 67419 -> 67382 (-0.05%); split: -0.97%, +0.92%
PreSGPRs: 10830 -> 10668 (-1.50%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26937>

---

 src/amd/compiler/aco_insert_exec_mask.cpp | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index c3a8f09f018..21d38b81cea 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -238,7 +238,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>> assert(ctx.info[idx].exec.size() == info.num_exec_masks); /* create ssa names for outer exec masks */ - if (info.has_discard) { + if (info.has_discard && preds.size() > 1) { aco_ptr<Pseudo_instruction> phi; for (int i = 0; i < info.num_exec_masks - 1; i++) { phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
@@ -249,25 +249,21 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>> } } - /* create ssa name for loop active mask */ - aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>( - aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)}; - if (info.has_divergent_continue) - phi->definitions[0] = bld.def(bld.lm); - else - phi->definitions[0] = Definition(exec, bld.lm); - phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first); - ctx.info[idx].exec.back().first = bld.insert(std::move(phi)); ctx.info[idx].exec.back().second |= mask_type_loop; - /* create a parallelcopy to move the active mask to exec */ if (info.has_divergent_continue) { + /* create ssa name for loop active mask */ + aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>( + aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)}; + phi->definitions[0] = bld.def(bld.lm); + phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first); + ctx.info[idx].exec.back().first = bld.insert(std::move(phi)); + restore_exec = true; uint8_t mask_type = ctx.info[idx].exec.back().second & (mask_type_wqm | mask_type_exact); ctx.info[idx].exec.emplace_back(ctx.info[idx].exec.back().first, mask_type); } - /* loop exit block */ } else if (block->kind & block_kind_loop_exit) { Block* header = ctx.loop.back().loop_header; loop_info& info = ctx.loop.back();
@@ -278,7 +274,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>> /* fill the loop header phis */ std::vector<unsigned>& header_preds = header->linear_preds; int instr_idx = 0; - if (info.has_discard) { + if (info.has_discard && header_preds.size() > 1) { while (instr_idx < info.num_exec_masks - 1) { aco_ptr<Instruction>& phi = header->instructions[instr_idx]; assert(phi->opcode == aco_opcode::p_linear_phi);
@@ -288,7 +284,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>> } } - { + if (info.has_divergent_continue) { aco_ptr<Instruction>& phi = header->instructions[instr_idx++]; assert(phi->opcode == aco_opcode::p_linear_phi); for (unsigned i = 1; i < phi->operands.size(); i++)