Module: Mesa Branch: main Commit: 6eaf416f353310b106c12a928fd5099b105503cb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6eaf416f353310b106c12a928fd5099b105503cb
Author: Daniel Schürmann <[email protected]> Date: Mon Sep 4 10:07:27 2023 +0200 aco/insert_exec_mask: Simplify WQM handling (2/2) by calculating WQM requirements on demand. No fossil-db changes. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25038> --- src/amd/compiler/aco_insert_exec_mask.cpp | 90 ++++--------------------------- 1 file changed, 11 insertions(+), 79 deletions(-) diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 6a31a01b48d..6053874308a 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -47,18 +47,6 @@ enum mask_type : uint8_t { mask_type_loop = 1 << 3, /* active lanes of a loop */ }; -struct wqm_ctx { - Program* program; - /* state for WQM propagation */ - std::set<unsigned> worklist; - std::vector<bool> branch_wqm; /* true if the branch condition in this block should be in wqm */ - wqm_ctx(Program* program_) : program(program_), branch_wqm(program->blocks.size()) - { - for (unsigned i = 0; i < program->blocks.size(); i++) - worklist.insert(i); - } -}; - struct loop_info { Block* loop_header; uint16_t num_exec_masks; @@ -74,8 +62,6 @@ struct loop_info { struct block_info { std::vector<std::pair<Operand, uint8_t>> exec; /* Vector of exec masks. Either a temporary or const -1. */ - std::vector<WQMState> instr_needs; - uint8_t block_needs; }; struct exec_ctx { @@ -108,70 +94,16 @@ needs_exact(aco_ptr<Instruction>& instr) } } -void -mark_block_wqm(wqm_ctx& ctx, unsigned block_idx) +WQMState +get_instr_needs(aco_ptr<Instruction>& instr) { - if (ctx.branch_wqm[block_idx]) - return; - - for (Block& block : ctx.program->blocks) { - if (block.index >= block_idx && block.kind & block_kind_top_level) - break; - ctx.branch_wqm[block.index] = true; - ctx.worklist.insert(block.index); - } -} - -void -get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block) -{ - block_info& info = exec_ctx.info[block->index]; - - std::vector<WQMState> instr_needs(block->instructions.size()); - - bool propagate_wqm = ctx.branch_wqm[block->index]; - for (int i = block->instructions.size() - 1; i >= 0; --i) { - aco_ptr<Instruction>& instr = block->instructions[i]; - - if (instr->opcode == aco_opcode::p_end_wqm) - propagate_wqm = true; + if (needs_exact(instr)) + return Exact; - bool pred_by_exec = needs_exec_mask(instr.get()) || - instr->opcode == aco_opcode::p_logical_end || instr->isBranch(); - - if (needs_exact(instr)) - instr_needs[i] = Exact; - else if (propagate_wqm && pred_by_exec) - instr_needs[i] = WQM; - else - instr_needs[i] = Unspecified; - - info.block_needs |= instr_needs[i]; - } - - info.instr_needs = instr_needs; - - /* for "if (<cond>) <wqm code>" or "while (<cond>) <wqm code>", - * <cond> should be computed in WQM */ - if (propagate_wqm) { - mark_block_wqm(ctx, block->index); - } -} - -void -calculate_wqm_needs(exec_ctx& exec_ctx) -{ - wqm_ctx ctx(exec_ctx.program); - - while (!ctx.worklist.empty()) { - unsigned block_index = *std::prev(ctx.worklist.end()); - ctx.worklist.erase(std::prev(ctx.worklist.end())); - - Block& block = exec_ctx.program->blocks[block_index]; - get_block_needs(ctx, exec_ctx, &block); - } + bool pred_by_exec = needs_exec_mask(instr.get()) || instr->opcode == aco_opcode::p_logical_end || + instr->isBranch(); - exec_ctx.handle_wqm = true; + return pred_by_exec ? WQM : Unspecified; } Operand @@ -542,13 +474,13 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio for (; idx < block->instructions.size(); idx++) { aco_ptr<Instruction> instr = std::move(block->instructions[idx]); - WQMState needs = ctx.handle_wqm ? ctx.info[block->index].instr_needs[idx] : Unspecified; + WQMState needs = ctx.handle_wqm ? get_instr_needs(instr) : Unspecified; if (needs == WQM && state != WQM) { transition_to_WQM(ctx, bld, block->index); state = WQM; } else if (needs == Exact) { - if (ctx.info[block->index].block_needs & WQM) + if (ctx.handle_wqm) handle_atomic_data(ctx, bld, block->index, instr); transition_to_Exact(ctx, bld, block->index); state = Exact; @@ -564,7 +496,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio current_exec = get_exec_op(ctx.info[block->index].exec[0].first); ctx.info[block->index].exec[0].first = Operand(bld.lm); state = Exact; - } else if (ctx.info[block->index].exec.size() >= 2 && needs == WQM) { + } else if (ctx.info[block->index].exec.size() >= 2 && ctx.handle_wqm) { /* Preserve the WQM mask */ ctx.info[block->index].exec[1].second &= ~mask_type_global; } @@ -904,7 +836,7 @@ insert_exec_mask(Program* program) exec_ctx ctx(program); if (program->needs_wqm && program->needs_exact) - calculate_wqm_needs(ctx); + ctx.handle_wqm = true; for (Block& block : program->blocks) process_block(ctx, &block);
