Module: Mesa Branch: main Commit: f3f2311d69b9d172b9ad7996e795e6d3492294f2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3f2311d69b9d172b9ad7996e795e6d3492294f2
Author: Qiang Yu <[email protected]> Date: Sun Sep 10 16:46:14 2023 +0800 aco: extend max operands in a instruction to 128 We get more than 64 operands in p_end_with_regs when radeonsi. Reviewed-by: Daniel Schürmann <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25631> --- src/amd/compiler/aco_register_allocation.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 445d723feb7..83960cd5d77 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -24,6 +24,7 @@ #include "aco_ir.h" +#include "util/bitset.h" #include "util/enum_operators.h" #include <algorithm> @@ -1971,11 +1972,12 @@ handle_fixed_operands(ra_ctx& ctx, RegisterFile& register_file, std::vector<std::pair<Operand, Definition>>& parallelcopy, aco_ptr<Instruction>& instr) { - assert(instr->operands.size() <= 64); + assert(instr->operands.size() <= 128); RegisterFile tmp_file(register_file); - uint64_t mask = 0; + BITSET_DECLARE(mask, 128) = {0}; + for (unsigned i = 0; i < instr->operands.size(); i++) { Operand& op = instr->operands[i]; @@ -1990,8 +1992,9 @@ handle_fixed_operands(ra_ctx& ctx, RegisterFile& register_file, continue; } + unsigned j; bool found = false; - u_foreach_bit64 (j, mask) { + BITSET_FOREACH_SET (j, mask, i) { if (instr->operands[j].tempId() == op.tempId() && instr->operands[j].physReg() == op.physReg()) { found = true; @@ -2004,18 +2007,19 @@ handle_fixed_operands(ra_ctx& ctx, RegisterFile& register_file, /* clear from register_file so fixed operands are not collected be collect_vars() */ tmp_file.clear(src, op.regClass()); // TODO: try to avoid moving block vars to src - mask |= (uint64_t)1 << i; + BITSET_SET(mask, i); Operand pc_op(instr->operands[i].getTemp(), src); Definition pc_def = Definition(op.physReg(), pc_op.regClass()); parallelcopy.emplace_back(pc_op, pc_def); } - if (!mask) + if (BITSET_IS_EMPTY(mask)) return; + unsigned i; std::vector<unsigned> blocking_vars; - u_foreach_bit64 (i, mask) { + BITSET_FOREACH_SET (i, mask, instr->operands.size()) { Operand& op = instr->operands[i]; PhysRegInterval target{op.physReg(), op.size()}; std::vector<unsigned> blocking_vars2 = collect_vars(ctx, tmp_file, target);
