Module: Mesa Branch: master Commit: 6b538506f2ae77cb7dd54ee2768946c3155ba529 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b538506f2ae77cb7dd54ee2768946c3155ba529
Author: Tony Wasserka <[email protected]> Date: Wed Dec 2 18:35:55 2020 +0100 aco/ra: Fix register allocation for subdword operands ACO attempts to store the output of an instruction in the same register occupied by its operands where possible. Importantly, this only works if the operands are at least as large as the result's register size. The code failed to consider subdword operands when checking for this, causing entire register slots to be freed up even though subdword parts were still used. In Mafia 3, this affected the following code: v2b: %363:v[2][0:16], v2b: %362:v[2][16:32] = p_split_vector %360:v[2] v1: %116:v[2] = v_cvt_f32_f16 %362:v[2][16:32] v1: %117:v[2] = v_cvt_f32_f16 %363:v[2][0:16] where v[2] is allocated to %116 even though its original lower 16 bits are still used by the following instruction. Reviewed-by: Daniel Schürmann <[email protected]> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3717 Fixes: 031edbc4a54d5685b05e244f8aa1e094ec246eb5 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7461> --- src/amd/compiler/aco_register_allocation.cpp | 31 +++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index dd59239e29b..a75de106955 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -158,6 +158,7 @@ public: return res; } + /* Returns true if any of the bytes in the given range are allocated or blocked */ bool test(PhysReg start, unsigned num_bytes) { for (PhysReg i = start; i.reg_b < start.reg_b + num_bytes; i = PhysReg(i + 1)) { if (regs[i] & 0x0FFFFFFF) @@ -968,15 +969,21 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx, /* mark and count killed operands */ unsigned killed_ops = 0; + std::bitset<256> is_killed_operand; /* per-register */ for (unsigned j = 0; !is_phi(instr) && j < instr->operands.size(); j++) { - if 
(instr->operands[j].isTemp() && - instr->operands[j].isFirstKillBeforeDef() && - instr->operands[j].physReg() >= lb && - instr->operands[j].physReg() < ub && - !reg_file.test(instr->operands[j].physReg(), instr->operands[j].bytes())) { - assert(instr->operands[j].isFixed()); - tmp_file.block(instr->operands[j].physReg(), instr->operands[j].regClass()); - killed_ops += instr->operands[j].getTemp().size(); + Operand& op = instr->operands[j]; + if (op.isTemp() && + op.isFirstKillBeforeDef() && + op.physReg() >= lb && + op.physReg() < ub && + !reg_file.test(PhysReg{op.physReg().reg()}, align(op.bytes() + op.physReg().byte(), 4))) { + assert(op.isFixed()); + + for (unsigned i = 0; i < op.size(); ++i) { + is_killed_operand[(op.physReg() & 0xff) + i] = true; + } + + killed_ops += op.getTemp().size(); } } @@ -1015,11 +1022,8 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx, bool found = true; bool aligned = rc == RegClass::v4 && reg_lo % 4 == 0; for (unsigned j = reg_lo; found && j <= reg_hi; j++) { - if (tmp_file[j] == 0 || tmp_file[j] == last_var) - continue; - /* dead operands effectively reduce the number of estimated moves */ - if (tmp_file.is_blocked(PhysReg{j})) { + if (is_killed_operand[j & 0xFF]) { if (remaining_op_moves) { k--; remaining_op_moves--; @@ -1027,6 +1031,9 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx, continue; } + if (tmp_file[j] == 0 || tmp_file[j] == last_var) + continue; + if (tmp_file[j] == 0xF0000000) { k += 1; n++; _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
