Module: Mesa Branch: master Commit: 76106301248b94b995a7600aa9b99360ce4e91f7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=76106301248b94b995a7600aa9b99360ce4e91f7
Author: Rhys Perry <[email protected]> Date: Thu Nov 26 14:30:32 2020 +0000 aco: coalesce constant copies fossil-db (Navi): Totals from 20108 (14.49% of 138791) affected shaders: CodeSize: 117835376 -> 117830512 (-0.00%) Instrs: 22813722 -> 22733245 (-0.35%) Cycles: 1009135584 -> 1008543628 (-0.06%) VMEM: 5401668 -> 5391247 (-0.19%) SMEM: 1286824 -> 1283663 (-0.25%) Copies: 1742154 -> 1661686 (-4.62%) Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7798> --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 72 +++++++++++++++++++----------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index c42e2bb671e..a5b939e905b 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1331,6 +1331,51 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo, bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand(2u)); } +void try_coalesce_copies(lower_context *ctx, + std::map<PhysReg, copy_operation>& copy_map, + copy_operation& copy) +{ + // TODO try more relaxed alignment for subdword copies + unsigned next_def_align = util_next_power_of_two(copy.bytes + 1); + unsigned next_op_align = next_def_align; + if (copy.def.regClass().type() == RegType::vgpr) + next_def_align = MIN2(next_def_align, 4); + if (copy.op.regClass().type() == RegType::vgpr) + next_op_align = MIN2(next_op_align, 4); + + if (copy.bytes >= 8 || copy.def.physReg().reg_b % next_def_align || + (!copy.op.isConstant() && copy.op.physReg().reg_b % next_op_align)) + return; + + auto other = copy_map.find(copy.def.physReg().advance(copy.bytes)); + if (other == copy_map.end() || copy.bytes + other->second.bytes > 8 || + copy.op.isConstant() != other->second.op.isConstant()) + return; + + /* don't create 64-bit copies before GFX10 */ + if (copy.bytes >= 4 && copy.def.regClass().type() == RegType::vgpr && + ctx->program->chip_class < GFX10) + return; + + unsigned new_size = copy.bytes + other->second.bytes; + if (copy.op.isConstant()) { + uint64_t val = copy.op.constantValue64() | + (other->second.op.constantValue64() << (copy.bytes * 8u)); + if (!Operand::is_constant_representable(val, copy.bytes + other->second.bytes, true, + copy.def.regClass().type() == RegType::vgpr)) + return; + copy.op = Operand::get_const(ctx->program->chip_class, val, new_size); + } else { + if (other->second.op.physReg() != copy.op.physReg().advance(copy.bytes)) + return; + copy.op = Operand(copy.op.physReg(), RegClass::get(copy.op.regClass().type(), new_size)); + } + + copy.bytes = new_size; + copy.def = Definition(copy.def.physReg(), RegClass::get(copy.def.regClass().type(), copy.bytes)); + copy_map.erase(other); +} + void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx, chip_class chip_class, Pseudo_instruction *pi) { Builder bld(ctx->program, &ctx->instructions); @@ -1368,32 +1413,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* it->second.bytes = 8; } - /* try to coalesce copies */ - unsigned next_def_align = util_next_power_of_two(it->second.bytes + 1); - unsigned next_op_align = next_def_align; - if (it->second.def.regClass().type() == RegType::vgpr) - next_def_align = MIN2(next_def_align, 4); - if (it->second.op.regClass().type() == RegType::vgpr) - next_op_align = MIN2(next_op_align, 4); - - if (it->second.bytes < 8 && !it->second.op.isConstant() && - it->first.reg_b % next_def_align == 0 && - it->second.op.physReg().reg_b % next_op_align == 0) { - // TODO try more relaxed alignment for subdword copies - PhysReg other_def_reg = it->first; - other_def_reg.reg_b += it->second.bytes; - PhysReg other_op_reg = it->second.op.physReg(); - other_op_reg.reg_b += it->second.bytes; - std::map<PhysReg, copy_operation>::iterator other = copy_map.find(other_def_reg); - if (other != copy_map.end() && - other->second.op.physReg() == other_op_reg && - it->second.bytes + other->second.bytes <= 8) { - it->second.bytes += other->second.bytes; - it->second.def = Definition(it->first, RegClass::get(it->second.def.regClass().type(), it->second.bytes)); - it->second.op = Operand(it->second.op.physReg(), RegClass::get(it->second.op.regClass().type(), it->second.bytes)); - copy_map.erase(other); - } - } + try_coalesce_copies(ctx, copy_map, it->second); /* check if the definition reg is used by another copy operation */ for (std::pair<const PhysReg, copy_operation>& copy : copy_map) { _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
