Module: Mesa Branch: master Commit: 74e2e9b682afe9f0e49e28facb2ecc387a8b4a74 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=74e2e9b682afe9f0e49e28facb2ecc387a8b4a74
Author: Rhys Perry <[email protected]> Date: Thu Oct 15 19:39:37 2020 +0100 aco: don't use bld.copy() in handle_operands() No fossil-db changes. Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Timur Kristóf <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7216> --- src/amd/compiler/aco_builder_h.py | 1 + src/amd/compiler/aco_lower_to_hw_instr.cpp | 100 +++++++++++++++++++++++------ 2 files changed, 80 insertions(+), 21 deletions(-) diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 9f708c4adfc..bcfdb91aecf 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -559,6 +559,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod ("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]), ("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 2)]), ("vop1", [Format.VOP1], 'VOP1_instruction', [(0, 0), (1, 1), (2, 2)]), + ("vop1_sdwa", [Format.VOP1, Format.SDWA], 'SDWA_instruction', [(1, 1)]), ("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])), ("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])), ("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])), diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 3c3d67095e1..3c4898e0d59 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -980,23 +980,73 @@ uint32_t get_intersection_mask(int a_start, int a_size, return u_bit_consecutive(intersection_start, intersection_end - intersection_start) & mask; } -void copy_16bit_literal(lower_context *ctx, Builder& bld, Definition def, Operand op) +void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op) { - if (ctx->program->chip_class < GFX10 || !(ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) { - unsigned offset = def.physReg().byte() * 8u; - def = Definition(PhysReg(def.physReg().reg()), v1); - Operand def_op(def.physReg(), v1); - bld.vop2(aco_opcode::v_and_b32, def, Operand(~(0xffffu << offset)), def_op); - bld.vop2(aco_opcode::v_or_b32, def, Operand(op.constantValue() << offset), def_op); - } else if (def.physReg().byte() == 2) { - Operand def_lo(def.physReg().advance(-2), v2b); - Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, def_lo, op); - static_cast<VOP3A_instruction*>(instr)->opsel = 0; + assert(op.bytes() == dst.bytes()); + + if (dst.regClass() == s1 && op.isLiteral()) { + uint32_t imm = op.constantValue(); + if (imm >= 0xffff8000 || imm <= 0x7fff) { + bld.sopk(aco_opcode::s_movk_i32, dst, imm & 0xFFFFu); + return; + } else if (util_bitreverse(imm) <= 64 || util_bitreverse(imm) >= 0xFFFFFFF0) { + uint32_t rev = util_bitreverse(imm); + bld.sop1(aco_opcode::s_brev_b32, dst, Operand(rev)); + return; + } else if (imm != 0) { + unsigned start = (ffs(imm) - 1) & 0x1f; + unsigned size = util_bitcount(imm) & 0x1f; + if ((((1u << size) - 1u) << start) == imm) { + bld.sop2(aco_opcode::s_bfm_b32, dst, Operand(size), Operand(start)); + return; + } + } + } + + if (dst.regClass() == s1) { + if (op.constantEquals(0x3e22f983) && ctx->program->chip_class >= GFX8) + op.setFixed(PhysReg{248}); /* it can be an inline constant on GFX8+ */ + bld.sop1(aco_opcode::s_mov_b32, dst, op); + } else if (dst.regClass() == s2) { + bld.sop1(aco_opcode::s_mov_b64, dst, op); + } else if (dst.regClass() == v1) { + bld.vop1(aco_opcode::v_mov_b32, dst, op); + } else if (dst.regClass() == v1b) { + assert(ctx->program->chip_class >= GFX8); + uint8_t val = op.constantValue(); + Operand op32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u)); + aco_ptr<SDWA_instruction> sdwa; + if (op32.isLiteral()) { + uint32_t a = (uint32_t)int8_mul_table[val * 2]; + uint32_t b = (uint32_t)int8_mul_table[val * 2 + 1]; + bld.vop2_sdwa(aco_opcode::v_mul_u32_u24, dst, + Operand(a | (a & 0x80u ? 0xffffff00u : 0x0u)), + Operand(b | (b & 0x80u ? 0xffffff00u : 0x0u))); + } else { + bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op32); + } + } else if (dst.regClass() == v2b && op.isConstant() && !op.isLiteral()) { + assert(ctx->program->chip_class >= GFX8); + bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand(0u)); + } else if (dst.regClass() == v2b && op.isLiteral()) { + if (ctx->program->chip_class < GFX10 || !(ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) { + unsigned offset = dst.physReg().byte() * 8u; + dst = Definition(PhysReg(dst.physReg().reg()), v1); + Operand def_op(dst.physReg(), v1); + bld.vop2(aco_opcode::v_and_b32, dst, Operand(~(0xffffu << offset)), def_op); + bld.vop2(aco_opcode::v_or_b32, dst, Operand(op.constantValue() << offset), def_op); + } else if (dst.physReg().byte() == 2) { + Operand def_lo(dst.physReg().advance(-2), v2b); + Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op); + static_cast<VOP3A_instruction*>(instr)->opsel = 0; + } else { + assert(dst.physReg().byte() == 0); + Operand def_hi(dst.physReg().advance(2), v2b); + Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi); + static_cast<VOP3A_instruction*>(instr)->opsel = 2; + } } else { - assert(def.physReg().byte() == 0); - Operand def_hi(def.physReg().advance(2), v2b); - Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, op, def_hi); - static_cast<VOP3A_instruction*>(instr)->opsel = 2; + unreachable("unsupported copy"); } } @@ -1048,10 +1098,18 @@ bool do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool } else { bld.vop1(aco_opcode::v_mov_b32, def, op); } - } else if (def.regClass() == v2b && op.isLiteral()) { - copy_16bit_literal(ctx, bld, def, op); + } else if (op.isConstant()) { + copy_constant(ctx, bld, def, op); + } else if (def.regClass() == v1) { + bld.vop1(aco_opcode::v_mov_b32, def, op); + } else if (def.regClass() == s1) { + bld.sop1(aco_opcode::s_mov_b32, def, op); + } else if (def.regClass() == s2) { + bld.sop1(aco_opcode::s_mov_b64, def, op); + } else if (def.regClass().is_subdword()) { + bld.vop1_sdwa(aco_opcode::v_mov_b32, def, op); } else { - bld.copy(def, op); + unreachable("unsupported copy"); } did_copy = true; @@ -1157,7 +1215,7 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo, Operand hi) { if (lo.isConstant() && hi.isConstant()) { - bld.copy(def, Operand(lo.constantValue() | (hi.constantValue() << 16))); + copy_constant(ctx, bld, def, Operand(lo.constantValue() | (hi.constantValue() << 16))); return; } @@ -1225,9 +1283,9 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo, if (ctx->program->chip_class >= GFX8) { /* either hi or lo are already placed correctly */ if (lo.physReg().reg() == def.physReg().reg()) - bld.copy(def_hi, hi); + bld.vop1_sdwa(aco_opcode::v_mov_b32, def_hi, hi); else - bld.copy(def_lo, lo); + bld.vop1_sdwa(aco_opcode::v_mov_b32, def_lo, lo); return; } _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
