Module: Mesa
Branch: main
Commit: 94789018245a5e3c4c03ea48cb4719ed08c60631
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=94789018245a5e3c4c03ea48cb4719ed08c60631
Author: Timur Kristóf <[email protected]>
Date:   Wed Jun 23 18:28:18 2021 +0200

aco: Implement integer conversions using p_extract.

Fossil DB stats on Sienna Cichlid:

Totals from 563 (0.44% of 128647) affected shaders:
SpillSGPRs: 1381 -> 1382 (+0.07%)
SpillVGPRs: 1606 -> 1552 (-3.36%)
CodeSize: 2474724 -> 2446612 (-1.14%); split: -1.15%, +0.02%
Scratch: 181248 -> 180224 (-0.56%)
Instrs: 440973 -> 435091 (-1.33%); split: -1.35%, +0.01%
Latency: 9123609 -> 8517830 (-6.64%); split: -6.66%, +0.02%
InvThroughput: 3685256 -> 3383293 (-8.19%); split: -8.22%, +0.02%
VClause: 8425 -> 8372 (-0.63%)
Copies: 66553 -> 66681 (+0.19%); split: -0.49%, +0.68%
Branches: 13824 -> 13825 (+0.01%); split: -0.01%, +0.01%
PreSGPRs: 21816 -> 21824 (+0.04%)

Fossil DB stats on Sienna Cichlid with NGGC on:

Totals from 58802 (45.71% of 128647) affected shaders:
SpillSGPRs: 6541 -> 6542 (+0.02%)
SpillVGPRs: 1606 -> 1552 (-3.36%)
CodeSize: 162976608 -> 162244340 (-0.45%); split: -0.45%, +0.00%
Scratch: 181248 -> 180224 (-0.56%)
Instrs: 31163521 -> 31098078 (-0.21%); split: -0.21%, +0.00%
Latency: 146893569 -> 144920070 (-1.34%); split: -1.34%, +0.00%
InvThroughput: 25384324 -> 25035940 (-1.37%); split: -1.38%, +0.00%
VClause: 552310 -> 552257 (-0.01%)
Copies: 3356856 -> 3356984 (+0.00%); split: -0.01%, +0.01%
Branches: 1237314 -> 1237315 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 2185339 -> 2185347 (+0.00%)

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11560>

---

 src/amd/compiler/aco_instruction_selection.cpp | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 9dc15e3685a..89b4dc7b170 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -635,24 +635,10 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
       assert(src_bits < 32);
       bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand::zero(),
                  Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
-   } else if (ctx->options->chip_class >= GFX8) {
-      assert(src_bits < 32);
-      assert(src_bits != 8 || src.regClass() == v1b);
-      assert(src_bits != 16 || src.regClass() == v2b);
-      assert(dst_bits >= 16);
-      aco_ptr<SDWA_instruction> sdwa{
-         create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
-      sdwa->operands[0] = Operand(src);
-      sdwa->definitions[0] = Definition(tmp);
-      sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
-      sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
-      bld.insert(std::move(sdwa));
    } else {
       assert(src_bits < 32);
-      assert(ctx->options->chip_class == GFX6 || ctx->options->chip_class == GFX7);
-      aco_opcode opcode = sign_extend ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32;
-      bld.vop3(opcode, Definition(tmp), src, Operand::zero(),
-               Operand::c32(src_bits == 8 ? 8u : 16u));
+      bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits),
+                 Operand::c32((unsigned)sign_extend));
    }

    if (dst_bits == 64) {
