Module: Mesa
Branch: main
Commit: 94789018245a5e3c4c03ea48cb4719ed08c60631
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=94789018245a5e3c4c03ea48cb4719ed08c60631

Author: Timur Kristóf <[email protected]>
Date:   Wed Jun 23 18:28:18 2021 +0200

aco: Implement integer conversions using p_extract.

Fossil DB stats on Sienna Cichlid:

Totals from 563 (0.44% of 128647) affected shaders:
SpillSGPRs: 1381 -> 1382 (+0.07%)
SpillVGPRs: 1606 -> 1552 (-3.36%)
CodeSize: 2474724 -> 2446612 (-1.14%); split: -1.15%, +0.02%
Scratch: 181248 -> 180224 (-0.56%)
Instrs: 440973 -> 435091 (-1.33%); split: -1.35%, +0.01%
Latency: 9123609 -> 8517830 (-6.64%); split: -6.66%, +0.02%
InvThroughput: 3685256 -> 3383293 (-8.19%); split: -8.22%, +0.02%
VClause: 8425 -> 8372 (-0.63%)
Copies: 66553 -> 66681 (+0.19%); split: -0.49%, +0.68%
Branches: 13824 -> 13825 (+0.01%); split: -0.01%, +0.01%
PreSGPRs: 21816 -> 21824 (+0.04%)

Fossil DB stats on Sienna Cichlid with NGGC on:

Totals from 58802 (45.71% of 128647) affected shaders:
SpillSGPRs: 6541 -> 6542 (+0.02%)
SpillVGPRs: 1606 -> 1552 (-3.36%)
CodeSize: 162976608 -> 162244340 (-0.45%); split: -0.45%, +0.00%
Scratch: 181248 -> 180224 (-0.56%)
Instrs: 31163521 -> 31098078 (-0.21%); split: -0.21%, +0.00%
Latency: 146893569 -> 144920070 (-1.34%); split: -1.34%, +0.00%
InvThroughput: 25384324 -> 25035940 (-1.37%); split: -1.38%, +0.00%
VClause: 552310 -> 552257 (-0.01%)
Copies: 3356856 -> 3356984 (+0.00%); split: -0.01%, +0.01%
Branches: 1237314 -> 1237315 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 2185339 -> 2185347 (+0.00%)

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11560>

---

 src/amd/compiler/aco_instruction_selection.cpp | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 9dc15e3685a..89b4dc7b170 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -635,24 +635,10 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
       assert(src_bits < 32);
       bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), 
src, Operand::zero(),
                  Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
-   } else if (ctx->options->chip_class >= GFX8) {
-      assert(src_bits < 32);
-      assert(src_bits != 8 || src.regClass() == v1b);
-      assert(src_bits != 16 || src.regClass() == v2b);
-      assert(dst_bits >= 16);
-      aco_ptr<SDWA_instruction> sdwa{
-         create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, 
asSDWA(Format::VOP1), 1, 1)};
-      sdwa->operands[0] = Operand(src);
-      sdwa->definitions[0] = Definition(tmp);
-      sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
-      sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : 
SubdwordSel::dword;
-      bld.insert(std::move(sdwa));
    } else {
       assert(src_bits < 32);
-      assert(ctx->options->chip_class == GFX6 || ctx->options->chip_class == 
GFX7);
-      aco_opcode opcode = sign_extend ? aco_opcode::v_bfe_i32 : 
aco_opcode::v_bfe_u32;
-      bld.vop3(opcode, Definition(tmp), src, Operand::zero(),
-               Operand::c32(src_bits == 8 ? 8u : 16u));
+      bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), 
Operand::c32(src_bits),
+                 Operand::c32((unsigned)sign_extend));
    }
 
    if (dst_bits == 64) {

Reply via email to