Module: Mesa
Branch: main
Commit: ce5838599d73cbda68303ba0ffb29de29410dfa2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ce5838599d73cbda68303ba0ffb29de29410dfa2

Author: Rhys Perry <[email protected]>
Date:   Wed Nov 16 17:08:09 2022 +0000

aco/gfx11: use v_cvt_i32_i16/v_cvt_u32_u16

fossil-db (gfx1100):
Totals from 52753 (39.07% of 135032) affected shaders:
CodeSize: 153603860 -> 153163384 (-0.29%)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19933>

---

 src/amd/compiler/aco_lower_to_hw_instr.cpp  | 5 +++++
 src/amd/compiler/tests/test_to_hw_instr.cpp | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 0f4d1f445ff..95c8d77ffc3 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1478,6 +1478,8 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
       /* move lo and zero high bits */
       if (lo.physReg().byte() == 2)
          bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), lo);
+      else if (ctx->program->gfx_level >= GFX11)
+         bld.vop1(aco_opcode::v_cvt_u32_u16, def, lo);
       else
          bld.vop2(aco_opcode::v_and_b32, def_lo, Operand::c32(0xFFFFu), lo);
       bld.vop2(aco_opcode::v_or_b32, def, Operand::c32(hi.constantValue() << 16u),
@@ -2237,6 +2239,9 @@ lower_to_hw_instr(Program* program)
                   if (offset == (32 - bits) && op.regClass() != s1) {
                      bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : aco_opcode::v_lshrrev_b32, dst,
                               Operand::c32(offset), op);
+                  } else if (offset == 0 && bits == 16 && ctx.program->gfx_level >= GFX11) {
+                     bld.vop1(signext ? aco_opcode::v_cvt_i32_i16 : aco_opcode::v_cvt_u32_u16, dst,
+                              op);
                   } else {
                      bld.vop3(signext ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32, dst, op,
                               Operand::c32(offset), Operand::c32(bits));
diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp
index 4f6aa44623f..91d049e691e 100644
--- a/src/amd/compiler/tests/test_to_hw_instr.cpp
+++ b/src/amd/compiler/tests/test_to_hw_instr.cpp
@@ -626,7 +626,9 @@ BEGIN_TEST(to_hw_instr.extract)
       EXT(2, 8)
       //! v1: %_:v[0] = @v_shr 24, %_:v[1]
       EXT(3, 8)
-      //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
+      //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
+      //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
+      //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
       EXT(0, 16)
       //! v1: %_:v[0] = @v_shr 16, %_:v[1]
       EXT(1, 16)

Reply via email to