Module: Mesa
Branch: main
Commit: 254b178d5bb66e30b5566858e6450e8d0acb32f3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=254b178d5bb66e30b5566858e6450e8d0acb32f3

Author: Rhys Perry <[email protected]>
Date:   Fri Nov 11 19:38:38 2022 +0000

aco: disallow SGPRs/constants with interpolation instructions

https://reviews.llvm.org/D137575

The VINTRP format cannot encode anything except VGPRs.

From reading LLVM's VINTERPInstructions.td, it looks like the same holds for GFX11.

No fossil-db changes.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20251>

---

 src/amd/compiler/aco_optimizer.cpp        | 31 ++++++++++++++++++++++++++++---
 src/amd/compiler/tests/test_assembler.cpp | 26 +++++++++++++-------------
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index c4bcbdd2a60..54d6a333686 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -597,6 +597,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
 bool
 can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 {
+   assert(instr->isVALU());
    if (instr->isSDWA() && ctx.program->gfx_level < GFX9)
       return false;
    return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
@@ -605,7 +606,20 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           instr->opcode != aco_opcode::v_writelane_b32 &&
           instr->opcode != aco_opcode::v_writelane_b32_e64 &&
           instr->opcode != aco_opcode::v_permlane16_b32 &&
-          instr->opcode != aco_opcode::v_permlanex16_b32;
+          instr->opcode != aco_opcode::v_permlanex16_b32 &&
+          instr->opcode != aco_opcode::v_interp_p1_f32 &&
+          instr->opcode != aco_opcode::v_interp_p2_f32 &&
+          instr->opcode != aco_opcode::v_interp_mov_f32 &&
+          instr->opcode != aco_opcode::v_interp_p1ll_f16 &&
+          instr->opcode != aco_opcode::v_interp_p1lv_f16 &&
+          instr->opcode != aco_opcode::v_interp_p2_legacy_f16 &&
+          instr->opcode != aco_opcode::v_interp_p2_f16 &&
+          instr->opcode != aco_opcode::v_interp_p10_f32_inreg &&
+          instr->opcode != aco_opcode::v_interp_p2_f32_inreg &&
+          instr->opcode != aco_opcode::v_interp_p10_f16_f32_inreg &&
+          instr->opcode != aco_opcode::v_interp_p2_f16_f32_inreg &&
+          instr->opcode != aco_opcode::v_interp_p10_rtz_f16_f32_inreg &&
+          instr->opcode != aco_opcode::v_interp_p2_rtz_f16_f32_inreg;
 }
 
 void
@@ -658,7 +672,6 @@ bool
 alu_can_accept_constant(aco_opcode opcode, unsigned operand)
 {
    switch (opcode) {
-   case aco_opcode::v_interp_p2_f32:
    case aco_opcode::v_mac_f32:
    case aco_opcode::v_writelane_b32:
    case aco_opcode::v_writelane_b32_e64:
@@ -677,7 +690,19 @@ alu_can_accept_constant(aco_opcode opcode, unsigned operand)
    case aco_opcode::p_bpermute_gfx10w64:
    case aco_opcode::p_bpermute_gfx11w64:
    case aco_opcode::p_interp_gfx11:
-   case aco_opcode::p_dual_src_export_gfx11: return false;
+   case aco_opcode::p_dual_src_export_gfx11:
+   case aco_opcode::v_interp_p1_f32:
+   case aco_opcode::v_interp_p2_f32:
+   case aco_opcode::v_interp_mov_f32:
+   case aco_opcode::v_interp_p1ll_f16:
+   case aco_opcode::v_interp_p1lv_f16:
+   case aco_opcode::v_interp_p2_legacy_f16:
+   case aco_opcode::v_interp_p10_f32_inreg:
+   case aco_opcode::v_interp_p2_f32_inreg:
+   case aco_opcode::v_interp_p10_f16_f32_inreg:
+   case aco_opcode::v_interp_p2_f16_f32_inreg:
+   case aco_opcode::v_interp_p10_rtz_f16_f32_inreg:
+   case aco_opcode::v_interp_p2_rtz_f16_f32_inreg: return false;
    default: return true;
    }
 }
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp
index e46ee13954e..533f69c6ba7 100644
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -735,40 +735,40 @@ BEGIN_TEST(assembler.gfx11.vinterp)
    Operand op1(bld.tmp(v1));
    op1.setFixed(PhysReg(256 + 20));
 
-   Operand op2(bld.tmp(s1));
-   op2.setFixed(PhysReg(30));
+   Operand op2(bld.tmp(v1));
+   op2.setFixed(PhysReg(256 + 30));
 
-   //>> v_interp_p10_f32 v42, v10, v20, s30 wait_exp:7              ; cd00072a 
007a290a
+   //>> v_interp_p10_f32 v42, v10, v20, v30 wait_exp:7              ; cd00072a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2);
 
-   //! v_interp_p10_f32 v42, v10, v20, s30 wait_exp:6              ; cd00062a 
007a290a
+   //! v_interp_p10_f32 v42, v10, v20, v30 wait_exp:6              ; cd00062a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 
6);
 
-   //! v_interp_p2_f32 v42, v10, v20, s30                          ; cd01002a 
007a290a
+   //! v_interp_p2_f32 v42, v10, v20, v30                          ; cd01002a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
 
-   //! v_interp_p10_f32 v42, -v10, v20, s30                        ; cd00002a 
207a290a
+   //! v_interp_p10_f32 v42, -v10, v20, v30                        ; cd00002a 
247a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 
0)->vinterp_inreg().neg[0] = true;
 
-   //! v_interp_p10_f32 v42, v10, -v20, s30                        ; cd00002a 
407a290a
+   //! v_interp_p10_f32 v42, v10, -v20, v30                        ; cd00002a 
447a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 
0)->vinterp_inreg().neg[1] = true;
 
-   //! v_interp_p10_f32 v42, v10, v20, -s30                        ; cd00002a 
807a290a
+   //! v_interp_p10_f32 v42, v10, v20, -v30                        ; cd00002a 
847a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 
0)->vinterp_inreg().neg[2] = true;
 
-   //! v_interp_p10_f16_f32 v42, v10, v20, s30 op_sel:[1,0,0,0]    ; cd02082a 
007a290a
+   //! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0]    ; cd02082a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, 
op2, 0, 0x1);
 
-   //! v_interp_p2_f16_f32 v42, v10, v20, s30 op_sel:[0,1,0,0]     ; cd03102a 
007a290a
+   //! v_interp_p2_f16_f32 v42, v10, v20, v30 op_sel:[0,1,0,0]     ; cd03102a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, dst, op0, op1, 
op2, 0, 0x2);
 
-   //! v_interp_p10_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,1,0] ; cd04202a 
007a290a
+   //! v_interp_p10_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,1,0] ; cd04202a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_rtz_f16_f32_inreg, dst, op0, 
op1, op2, 0, 0x4);
 
-   //! v_interp_p2_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,0,1] ; cd05402a 
007a290a
+   //! v_interp_p2_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,0,1] ; cd05402a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, 
op2, 0, 0x8);
 
-   //! v_interp_p10_f32 v42, v10, v20, s30 clamp                   ; cd00802a 
007a290a
+   //! v_interp_p10_f32 v42, v10, v20, v30 clamp                   ; cd00802a 
047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 
0)->vinterp_inreg().clamp = true;
 
    finish_assembler_test();

Reply via email to