Module: Mesa
Branch: main
Commit: f14023666ca9f6387af668e403627ea2bf85c66a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f14023666ca9f6387af668e403627ea2bf85c66a

Author: Timur Kristóf <[email protected]>
Date:   Wed Jun 23 18:27:19 2021 +0200

aco: Allow p_extract to have different definition and operand sizes.

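Makes p_extract more flexible by no longer requiring its operand and
definition to have the same size (p_insert still requires this), and by
allowing 32-bit extract sizes. This prepares it for other use cases.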

No Fossil DB changes.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11560>

---

 src/amd/compiler/aco_lower_to_hw_instr.cpp | 12 ++++++++----
 src/amd/compiler/aco_validate.cpp          | 27 +++++++++++++++++----------
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp 
b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index a3f70da6321..df94f21db85 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2129,7 +2129,8 @@ lower_to_hw_instr(Program* program)
                      bld.sop2(signext ? aco_opcode::s_bfe_i32 : 
aco_opcode::s_bfe_u32, dst,
                               bld.def(s1, scc), op, Operand::c32((bits << 16) 
| offset));
                   }
-               } else if (dst.regClass() == v1 || ctx.program->chip_class <= 
GFX7) {
+               } else if ((dst.regClass() == v1 && op.regClass() == v1) ||
+                          ctx.program->chip_class <= GFX7) {
                   assert(op.physReg().byte() == 0 && dst.physReg().byte() == 
0);
                   if (offset == (32 - bits) && op.regClass() != s1) {
                      bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : 
aco_opcode::v_lshrrev_b32, dst,
@@ -2138,9 +2139,12 @@ lower_to_hw_instr(Program* program)
                      bld.vop3(signext ? aco_opcode::v_bfe_i32 : 
aco_opcode::v_bfe_u32, dst, op,
                               Operand::c32(offset), Operand::c32(bits));
                   }
-               } else if (dst.regClass() == v2b) {
-                  bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, 
op).instr->sdwa().sel[0] =
-                     SubdwordSel(1, offset / 8, signext);
+               } else {
+                  assert(dst.regClass() == v2b || dst.regClass() == v1b || 
op.regClass() == v2b ||
+                         op.regClass() == v1b);
+                  SDWA_instruction& sdwa =
+                     bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, 
op).instr->sdwa();
+                  sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext);
                }
                break;
             }
diff --git a/src/amd/compiler/aco_validate.cpp 
b/src/amd/compiler/aco_validate.cpp
index dc25abab9ef..0badb3ce17a 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -466,22 +466,29 @@ validate_ir(Program* program)
                         instr->operands[0].getTemp().type() == RegType::sgpr,
                      "Can't extract/insert VGPR to SGPR", instr.get());
 
-               if (instr->operands[0].getTemp().type() == RegType::vgpr)
+               if (instr->opcode == aco_opcode::p_insert)
                   check(instr->operands[0].bytes() == 
instr->definitions[0].bytes(),
-                        "Sizes of operand and definition must match", 
instr.get());
+                        "Sizes of p_insert data operand and definition must 
match", instr.get());
 
                if (instr->definitions[0].getTemp().type() == RegType::sgpr)
                   check(instr->definitions.size() >= 2 && 
instr->definitions[1].isFixed() &&
                            instr->definitions[1].physReg() == scc,
-                        "SGPR extract/insert needs a SCC definition", 
instr.get());
-
-               check(instr->operands[2].constantEquals(8) || 
instr->operands[2].constantEquals(16),
-                     "Size must be 8 or 16", instr.get());
-               check(instr->operands[2].constantValue() < 
instr->operands[0].getTemp().bytes() * 8u,
-                     "Size must be smaller than source", instr.get());
+                        "SGPR extract/insert needs an SCC definition", 
instr.get());
+
+               unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
+               unsigned op_bits = instr->operands[2].constantValue();
+
+               if (instr->opcode == aco_opcode::p_insert) {
+                  check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", 
instr.get());
+                  check(op_bits < data_bits, "Size must be smaller than 
source", instr.get());
+               } else if (instr->opcode == aco_opcode::p_extract) {
+                  check(op_bits == 8 || op_bits == 16 || op_bits == 32,
+                        "Size must be 8 or 16 or 32", instr.get());
+                  check(data_bits >= op_bits, "Can't extract more bits than 
what the data has.",
+                        instr.get());
+               }
 
-               unsigned comp =
-                  instr->operands[0].bytes() * 8u / 
MAX2(instr->operands[2].constantValue(), 1);
+               unsigned comp = data_bits / MAX2(op_bits, 1);
                check(instr->operands[1].constantValue() < comp, "Index must be 
in-bounds",
                      instr.get());
             }

Reply via email to