Module: Mesa
Branch: staging/20.0
Commit: 99e36946d9a530cd03757900e4a1375ece6a2ca2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=99e36946d9a530cd03757900e4a1375ece6a2ca2

Author: Rhys Perry <[email protected]>
Date:   Fri Mar 20 16:07:08 2020 +0000

aco: implement 64-bit VGPR constant copies in handle_operands()

64-bit VGPR constant copies can happen because of 64-bit constant copy
propagation. Since that optimization is beneficial and these copies are
more annoying to deal with in the optimizer, I've implemented 64-bit VGPR
constant copies in handle_operands().

This also sets copy_operation::size correctly for 64-bit constant copies.

Cc: 20.0 <[email protected]>
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
(cherry picked from commit 43918c9a7fc76b56a521d5eea6a8d2b3fb675a15)

---

 .pick_status.json                          |  2 +-
 src/amd/compiler/aco_ir.h                  | 30 ++++++++++++++++++++++++++++++
 src/amd/compiler/aco_lower_to_hw_instr.cpp | 13 +++++++++----
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index fdcfddb2cbc..f790e240a9a 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1192,7 +1192,7 @@
         "description": "aco: implement 64-bit VGPR constant copies in 
handle_operands()",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 1ccaf2a0158..c6d84b68900 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -466,6 +466,36 @@ public:
       return isConstant() && constantValue() == cmp;
    }
 
+   constexpr uint64_t constantValue64(bool signext=false) const noexcept
+   {
+      if (is64BitConst_) {
+         if (reg_.reg <= 192)
+            return reg_.reg - 128;
+         else if (reg_.reg <= 208)
+            return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193);
+
+         switch (reg_.reg) {
+         case 240:
+            return 0x3FE0000000000000;
+         case 241:
+            return 0xBFE0000000000000;
+         case 242:
+            return 0x3FF0000000000000;
+         case 243:
+            return 0xBFF0000000000000;
+         case 244:
+            return 0x4000000000000000;
+         case 245:
+            return 0xC000000000000000;
+         case 246:
+            return 0x4010000000000000;
+         case 247:
+            return 0xC010000000000000;
+         }
+      }
+      return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
+   }
+
    constexpr void setKill(bool flag) noexcept
    {
       isKill_ = flag;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index c555ccdfa85..6d95cc7d809 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -764,6 +764,11 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
             preserve_scc = true;
          } else if (it->second.size == 2 && it->second.def.getTemp().type() == RegType::sgpr) {
             bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2));
+         } else if (it->second.size == 2 && it->second.op.isConstant()) {
+            uint64_t val = it->second.op.constantValue64();
+            bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val));
+            bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1),
+                     Operand((uint32_t)(val >> 32)));
          } else {
             bld.copy(it->second.def, it->second.op);
          }
@@ -921,7 +926,7 @@ void lower_to_hw_instr(Program* program)
                   if (op.isConstant()) {
                      const PhysReg reg = PhysReg{instr->definitions[0].physReg() + reg_idx};
                      const Definition def = Definition(reg, rc_def);
-                     copy_operations[reg] = {op, def, 0, 1};
+                     copy_operations[reg] = {op, def, 0, op.size()};
                      reg_idx++;
                      continue;
                   }
@@ -948,7 +953,7 @@ void lower_to_hw_instr(Program* program)
                   for (unsigned j = 0; j < k; j++) {
                      Operand op = Operand(PhysReg{instr->operands[0].physReg() + (i*k+j)}, rc_op);
                      Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, rc_def);
-                     copy_operations[def.physReg()] = {op, def, 0, 1};
+                     copy_operations[def.physReg()] = {op, def, 0, op.size()};
                   }
                }
                handle_operands(copy_operations, &ctx, program->chip_class, pi);
@@ -963,7 +968,7 @@ void lower_to_hw_instr(Program* program)
                   Operand operand = instr->operands[i];
                   if (operand.isConstant() || operand.size() == 1) {
                      assert(instr->definitions[i].size() == operand.size());
-                     copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, 1};
+                     copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.size()};
                   } else {
                      RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
                      RegClass op_rc = RegClass(operand.getTemp().type(), 1);
@@ -1035,7 +1040,7 @@ void lower_to_hw_instr(Program* program)
                   Operand operand = instr->operands[0];
                   if (operand.isConstant() || operand.size() == 1) {
                      assert(instr->definitions[0].size() == 1);
-                     copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, 1};
+                     copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.size()};
                   } else {
                      for (unsigned i = 0; i < operand.size(); i++)
                      {

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to