llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Nicolai Hähnle (nhaehnle)

<details>
<summary>Changes</summary>

If the instruction with tied operands is a BUNDLE instruction and we
handle it by replacing an operand, then we need to update the
corresponding internal operands as well. Otherwise, the resulting MIR is
invalid.

The test case is degenerate in the sense that the bundle only contains a
single instruction, but it is sufficient to exercise this issue.

---

**Stack**:
- [5/5] #<!-- -->166213
- [4/5] #<!-- -->166212 ⬅
- [3/5] #<!-- -->166211
- [2/5] #<!-- -->166210
- [1/5] #<!-- -->166209


⚠️ *Part of a stack created by [spr](https://github.com/ejoffe/spr). Merging 
this PR using the GitHub UI may have unexpected results.*

---
Full diff: https://github.com/llvm/llvm-project/pull/166212.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/TwoAddressInstructionPass.cpp (+16) 
- (added) llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir (+57) 


``````````diff
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp 
b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 414e414738b71..1f816b94cf56b 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1665,6 +1665,22 @@ void 
TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI,
     // by SubRegB is compatible with RegA with no subregister. So regardless of
     // whether the dest oper writes a subreg, the source oper should not.
     MO.setSubReg(0);
+
+    // Update uses of RegB to uses of RegA inside the bundle.
+    if (MI->isBundle()) {
+      for (MachineInstr *InnerMI = MI; InnerMI->isBundledWithSucc();) {
+        InnerMI = InnerMI->getNextNode();
+
+        for (MachineOperand &MO : InnerMI->all_uses()) {
+          if (MO.isReg() && MO.getReg() == RegB) {
+            assert(
+                MO.getSubReg() == 0 &&
+                "tied subregister uses in bundled instructions not supported");
+            MO.setReg(RegA);
+          }
+        }
+      }
+    }
   }
 
   if (AllUsesCopied) {
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir 
b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
new file mode 100644
index 0000000000000..696962a88c8b8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 %s --passes=two-address-instruction 
-verify-each -o - | FileCheck --check-prefixes=GCN %s
+
+# Exercise very basic handling of BUNDLE'd instructions by the 
two-address-instruction pass.
+
+# This test is an example where it is best to keep the two-address instruction
+# and resolve the tie with a COPY that is expected to be coalesced.
+---
+name:            test_fmac_bundle
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: test_fmac_bundle
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], 
[[COPY1]], 0, implicit $exec
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+    ; GCN-NEXT: BUNDLE implicit-def [[COPY2]], implicit [[DEF]], implicit 
[[DEF1]], implicit [[COPY2]](tied-def 0), implicit $mode, implicit $exec {
+    ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], 
killed [[DEF1]], killed [[COPY2]], implicit $mode, implicit $exec
+    ; GCN-NEXT: }
+    %10:vgpr_32 = COPY $vgpr0
+    %11:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_ADD_U32_e64 %10, %11, 0, implicit $exec
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    BUNDLE implicit-def %3:vgpr_32, implicit %0, implicit %1, implicit killed 
%2(tied-def 0), implicit $mode, implicit $exec {
+      %3:vgpr_32 = V_FMAC_F32_e32 killed %0, killed %1, killed %2, implicit 
$mode, implicit $exec
+    }
+
+...
+
+# This test is an example where conversion to three-address form would be 
beneficial.
+---
+name:            test_fmac_reuse_bundle
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: test_fmac_reuse_bundle
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+    ; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit 
[[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec {
+    ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], 
killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec
+    ; GCN-NEXT: }
+    ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], 
[[COPY]], 0, implicit $exec
+    %2:vgpr_32 = COPY $vgpr0
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    BUNDLE implicit-def %3:vgpr_32, implicit %0, implicit %1, implicit 
%2(tied-def 0), implicit $mode, implicit $exec {
+      %3:vgpr_32 = V_FMAC_F32_e32 killed %0, killed %1, killed %2, implicit 
$mode, implicit $exec
+    }
+    %4:vgpr_32 = V_ADD_U32_e64 %3, %2, 0, implicit $exec
+
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/166212
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to