================
@@ -66,63 +69,97 @@ char NVPTXPeephole::ID = 0;
 
 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, 
false)
 
-static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
+static bool isCVTALocalCombinationCandidate(MachineInstr &Root) {
   auto &MBB = *Root.getParent();
   auto &MF = *MBB.getParent();
-  // Check current instruction is cvta.to.local
-  if (Root.getOpcode() != NVPTX::cvta_to_local_64 &&
-      Root.getOpcode() != NVPTX::cvta_to_local)
+  // Check current instruction is cvta.local
+  if (Root.getOpcode() != NVPTX::cvta_local_64 &&
+      Root.getOpcode() != NVPTX::cvta_local)
     return false;
 
   auto &Op = Root.getOperand(1);
   const auto &MRI = MF.getRegInfo();
-  MachineInstr *GenericAddrDef = nullptr;
+  MachineInstr *LocalAddrDef = nullptr;
   if (Op.isReg() && Op.getReg().isVirtual()) {
-    GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
+    LocalAddrDef = MRI.getUniqueVRegDef(Op.getReg());
+  }
+
+  if (!LocalAddrDef || LocalAddrDef->getParent() != &MBB)
+    return false;
+
+  // With -nvptx-short-ptr there's an extra cvt.u64.u32 instruction
+  // between the LEA_ADDRi and the cvta.local.
+  if (LocalAddrDef->getOpcode() == NVPTX::CVT_u64_u32) {
+    auto &Op = LocalAddrDef->getOperand(1);
+    if (Op.isReg() && Op.getReg().isVirtual())
+      LocalAddrDef = MRI.getUniqueVRegDef(Op.getReg());
   }
 
   // Check the register operand is uniquely defined by LEA_ADDRi instruction
-  if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
-      (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
-       GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
+  if (!LocalAddrDef || LocalAddrDef->getParent() != &MBB ||
+      (LocalAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
+       LocalAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
     return false;
   }
 
   const NVPTXRegisterInfo *NRI =
       MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
 
   // Check the LEA_ADDRi operand is Frame index
-  auto &BaseAddrOp = GenericAddrDef->getOperand(1);
-  if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
+  auto &BaseAddrOp = LocalAddrDef->getOperand(1);
+  if (BaseAddrOp.isReg() &&
+      BaseAddrOp.getReg() == NRI->getFrameLocalRegister(MF)) {
     return true;
   }
 
   return false;
 }
 
-static void CombineCVTAToLocal(MachineInstr &Root) {
-  auto &MBB = *Root.getParent();
+static void CombineCVTALocal(MachineInstr &CVTALocalInstr) {
+  auto &MBB = *CVTALocalInstr.getParent();
   auto &MF = *MBB.getParent();
   const auto &MRI = MF.getRegInfo();
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-  auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+  auto *LeaInstr = MRI.getUniqueVRegDef(CVTALocalInstr.getOperand(1).getReg());
+  MachineInstr *CVTInstr = nullptr;
+  if (LeaInstr->getOpcode() == NVPTX::CVT_u64_u32) {
+    CVTInstr = LeaInstr;
+    LeaInstr = MRI.getUniqueVRegDef(LeaInstr->getOperand(1).getReg());
+    assert((LeaInstr->getOpcode() == NVPTX::LEA_ADDRi64 ||
+            LeaInstr->getOpcode() == NVPTX::LEA_ADDRi) &&
+           "Expected LEA_ADDRi64 or LEA_ADDRi");
----------------
Artem-B wrote:

Seeing a novel sequence of instructions should not be a fatal error here, IMO. 
We always have the safe fallback of doing nothing. Asserting on a current 
implementation detail seems like overkill. Can we just bail out instead, maybe 
with a debug printout?

https://github.com/llvm/llvm-project/pull/154814
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to