https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432

From 7ba3e69a759f59bf746cb14640ea8ea426fa09fd Mon Sep 17 00:00:00 2001
From: jofrn <jofer...@amd.com>
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting an atomic vector load, all of its elements are
created, even those that end up unused; the two live components must
be found by looking at the upper and lower halves of the value. This
change extends EltsFromConsecutiveLoads to understand AtomicSDNode so
that the unused elements can be removed.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h      |  2 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 61 +++++++++++++++----
 3 files changed, 51 insertions(+), 14 deletions(-)
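
Reviewer note, below the fold (not part of the commit): AtomicSDNode
and LoadSDNode both derive from MemSDNode, which is why relaxing
makeEquivalentMemoryOrdering() to MemSDNode is the only signature
change the atomic path needs. A minimal sketch of the intended call
pattern, assuming the usual SelectionDAG lowering context; DAG, VT,
DL and Elt are illustrative names, not identifiers from this patch:

  if (auto *ALd = dyn_cast<AtomicSDNode>(Elt)) {
    // Materialize an ordinary load reusing the atomic load's memory
    // operand...
    SDValue NewLd = DAG.getLoad(VT, DL, ALd->getChain(),
                                ALd->getBasePtr(), ALd->getMemOperand());
    // ...and splice it into the chain at the old load's position,
    // which the MemSDNode overload now accepts for atomics.
    DAG.makeEquivalentMemoryOrdering(ALd, NewLd);
  }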

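The new combineVZEXT_LOAD() looks for a TokenFactor joining the
X86ISD::VZEXT_LOAD's chain with the chain of an atomic load, then
re-expresses the vector value in terms of that atomic load. Roughly
the following shape (node ids and types are illustrative, not taken
from a real dump):

  // Before the combine:
  //   t1: i64,ch   = AtomicLoad ...          ; the element still in use
  //   t2: v2i64,ch = X86ISD::VZEXT_LOAD ...  ; now redundant
  //   t3: ch       = TokenFactor t1:1, t2:1
  // After the combine:
  //   t4: v2i64 = scalar_to_vector t1
  //   t5: v2i64 = bitcast t4   ; CombineTo(N, t5, t1:1) retires t2

Since CombineTo() also rewires t2's chain uses to t1:1, the
TokenFactor is left with duplicate operands and can subsequently fold
away.
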
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 87b6914f8a0ee..40550d96a5b3d 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1873,7 +1873,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e6a7d092b7b79..1c1445f9f44b7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12236,7 +12236,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
                                                    SDValue NewMemOp) {
   assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1fc50a36fda72..5ce8d83feb0dd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7193,15 +7193,19 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
 }
 
-// Recurse to find a LoadSDNode source and the accumulated ByteOffest.
-static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
-  if (ISD::isNON_EXTLoad(Elt.getNode())) {
-    auto *BaseLd = cast<LoadSDNode>(Elt);
-    if (!BaseLd->isSimple())
-      return false;
+// Recurse to find a MemSDNode source and the accumulated ByteOffset.
+static bool findEltLoadSrc(SDValue Elt, MemSDNode *&Ld, int64_t &ByteOffset) {
+  if (auto *BaseLd = dyn_cast<AtomicSDNode>(Elt)) {
     Ld = BaseLd;
     ByteOffset = 0;
     return true;
-  }
+  }
+  if (auto *BaseLd = dyn_cast<LoadSDNode>(Elt)) {
+    if (!ISD::isNON_EXTLoad(Elt.getNode()) || !BaseLd->isSimple())
+      return false;
+    Ld = BaseLd;
+    ByteOffset = 0;
+    return true;
+  }
 
   switch (Elt.getOpcode()) {
   case ISD::BITCAST:
@@ -7254,7 +7258,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   APInt ZeroMask = APInt::getZero(NumElems);
   APInt UndefMask = APInt::getZero(NumElems);
 
-  SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
+  SmallVector<MemSDNode *, 8> Loads(NumElems, nullptr);
   SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
 
   // For each element in the initializer, see if we've found a load, zero or an
@@ -7304,7 +7308,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   EVT EltBaseVT = EltBase.getValueType();
   assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
          "Register/Memory size mismatch");
-  LoadSDNode *LDBase = Loads[FirstLoadedElt];
+  MemSDNode *LDBase = Loads[FirstLoadedElt];
   assert(LDBase && "Did not find base load for merging consecutive loads");
   unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
   unsigned BaseSizeInBytes = BaseSizeInBits / 8;
@@ -7318,15 +7322,18 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
 
   // Check to see if the element's load is consecutive to the base load
   // or offset from a previous (already checked) load.
-  auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
-    LoadSDNode *Ld = Loads[EltIdx];
+  auto CheckConsecutiveLoad = [&](MemSDNode *Base, int EltIdx) {
+    MemSDNode *Ld = Loads[EltIdx];
     int64_t ByteOffset = ByteOffsets[EltIdx];
     if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
       int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
       return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] &&
               Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0);
     }
-    return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes,
+    auto *L = dyn_cast<LoadSDNode>(Ld);
+    auto *B = dyn_cast<LoadSDNode>(Base);
+    return L && B &&
+           DAG.areNonVolatileConsecutiveLoads(L, B, BaseSizeInBytes,
                                               EltIdx - FirstLoadedElt);
   };
 
@@ -7347,7 +7354,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     }
   }
 
-  auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
+  auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, MemSDNode *LDBase) {
     auto MMOFlags = LDBase->getMemOperand()->getFlags();
     assert(LDBase->isSimple() &&
            "Cannot merge volatile or atomic loads.");
@@ -60539,6 +60546,35 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineVZEXT_LOAD(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  // Find the TokenFactor to locate the associated AtomicLoad.
+  SDNode *ALD = nullptr;
+  for (auto &TF : N->uses())
+    if (TF.getUser()->getOpcode() == ISD::TokenFactor) {
+      SDValue L = TF.getUser()->getOperand(0);
+      SDValue R = TF.getUser()->getOperand(1);
+      if (L.getNode() == N)
+        ALD = R.getNode();
+      else if (R.getNode() == N)
+        ALD = L.getNode();
+    }
+
+  if (!ALD)
+    return SDValue();
+  if (!isa<AtomicSDNode>(ALD))
+    return SDValue();
+
+  // Replace the VZEXT_LOAD with the AtomicLoad.
+  SDLoc dl(N);
+  SDValue SV =
+      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                  N->getValueType(0).changeTypeToInteger(), SDValue(ALD, 0));
+  SDValue BC = DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), SV);
+  BC = DCI.CombineTo(N, BC, SDValue(ALD, 1));
+  return BC;
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -60735,6 +60771,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INTRINSIC_VOID:  return combineINTRINSIC_VOID(N, DAG, DCI);
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
+  case X86ISD::VZEXT_LOAD: return combineVZEXT_LOAD(N, DAG, DCI);
     // clang-format on
   }
 

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
