llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: llvmbot

<details>
<summary>Changes</summary>

Backport 1907b586384b51be2f6b44490c46941f08ff6974

Requested by: @<!-- -->amy-kwan

---
Full diff: https://github.com/llvm/llvm-project/pull/198177.diff


2 Files Affected:

- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+28-8) 
- (added) llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll (+282) 


``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index bdba040529d00..56aa33fdd4098 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15586,17 +15586,27 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
 }
 
 // The function check a i128 load can convert to 16i8 load for Vcmpequb.
-static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
+static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS, bool IsPPC64) {
 
-  auto isValidForConvert = [](SDValue &Operand) {
+  auto isValidForConvert = [IsPPC64](SDValue &Operand) {
     if (!Operand.hasOneUse())
       return false;
 
     if (Operand.getValueType() != MVT::i128)
       return false;
 
-    if (Operand.getOpcode() == ISD::Constant)
+    if (Operand.getOpcode() == ISD::Constant) {
+      auto *C = cast<ConstantSDNode>(Operand);
+      const APInt &Val = C->getAPIntValue();
+      // On PPC64, comparing an i128 value loaded from memory against a
+      // constant smaller than 2^16 is usually better left to scalar lowering.
+      // In that case, the compare can be lowered using xori (since xori has a
+      // 16-bit immediate field), which is cheaper than materializing a vector
+      // constant and using vcmpequb.
+      if (IsPPC64 && Val.ult(1ULL << 16))
+        return false;
       return true;
+    }
 
     auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
     if (!LoadNode)
@@ -15647,10 +15657,19 @@ SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
     assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
 
     auto *LoadNode = cast<LoadSDNode>(Operand);
-    SDValue NewLoad =
-        DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
-                    LoadNode->getBasePtr(), LoadNode->getMemOperand());
-    DAG.ReplaceAllUsesOfValueWith(Operand.getValue(1), NewLoad.getValue(1));
+    // Create a new MachineMemOperand without range metadata.
+    // Range metadata is only valid for integer scalar types, not vectors.
+    // The original i128 load may have range metadata, but when we convert
+    // to v16i8, that metadata is no longer semantically valid.
+    MachineMemOperand *MMO = LoadNode->getMemOperand();
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineMemOperand *NewMMO = MF.getMachineMemOperand(
+        MMO->getPointerInfo(), MMO->getFlags(), MMO->getSize(), MMO->getAlign(),
+        MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
+        MMO->getSuccessOrdering(), MMO->getFailureOrdering());
+    SDValue NewLoad = DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
+                                  LoadNode->getBasePtr(), NewMMO);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(LoadNode, 1), NewLoad.getValue(1));
     return NewLoad;
   };
 
@@ -15815,7 +15834,8 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N,
     //   This transformation replaces memcmp(a, b, 16) with two vector loads
     //   and one vector compare instruction.
 
-    if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
+    if (Subtarget.hasAltivec() &&
+        canConvertToVcmpequb(LHS, RHS, Subtarget.isPPC64()))
       return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
   }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll b/llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll
new file mode 100644
index 0000000000000..c661d7da690b4
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefixes=COMMON,CHECK-AIX64
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefixes=COMMON,CHECK-LINUX
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefixes=COMMON,CHECK-AIX32
+
+define i1 @test1() {
+; CHECK-AIX64-LABEL: test1:
+; CHECK-AIX64:       # %bb.0: # %bb
+; CHECK-AIX64-NEXT:    ld r3, 0(0)
+; CHECK-AIX64-NEXT:    ld r4, 8(0)
+; CHECK-AIX64-NEXT:    or r3, r4, r3
+; CHECK-AIX64-NEXT:    cntlzd r3, r3
+; CHECK-AIX64-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-LINUX-LABEL: test1:
+; CHECK-LINUX:       # %bb.0: # %bb
+; CHECK-LINUX-NEXT:    ld r3, 8(0)
+; CHECK-LINUX-NEXT:    ld r4, 0(0)
+; CHECK-LINUX-NEXT:    or r3, r4, r3
+; CHECK-LINUX-NEXT:    cntlzd r3, r3
+; CHECK-LINUX-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-LINUX-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: test1:
+; CHECK-AIX32:       # %bb.0: # %bb
+; CHECK-AIX32-NEXT:    li r3, 0
+; CHECK-AIX32-NEXT:    xxlxor vs35, vs35, vs35
+; CHECK-AIX32-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX32-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX32-NEXT:    mfocrf r3, 2
+; CHECK-AIX32-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX32-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16
+  %icmp = icmp eq i128 %load, 0
+  ret i1 %icmp
+}
+
+define i1 @test2() {
+; CHECK-AIX64-LABEL: test2:
+; CHECK-AIX64:       # %bb.0: # %bb
+; CHECK-AIX64-NEXT:    ld r4, 8(0)
+; CHECK-AIX64-NEXT:    ld r3, 0(0)
+; CHECK-AIX64-NEXT:    xori r4, r4, 10
+; CHECK-AIX64-NEXT:    or r3, r4, r3
+; CHECK-AIX64-NEXT:    cntlzd r3, r3
+; CHECK-AIX64-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-LINUX-LABEL: test2:
+; CHECK-LINUX:       # %bb.0: # %bb
+; CHECK-LINUX-NEXT:    ld r4, 0(0)
+; CHECK-LINUX-NEXT:    ld r3, 8(0)
+; CHECK-LINUX-NEXT:    xori r4, r4, 10
+; CHECK-LINUX-NEXT:    or r3, r4, r3
+; CHECK-LINUX-NEXT:    cntlzd r3, r3
+; CHECK-LINUX-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-LINUX-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: test2:
+; CHECK-AIX32:       # %bb.0: # %bb
+; CHECK-AIX32-NEXT:    li r3, 0
+; CHECK-AIX32-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX32-NEXT:    lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX32-NEXT:    lxvw4x vs35, 0, r3
+; CHECK-AIX32-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX32-NEXT:    mfocrf r3, 2
+; CHECK-AIX32-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX32-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16
+  %icmp = icmp eq i128 %load, 10
+  ret i1 %icmp
+}
+
+define i1 @test3() {
+; CHECK-AIX64-LABEL: test3:
+; CHECK-AIX64:       # %bb.0: # %bb
+; CHECK-AIX64-NEXT:    ld r4, 8(0)
+; CHECK-AIX64-NEXT:    ld r3, 0(0)
+; CHECK-AIX64-NEXT:    xori r4, r4, 65535
+; CHECK-AIX64-NEXT:    or r3, r4, r3
+; CHECK-AIX64-NEXT:    cntlzd r3, r3
+; CHECK-AIX64-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-LINUX-LABEL: test3:
+; CHECK-LINUX:       # %bb.0: # %bb
+; CHECK-LINUX-NEXT:    ld r4, 0(0)
+; CHECK-LINUX-NEXT:    ld r3, 8(0)
+; CHECK-LINUX-NEXT:    xori r4, r4, 65535
+; CHECK-LINUX-NEXT:    or r3, r4, r3
+; CHECK-LINUX-NEXT:    cntlzd r3, r3
+; CHECK-LINUX-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-LINUX-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: test3:
+; CHECK-AIX32:       # %bb.0: # %bb
+; CHECK-AIX32-NEXT:    li r3, 0
+; CHECK-AIX32-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX32-NEXT:    lwz r3, L..C1(r2) # %const.0
+; CHECK-AIX32-NEXT:    lxvw4x vs35, 0, r3
+; CHECK-AIX32-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX32-NEXT:    mfocrf r3, 2
+; CHECK-AIX32-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX32-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16
+  %icmp = icmp eq i128 %load, 65535
+  ret i1 %icmp
+}
+
+define i1 @test4() {
+; CHECK-AIX64-LABEL: test4:
+; CHECK-AIX64:       # %bb.0: # %bb
+; CHECK-AIX64-NEXT:    li r3, 0
+; CHECK-AIX64-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX64-NEXT:    ld r3, L..C0(r2) # %const.0
+; CHECK-AIX64-NEXT:    lxvd2x vs35, 0, r3
+; CHECK-AIX64-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX64-NEXT:    mfocrf r3, 2
+; CHECK-AIX64-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-LINUX-LABEL: test4:
+; CHECK-LINUX:       # %bb.0: # %bb
+; CHECK-LINUX-NEXT:    li r3, 0
+; CHECK-LINUX-NEXT:    lxvd2x vs34, 0, r3
+; CHECK-LINUX-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-LINUX-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; CHECK-LINUX-NEXT:    lxvd2x vs35, 0, r3
+; CHECK-LINUX-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-LINUX-NEXT:    mfocrf r3, 2
+; CHECK-LINUX-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-LINUX-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: test4:
+; CHECK-AIX32:       # %bb.0: # %bb
+; CHECK-AIX32-NEXT:    li r3, 0
+; CHECK-AIX32-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX32-NEXT:    lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX32-NEXT:    lxvw4x vs35, 0, r3
+; CHECK-AIX32-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX32-NEXT:    mfocrf r3, 2
+; CHECK-AIX32-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX32-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16
+  %icmp = icmp eq i128 %load, 65536
+  ret i1 %icmp
+}
+
+; Test using the !range metadata
+define i1 @test5() {
+; CHECK-AIX64-LABEL: test5:
+; CHECK-AIX64:       # %bb.0: # %bb
+; CHECK-AIX64-NEXT:    ld r3, 0(0)
+; CHECK-AIX64-NEXT:    ld r4, 8(0)
+; CHECK-AIX64-NEXT:    or r3, r4, r3
+; CHECK-AIX64-NEXT:    cntlzd r3, r3
+; CHECK-AIX64-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-LINUX-LABEL: test5:
+; CHECK-LINUX:       # %bb.0: # %bb
+; CHECK-LINUX-NEXT:    ld r3, 8(0)
+; CHECK-LINUX-NEXT:    ld r4, 0(0)
+; CHECK-LINUX-NEXT:    or r3, r4, r3
+; CHECK-LINUX-NEXT:    cntlzd r3, r3
+; CHECK-LINUX-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-LINUX-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: test5:
+; CHECK-AIX32:       # %bb.0: # %bb
+; CHECK-AIX32-NEXT:    li r3, 0
+; CHECK-AIX32-NEXT:    xxlxor vs35, vs35, vs35
+; CHECK-AIX32-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX32-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX32-NEXT:    mfocrf r3, 2
+; CHECK-AIX32-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX32-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16, !range !0
+  %icmp = icmp eq i128 %load, 0
+  ret i1 %icmp
+}
+
+define i1 @test6() {
+; CHECK-AIX64-LABEL: test6:
+; CHECK-AIX64:       # %bb.0: # %bb
+; CHECK-AIX64-NEXT:    ld r4, 8(0)
+; CHECK-AIX64-NEXT:    ld r3, 0(0)
+; CHECK-AIX64-NEXT:    xori r4, r4, 65535
+; CHECK-AIX64-NEXT:    or r3, r4, r3
+; CHECK-AIX64-NEXT:    cntlzd r3, r3
+; CHECK-AIX64-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-LINUX-LABEL: test6:
+; CHECK-LINUX:       # %bb.0: # %bb
+; CHECK-LINUX-NEXT:    ld r4, 0(0)
+; CHECK-LINUX-NEXT:    ld r3, 8(0)
+; CHECK-LINUX-NEXT:    xori r4, r4, 65535
+; CHECK-LINUX-NEXT:    or r3, r4, r3
+; CHECK-LINUX-NEXT:    cntlzd r3, r3
+; CHECK-LINUX-NEXT:    rldicl r3, r3, 58, 63
+; CHECK-LINUX-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: test6:
+; CHECK-AIX32:       # %bb.0: # %bb
+; CHECK-AIX32-NEXT:    li r3, 0
+; CHECK-AIX32-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX32-NEXT:    lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX32-NEXT:    lxvw4x vs35, 0, r3
+; CHECK-AIX32-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX32-NEXT:    mfocrf r3, 2
+; CHECK-AIX32-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX32-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16, !range !1
+  %icmp = icmp eq i128 %load, 65535
+  ret i1 %icmp
+}
+
+define i1 @test7() {
+; COMMON-LABEL: test7:
+; COMMON:       # %bb.0: # %bb
+; COMMON-NEXT:    li r3, 0
+; COMMON-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16, !range !1
+  %icmp = icmp eq i128 %load, 65536
+  ret i1 %icmp
+}
+
+define i1 @test8() {
+; CHECK-AIX64-LABEL: test8:
+; CHECK-AIX64:       # %bb.0: # %bb
+; CHECK-AIX64-NEXT:    li r3, 0
+; CHECK-AIX64-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX64-NEXT:    ld r3, L..C1(r2) # %const.0
+; CHECK-AIX64-NEXT:    lxvd2x vs35, 0, r3
+; CHECK-AIX64-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX64-NEXT:    mfocrf r3, 2
+; CHECK-AIX64-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-LINUX-LABEL: test8:
+; CHECK-LINUX:       # %bb.0: # %bb
+; CHECK-LINUX-NEXT:    li r3, 0
+; CHECK-LINUX-NEXT:    lxvd2x vs34, 0, r3
+; CHECK-LINUX-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-LINUX-NEXT:    addi r3, r3, .LCPI7_0@toc@l
+; CHECK-LINUX-NEXT:    lxvd2x vs35, 0, r3
+; CHECK-LINUX-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-LINUX-NEXT:    mfocrf r3, 2
+; CHECK-LINUX-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-LINUX-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: test8:
+; CHECK-AIX32:       # %bb.0: # %bb
+; CHECK-AIX32-NEXT:    li r3, 0
+; CHECK-AIX32-NEXT:    lxvw4x vs34, 0, r3
+; CHECK-AIX32-NEXT:    lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX32-NEXT:    lxvw4x vs35, 0, r3
+; CHECK-AIX32-NEXT:    vcmpequb. v2, v2, v3
+; CHECK-AIX32-NEXT:    mfocrf r3, 2
+; CHECK-AIX32-NEXT:    rlwinm r3, r3, 25, 31, 31
+; CHECK-AIX32-NEXT:    blr
+bb:
+  %load = load i128, ptr null, align 16, !range !2
+  %icmp = icmp eq i128 %load, 65536
+  ret i1 %icmp
+}
+
+!0 = !{i128 0, i128 2}
+!1 = !{i128 0, i128 65536}
+!2 = !{i128 0, i128 65537}

``````````

</details>


https://github.com/llvm/llvm-project/pull/198177
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to