llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-powerpc Author: llvmbot <details> <summary>Changes</summary> Backport 1907b586384b51be2f6b44490c46941f08ff6974 Requested by: @<!-- -->amy-kwan --- Full diff: https://github.com/llvm/llvm-project/pull/198177.diff 2 Files Affected: - (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+28-8) - (added) llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll (+282) ``````````diff diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index bdba040529d00..56aa33fdd4098 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15586,17 +15586,27 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, } // The function check a i128 load can convert to 16i8 load for Vcmpequb. -static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) { +static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS, bool IsPPC64) { - auto isValidForConvert = [](SDValue &Operand) { + auto isValidForConvert = [IsPPC64](SDValue &Operand) { if (!Operand.hasOneUse()) return false; if (Operand.getValueType() != MVT::i128) return false; - if (Operand.getOpcode() == ISD::Constant) + if (Operand.getOpcode() == ISD::Constant) { + auto *C = cast<ConstantSDNode>(Operand); + const APInt &Val = C->getAPIntValue(); + // On PPC64, comparing an i128 value loaded from memory against a + // constant smaller than 2^16 is usually better left to scalar lowering. + // In that case, the compare can be lowered using xori (since xori has a + // 16-bit immediate field), which is cheaper than materializing a vector + // constant and using vcmpequb. 
+ if (IsPPC64 && Val.ult(1ULL << 16)) + return false; return true; + } auto *LoadNode = dyn_cast<LoadSDNode>(Operand); if (!LoadNode) @@ -15647,10 +15657,19 @@ SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here."); auto *LoadNode = cast<LoadSDNode>(Operand); - SDValue NewLoad = - DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(), - LoadNode->getBasePtr(), LoadNode->getMemOperand()); - DAG.ReplaceAllUsesOfValueWith(Operand.getValue(1), NewLoad.getValue(1)); + // Create a new MachineMemOperand without range metadata. + // Range metadata is only valid for integer scalar types, not vectors. + // The original i128 load may have range metadata, but when we convert + // to v16i8, that metadata is no longer semantically valid. + MachineMemOperand *MMO = LoadNode->getMemOperand(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *NewMMO = MF.getMachineMemOperand( + MMO->getPointerInfo(), MMO->getFlags(), MMO->getSize(), MMO->getAlign(), + MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(), + MMO->getSuccessOrdering(), MMO->getFailureOrdering()); + SDValue NewLoad = DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(), + LoadNode->getBasePtr(), NewMMO); + DAG.ReplaceAllUsesOfValueWith(SDValue(LoadNode, 1), NewLoad.getValue(1)); return NewLoad; }; @@ -15815,7 +15834,8 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N, // This transformation replaces memcmp(a, b, 16) with two vector loads // and one vector compare instruction. 
- if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS)) + if (Subtarget.hasAltivec() && + canConvertToVcmpequb(LHS, RHS, Subtarget.isPPC64())) return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N)); } diff --git a/llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll b/llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll new file mode 100644 index 0000000000000..c661d7da690b4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc-i128-cmp.ll @@ -0,0 +1,282 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefixes=COMMON,CHECK-AIX64 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefixes=COMMON,CHECK-LINUX +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefixes=COMMON,CHECK-AIX32 + +define i1 @test1() { +; CHECK-AIX64-LABEL: test1: +; CHECK-AIX64: # %bb.0: # %bb +; CHECK-AIX64-NEXT: ld r3, 0(0) +; CHECK-AIX64-NEXT: ld r4, 8(0) +; CHECK-AIX64-NEXT: or r3, r4, r3 +; CHECK-AIX64-NEXT: cntlzd r3, r3 +; CHECK-AIX64-NEXT: rldicl r3, r3, 58, 63 +; CHECK-AIX64-NEXT: blr +; +; CHECK-LINUX-LABEL: test1: +; CHECK-LINUX: # %bb.0: # %bb +; CHECK-LINUX-NEXT: ld r3, 8(0) +; CHECK-LINUX-NEXT: ld r4, 0(0) +; CHECK-LINUX-NEXT: or r3, r4, r3 +; CHECK-LINUX-NEXT: cntlzd r3, r3 +; CHECK-LINUX-NEXT: rldicl r3, r3, 58, 63 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX32-LABEL: test1: +; CHECK-AIX32: # %bb.0: # %bb +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: xxlxor vs35, vs35, vs35 +; CHECK-AIX32-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX32-NEXT: vcmpequb. 
v2, v2, v3 +; CHECK-AIX32-NEXT: mfocrf r3, 2 +; CHECK-AIX32-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX32-NEXT: blr +bb: + %load = load i128, ptr null, align 16 + %icmp = icmp eq i128 %load, 0 + ret i1 %icmp +} + +define i1 @test2() { +; CHECK-AIX64-LABEL: test2: +; CHECK-AIX64: # %bb.0: # %bb +; CHECK-AIX64-NEXT: ld r4, 8(0) +; CHECK-AIX64-NEXT: ld r3, 0(0) +; CHECK-AIX64-NEXT: xori r4, r4, 10 +; CHECK-AIX64-NEXT: or r3, r4, r3 +; CHECK-AIX64-NEXT: cntlzd r3, r3 +; CHECK-AIX64-NEXT: rldicl r3, r3, 58, 63 +; CHECK-AIX64-NEXT: blr +; +; CHECK-LINUX-LABEL: test2: +; CHECK-LINUX: # %bb.0: # %bb +; CHECK-LINUX-NEXT: ld r4, 0(0) +; CHECK-LINUX-NEXT: ld r3, 8(0) +; CHECK-LINUX-NEXT: xori r4, r4, 10 +; CHECK-LINUX-NEXT: or r3, r4, r3 +; CHECK-LINUX-NEXT: cntlzd r3, r3 +; CHECK-LINUX-NEXT: rldicl r3, r3, 58, 63 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX32-LABEL: test2: +; CHECK-AIX32: # %bb.0: # %bb +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX32-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX32-NEXT: lxvw4x vs35, 0, r3 +; CHECK-AIX32-NEXT: vcmpequb. 
v2, v2, v3 +; CHECK-AIX32-NEXT: mfocrf r3, 2 +; CHECK-AIX32-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX32-NEXT: blr +bb: + %load = load i128, ptr null, align 16 + %icmp = icmp eq i128 %load, 10 + ret i1 %icmp +} + +define i1 @test3() { +; CHECK-AIX64-LABEL: test3: +; CHECK-AIX64: # %bb.0: # %bb +; CHECK-AIX64-NEXT: ld r4, 8(0) +; CHECK-AIX64-NEXT: ld r3, 0(0) +; CHECK-AIX64-NEXT: xori r4, r4, 65535 +; CHECK-AIX64-NEXT: or r3, r4, r3 +; CHECK-AIX64-NEXT: cntlzd r3, r3 +; CHECK-AIX64-NEXT: rldicl r3, r3, 58, 63 +; CHECK-AIX64-NEXT: blr +; +; CHECK-LINUX-LABEL: test3: +; CHECK-LINUX: # %bb.0: # %bb +; CHECK-LINUX-NEXT: ld r4, 0(0) +; CHECK-LINUX-NEXT: ld r3, 8(0) +; CHECK-LINUX-NEXT: xori r4, r4, 65535 +; CHECK-LINUX-NEXT: or r3, r4, r3 +; CHECK-LINUX-NEXT: cntlzd r3, r3 +; CHECK-LINUX-NEXT: rldicl r3, r3, 58, 63 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX32-LABEL: test3: +; CHECK-AIX32: # %bb.0: # %bb +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX32-NEXT: lwz r3, L..C1(r2) # %const.0 +; CHECK-AIX32-NEXT: lxvw4x vs35, 0, r3 +; CHECK-AIX32-NEXT: vcmpequb. v2, v2, v3 +; CHECK-AIX32-NEXT: mfocrf r3, 2 +; CHECK-AIX32-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX32-NEXT: blr +bb: + %load = load i128, ptr null, align 16 + %icmp = icmp eq i128 %load, 65535 + ret i1 %icmp +} + +define i1 @test4() { +; CHECK-AIX64-LABEL: test4: +; CHECK-AIX64: # %bb.0: # %bb +; CHECK-AIX64-NEXT: li r3, 0 +; CHECK-AIX64-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX64-NEXT: ld r3, L..C0(r2) # %const.0 +; CHECK-AIX64-NEXT: lxvd2x vs35, 0, r3 +; CHECK-AIX64-NEXT: vcmpequb. 
v2, v2, v3 +; CHECK-AIX64-NEXT: mfocrf r3, 2 +; CHECK-AIX64-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX64-NEXT: blr +; +; CHECK-LINUX-LABEL: test4: +; CHECK-LINUX: # %bb.0: # %bb +; CHECK-LINUX-NEXT: li r3, 0 +; CHECK-LINUX-NEXT: lxvd2x vs34, 0, r3 +; CHECK-LINUX-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LINUX-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-LINUX-NEXT: lxvd2x vs35, 0, r3 +; CHECK-LINUX-NEXT: vcmpequb. v2, v2, v3 +; CHECK-LINUX-NEXT: mfocrf r3, 2 +; CHECK-LINUX-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX32-LABEL: test4: +; CHECK-AIX32: # %bb.0: # %bb +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX32-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX32-NEXT: lxvw4x vs35, 0, r3 +; CHECK-AIX32-NEXT: vcmpequb. v2, v2, v3 +; CHECK-AIX32-NEXT: mfocrf r3, 2 +; CHECK-AIX32-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX32-NEXT: blr +bb: + %load = load i128, ptr null, align 16 + %icmp = icmp eq i128 %load, 65536 + ret i1 %icmp +} + +; Test using the !range metadata +define i1 @test5() { +; CHECK-AIX64-LABEL: test5: +; CHECK-AIX64: # %bb.0: # %bb +; CHECK-AIX64-NEXT: ld r3, 0(0) +; CHECK-AIX64-NEXT: ld r4, 8(0) +; CHECK-AIX64-NEXT: or r3, r4, r3 +; CHECK-AIX64-NEXT: cntlzd r3, r3 +; CHECK-AIX64-NEXT: rldicl r3, r3, 58, 63 +; CHECK-AIX64-NEXT: blr +; +; CHECK-LINUX-LABEL: test5: +; CHECK-LINUX: # %bb.0: # %bb +; CHECK-LINUX-NEXT: ld r3, 8(0) +; CHECK-LINUX-NEXT: ld r4, 0(0) +; CHECK-LINUX-NEXT: or r3, r4, r3 +; CHECK-LINUX-NEXT: cntlzd r3, r3 +; CHECK-LINUX-NEXT: rldicl r3, r3, 58, 63 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX32-LABEL: test5: +; CHECK-AIX32: # %bb.0: # %bb +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: xxlxor vs35, vs35, vs35 +; CHECK-AIX32-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX32-NEXT: vcmpequb. 
v2, v2, v3 +; CHECK-AIX32-NEXT: mfocrf r3, 2 +; CHECK-AIX32-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX32-NEXT: blr +bb: + %load = load i128, ptr null, align 16, !range !0 + %icmp = icmp eq i128 %load, 0 + ret i1 %icmp +} + +define i1 @test6() { +; CHECK-AIX64-LABEL: test6: +; CHECK-AIX64: # %bb.0: # %bb +; CHECK-AIX64-NEXT: ld r4, 8(0) +; CHECK-AIX64-NEXT: ld r3, 0(0) +; CHECK-AIX64-NEXT: xori r4, r4, 65535 +; CHECK-AIX64-NEXT: or r3, r4, r3 +; CHECK-AIX64-NEXT: cntlzd r3, r3 +; CHECK-AIX64-NEXT: rldicl r3, r3, 58, 63 +; CHECK-AIX64-NEXT: blr +; +; CHECK-LINUX-LABEL: test6: +; CHECK-LINUX: # %bb.0: # %bb +; CHECK-LINUX-NEXT: ld r4, 0(0) +; CHECK-LINUX-NEXT: ld r3, 8(0) +; CHECK-LINUX-NEXT: xori r4, r4, 65535 +; CHECK-LINUX-NEXT: or r3, r4, r3 +; CHECK-LINUX-NEXT: cntlzd r3, r3 +; CHECK-LINUX-NEXT: rldicl r3, r3, 58, 63 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX32-LABEL: test6: +; CHECK-AIX32: # %bb.0: # %bb +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX32-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX32-NEXT: lxvw4x vs35, 0, r3 +; CHECK-AIX32-NEXT: vcmpequb. v2, v2, v3 +; CHECK-AIX32-NEXT: mfocrf r3, 2 +; CHECK-AIX32-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX32-NEXT: blr +bb: + %load = load i128, ptr null, align 16, !range !1 + %icmp = icmp eq i128 %load, 65535 + ret i1 %icmp +} + +define i1 @test7() { +; COMMON-LABEL: test7: +; COMMON: # %bb.0: # %bb +; COMMON-NEXT: li r3, 0 +; COMMON-NEXT: blr +bb: + %load = load i128, ptr null, align 16, !range !1 + %icmp = icmp eq i128 %load, 65536 + ret i1 %icmp +} + +define i1 @test8() { +; CHECK-AIX64-LABEL: test8: +; CHECK-AIX64: # %bb.0: # %bb +; CHECK-AIX64-NEXT: li r3, 0 +; CHECK-AIX64-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX64-NEXT: ld r3, L..C1(r2) # %const.0 +; CHECK-AIX64-NEXT: lxvd2x vs35, 0, r3 +; CHECK-AIX64-NEXT: vcmpequb. 
v2, v2, v3 +; CHECK-AIX64-NEXT: mfocrf r3, 2 +; CHECK-AIX64-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX64-NEXT: blr +; +; CHECK-LINUX-LABEL: test8: +; CHECK-LINUX: # %bb.0: # %bb +; CHECK-LINUX-NEXT: li r3, 0 +; CHECK-LINUX-NEXT: lxvd2x vs34, 0, r3 +; CHECK-LINUX-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-LINUX-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-LINUX-NEXT: lxvd2x vs35, 0, r3 +; CHECK-LINUX-NEXT: vcmpequb. v2, v2, v3 +; CHECK-LINUX-NEXT: mfocrf r3, 2 +; CHECK-LINUX-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX32-LABEL: test8: +; CHECK-AIX32: # %bb.0: # %bb +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: lxvw4x vs34, 0, r3 +; CHECK-AIX32-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX32-NEXT: lxvw4x vs35, 0, r3 +; CHECK-AIX32-NEXT: vcmpequb. v2, v2, v3 +; CHECK-AIX32-NEXT: mfocrf r3, 2 +; CHECK-AIX32-NEXT: rlwinm r3, r3, 25, 31, 31 +; CHECK-AIX32-NEXT: blr +bb: + %load = load i128, ptr null, align 16, !range !2 + %icmp = icmp eq i128 %load, 65536 + ret i1 %icmp +} + +!0 = !{i128 0, i128 2} +!1 = !{i128 0, i128 65536} +!2 = !{i128 0, i128 65537} `````````` </details> https://github.com/llvm/llvm-project/pull/198177 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
