llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Vitaly Buka (vitalybuka)

<details>
<summary>Changes</summary>

This reverts commit a55a7207c7e4d98dad32e8d53dd5964ee833edd9.

---

Patch is 338.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165979.diff

2 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+2-112)
- (modified) llvm/test/CodeGen/X86/bittest-big-integer.ll (+6331-994)

``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6f75a2eb7075a..c5fb5535d0057 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53344,80 +53344,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
-static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
-                              SelectionDAG &DAG,
-                              const X86Subtarget &Subtarget) {
-  using namespace SDPatternMatch;
-
-  // Only handle normal stores and its chain was a matching normal load.
-  auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
-  if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
-      !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
-      Ld->getBasePtr() != St->getBasePtr() ||
-      Ld->getOffset() != St->getOffset())
-    return SDValue();
-
-  SDValue LoadVal(Ld, 0);
-  SDValue StoredVal = St->getValue();
-  EVT VT = StoredVal.getValueType();
-
-  // Only narrow larger than legal scalar integers.
-  if (!VT.isScalarInteger() ||
-      VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
-    return SDValue();
-
-  // BTR: X & ~(1 << ShAmt)
-  // BTS: X | (1 << ShAmt)
-  // BTC: X ^ (1 << ShAmt)
-  SDValue ShAmt;
-  if (!StoredVal.hasOneUse() ||
-      !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
-                                  m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
-        sd_match(StoredVal,
-                 m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
-        sd_match(StoredVal,
-                 m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
-    return SDValue();
-
-  // Ensure the shift amount is in bounds.
-  KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
-  if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
-    return SDValue();
-
-  // Split the shift into an alignment shift that moves the active i32 block to
-  // the bottom bits for truncation and a modulo shift that can act on the i32.
-  EVT AmtVT = ShAmt.getValueType();
-  SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                                 DAG.getSignedConstant(-32LL, DL, AmtVT));
-  SDValue ModuloAmt =
-      DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
-
-  // Compute the byte offset for the i32 block that is changed by the RMW.
-  // combineTruncate will adjust the load for us in a similar way.
-  EVT PtrVT = St->getBasePtr().getValueType();
-  SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT);
-  SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
-                                   DAG.getShiftAmountConstant(3, PtrVT, DL));
-  SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL,
-                                            SDNodeFlags::NoUnsignedWrap);
-
-  // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
-  SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
-  X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-
-  SDValue Mask =
-      DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
-                  DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
-  if (StoredVal.getOpcode() == ISD::AND)
-    Mask = DAG.getNOT(DL, Mask, MVT::i32);
-
-  SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
-  return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
-                      Align(), St->getMemOperand()->getFlags());
-}
-
 static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
@@ -53644,9 +53570,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
-    return R;
-
   // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
   //         store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
   if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
@@ -54579,9 +54502,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   // truncation, see if we can convert the shift into a pointer offset instead.
   // Limit this to normal (non-ext) scalar integer loads.
   if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
-      Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
-      (Src.getOperand(0).hasOneUse() ||
-       !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
+      Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
+      ISD::isNormalLoad(Src.getOperand(0).getNode())) {
     auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
     if (Ld->isSimple() && VT.isByteSized() &&
         isPowerOf2_64(VT.getSizeInBits())) {
@@ -56381,7 +56303,6 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
-  using namespace SDPatternMatch;
   const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   const SDValue LHS = N->getOperand(0);
   const SDValue RHS = N->getOperand(1);
@@ -56440,37 +56361,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
     if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
       return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
 
-    // If we're performing a bit test on a larger than legal type, attempt
-    // to (aligned) shift down the value to the bottom 32-bits and then
-    // perform the bittest on the i32 value.
-    // ICMP_ZERO(AND(X,SHL(1,IDX)))
-    // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
-    if (isNullConstant(RHS) &&
-        OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
-      SDValue X, ShAmt;
-      if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
-                                       m_Shl(m_One(), m_Value(ShAmt)))))) {
-        // Only attempt this if the shift amount is known to be in bounds.
-        KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
-        if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
-          EVT AmtVT = ShAmt.getValueType();
-          SDValue AlignAmt =
-              DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                          DAG.getSignedConstant(-32LL, DL, AmtVT));
-          SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                                          DAG.getConstant(31, DL, AmtVT));
-          SDValue Mask = DAG.getNode(
-              ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
-              DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
-          X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
-          X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-          X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
-          return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
-                              CC);
-        }
-      }
-    }
-
     // cmpeq(trunc(x),C) --> cmpeq(x,C)
     // cmpne(trunc(x),C) --> cmpne(x,C)
     // iff x upper bits are zero.
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index 06e7d4773c58d..8007d9dcf13bc 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -203,14 +203,24 @@ define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind {
 define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {
 ; X86-LABEL: test_ne_i64:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $32, %edx
-; X86-NEXT:    shrl $3, %edx
-; X86-NEXT:    movl (%eax,%edx), %eax
-; X86-NEXT:    btl %ecx, %eax
-; X86-NEXT:    setb %al
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %edx
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_2:
+; X86-NEXT:    andl 4(%eax), %esi
+; X86-NEXT:    andl (%eax), %edx
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    setne %al
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ne_i64:
@@ -232,20 +242,38 @@ define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {
 define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {
 ; X86-LABEL: complement_ne_i64:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    andl $32, %esi
-; X86-NEXT:    shrl $3, %esi
-; X86-NEXT:    movl (%ecx,%esi), %edi
-; X86-NEXT:    btl %edx, %edi
-; X86-NEXT:    setb %al
-; X86-NEXT:    btcl %edx, %edi
-; X86-NEXT:    movl %edi, (%ecx,%esi)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB6_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB6_2:
+; X86-NEXT:    movl (%edx), %ecx
+; X86-NEXT:    movl 4(%edx), %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    andl %esi, %ebx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    andl %eax, %ebp
+; X86-NEXT:    xorl %esi, %edi
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    setne %al
+; X86-NEXT:    movl %ecx, (%edx)
+; X86-NEXT:    movl %edi, 4(%edx)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: complement_ne_i64:
@@ -272,20 +300,40 @@ define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {
 define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {
 ; X86-LABEL: reset_eq_i64:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    andl $32, %esi
-; X86-NEXT:    shrl $3, %esi
-; X86-NEXT:    movl (%ecx,%esi), %edi
-; X86-NEXT:    btl %edx, %edi
-; X86-NEXT:    setae %al
-; X86-NEXT:    btrl %edx, %edi
-; X86-NEXT:    movl %edi, (%ecx,%esi)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %esi
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB7_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB7_2:
+; X86-NEXT:    movl (%edx), %eax
+; X86-NEXT:    movl 4(%edx), %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    andl %edi, %ebx
+; X86-NEXT:    notl %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    andl %esi, %ebp
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %ecx, %edi
+; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    sete %al
+; X86-NEXT:    movl %esi, (%edx)
+; X86-NEXT:    movl %edi, 4(%edx)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: reset_eq_i64:
@@ -313,20 +361,38 @@ define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {
 define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {
 ; X86-LABEL: set_ne_i64:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    andl $32, %esi
-; X86-NEXT:    shrl $3, %esi
-; X86-NEXT:    movl (%ecx,%esi), %edi
-; X86-NEXT:    btl %edx, %edi
-; X86-NEXT:    setb %al
-; X86-NEXT:    btsl %edx, %edi
-; X86-NEXT:    movl %edi, (%ecx,%esi)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB8_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB8_2:
+; X86-NEXT:    movl (%edx), %ecx
+; X86-NEXT:    movl 4(%edx), %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    andl %esi, %ebx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    andl %eax, %ebp
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    setne %al
+; X86-NEXT:    movl %ecx, (%edx)
+; X86-NEXT:    movl %edi, 4(%edx)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: set_ne_i64:
@@ -353,47 +419,52 @@ define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {
 define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {
 ; X86-LABEL: init_eq_i64:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl $1, %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    shldl %cl, %edx, %esi
-; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    xorl %edi, %edi
-; X86-NEXT:    shldl %cl, %eax, %edi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
 ; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shll %cl, %esi
 ; X86-NEXT:    testb $32, %cl
 ; X86-NEXT:    je .LBB9_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl $0, %eax
 ; X86-NEXT:  .LBB9_2:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    notl %esi
-; X86-NEXT:    notl %edx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    notl %ebp
 ; X86-NEXT:    je .LBB9_4
 ; X86-NEXT:  # %bb.3:
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    xorl %esi, %esi
 ; X86-NEXT:  .LBB9_4:
-; X86-NEXT:    andl 4(%ebx), %esi
-; X86-NEXT:    orl %edi, %esi
-; X86-NEXT:    andl (%ebx), %edx
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    andl $32, %eax
-; X86-NEXT:    shrl $3, %eax
-; X86-NEXT:    movl (%ebx,%eax), %eax
-; X86-NEXT:    btl %ecx, %eax
-; X86-NEXT:    setae %al
-; X86-NEXT:    movl %esi, 4(%ebx)
-; X86-NEXT:    movl %edx, (%ebx)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl 4(%ecx), %ecx
+; X86-NEXT:    andl %ecx, %edx
+; X86-NEXT:    andl %ecx, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl (%edi), %ecx
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    andl %ecx, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %ebp, (%edi)
+; X86-NEXT:    movl %ebx, 4(%edi)
+; X86-NEXT:    sete %al
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; SSE-LABEL: init_eq_i64:
@@ -445,25 +516,101 @@ define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {
 define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {
 ; X86-LABEL: test_ne_i128:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $96, %edx
-; X86-NEXT:    shrl $3, %edx
-; X86-NEXT:    movl (%eax,%edx), %eax
-; X86-NEXT:    btl %ecx, %eax
-; X86-NEXT:    setb %al
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $48, %esp
+; X86-NEXT:    movzbl 12(%ebp), %ecx
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, (%esp)
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    andb $12, %al
+; X86-NEXT:    negb %al
+; X86-NEXT:    movsbl %al, %esi
+; X86-NEXT:    movl 24(%esp,%esi), %edi
+; X86-NEXT:    movl 28(%esp,%esi), %eax
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    movl 16(%esp,%esi), %edx
+; X86-NEXT:    movl 20(%esp,%esi), %esi
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    movl 8(%ebp), %ebx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    andl 8(%ebx), %edi
+; X86-NEXT:    andl (%ebx), %edx
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    andl 12(%ebx), %eax
+; X86-NEXT:    andl 4(%ebx), %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    setne %al
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_ne_i128:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $96, %eax
-; X64-NEXT:    shrl $3, %eax
-; X64-NEXT:    movl (%rdi,%rax), %eax
-; X64-NEXT:    btl %esi, %eax
-; X64-NEXT:    setb %al
-; X64-NEXT:    retq
+; SSE-LABEL: test_ne_i128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    xorl %edx, %edx
+; SSE-NEXT:    shldq %cl, %rax, %rdx
+; SSE-NEXT:    xorl %esi, %esi
+; SSE-NEXT:    shlq %cl, %rax
+; SSE-NEXT:    testb $64, %cl
+; SSE-NEXT:    cmovneq %rax, %rdx
+; SSE-NEXT:    cmovneq %rsi, %rax
+; SSE-NEXT:    andq 8(%rdi), %rdx
+; SSE-NEXT:    andq (%rdi), %rax
+; SSE-NEXT:    orq %rdx, %rax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: test_ne_i128:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    movl %esi, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    movl $1, %edx
+; AVX2-NEXT:    xorl %esi, %esi
+; AVX2-NEXT:    shldq %cl, %rdx, %rsi
+; AVX2-NEXT:    shlxq %rcx, %rdx, %rdx
+; AVX2-NEXT:    testb $64, %cl
+; AVX2-NEXT:    cmovneq %rdx, %rsi
+; AVX2-NEXT:    cmovneq %rax, %rdx
+; AVX2-NEXT:    andq 8(%rdi), %rsi
+; AVX2-NEXT:    andq (%rdi), %rdx
+; AVX2-NEXT:    orq %rsi, %rdx
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_ne_i128:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movl %esi, %ecx
+; AVX512-NEXT:    movl $1, %eax
+; AVX512-NEXT:    xorl %edx, %edx
+; AVX512-NEXT:    shldq %cl, %rax, %rdx
+; AVX512-NEXT:    xorl %esi, %esi
+; AVX512-NEXT:    shlxq %rcx, %rax, %rax
+; AVX512-NEXT:    testb $64, %cl
+; AVX512-NEXT:    cmovneq %rax, %rdx
+; AVX512-NEXT:    cmovneq %rsi, %rax
+; AVX512-NEXT:    andq 8(%rdi), %rdx
+; AVX512-NEXT:    andq (%rdi), %rax
+; AVX512-NEXT:    orq %rdx, %rax
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    retq
   %rem = and i32 %position, 127
   %ofs = zext nneg i32 %rem to i128
   %bit = shl nuw i128 1, %ofs
@@ -476,33 +623,124 @@ define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {
 define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind {
 ; X86-LABEL: complement_ne_i128:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    andl $96, ... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/165979
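
For readers skimming the revert: the deleted `narrowBitOpRMW` combine only ever rewrote single-bit read-modify-write patterns (BTS/BTR/BTC) on wider-than-legal integers so that they load, update, and store one aligned i32 word instead of the whole value. The sketch below restates that index arithmetic in plain C++. It is illustrative only — the function and variable names (`bigIntSetBit`, `bigIntTestBit`, `Words`, `Idx`) are invented, not from the patch — but it mirrors the `AND(Idx, -32)` / `AND(Idx, 31)` / `>> 3` split visible in the deleted code, which assumed the bit index was already proven in bounds via `computeKnownBits`.

```cpp
#include <cstdint>

// Hypothetical helper: set bit `Idx` of a big integer stored as i32 words.
// This is the shape of address math the reverted combine emitted.
void bigIntSetBit(uint32_t *Words, unsigned Idx) {
  unsigned AlignAmt = Idx & ~31u;  // AND(Idx, -32): aligned bit offset of the word
  unsigned ModuloAmt = Idx & 31u;  // AND(Idx, 31): bit position within that word
  // Byte offset = AlignAmt >> 3, i.e. word index = AlignAmt / 32.
  Words[AlignAmt / 32] |= uint32_t{1} << ModuloAmt; // BTS; &= ~ for BTR, ^= for BTC
}

// Hypothetical helper: the matching setcc narrowing, testing one bit via a
// single i32 load instead of a full-width AND and compare against zero.
bool bigIntTestBit(const uint32_t *Words, unsigned Idx) {
  return (Words[Idx / 32] >> (Idx & 31u)) & 1u;
}
```

With the revert applied, i64/i128 bit tests go back to materializing the full-width `1 << position` mask and touching every word of the value, which is what the regenerated CHECK lines in the test diff above show.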
