llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) <details> <summary>Changes</summary> * Remove X86ISD::PDEP/PEXT and use ISD::PDEP/PEXT instead * AutoUpgrade x86 pdep/pext intrinsics to llvm.pdep/pext generics * Move X86 DAG knownbits/demandedbits handling to generic (unchanged) * Move X86 InstCombine folds to generic (unchanged) * Add memory sanitizer handling for generic pdep/pext intrinsics * Updated clang builtins to emit generics --- Patch is 51.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/204144.diff 20 Files Affected: - (modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (+10) - (modified) clang/test/CodeGen/X86/bmi2-builtins.c (+4-4) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (-12) - (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+18) - (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+28) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+8) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1-49) - (modified) llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp (-88) - (modified) llvm/lib/Target/X86/X86InstrFragments.td (-4) - (modified) llvm/lib/Target/X86/X86InstrMisc.td (+8-46) - (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (-4) - (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+58) - (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+25-4) - (modified) llvm/test/CodeGen/X86/bmi2.ll (+11-12) - (modified) llvm/test/Instrumentation/MemorySanitizer/bmi.ll (+8-8) - (modified) llvm/test/Instrumentation/MemorySanitizer/pdep.ll (+25-10) - (modified) llvm/test/Instrumentation/MemorySanitizer/pext.ll (+25-10) - (modified) llvm/test/Transforms/InstCombine/pdep.ll (+12-18) - (modified) llvm/test/Transforms/InstCombine/pext.ll (+12-18) ``````````diff diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp 
b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index acfeb9967cd2f..50125a71fcd5f 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -976,6 +976,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } + case X86::BI__builtin_ia32_pdep_si: + case X86::BI__builtin_ia32_pdep_di: { + Function *F = CGM.getIntrinsic(Intrinsic::pdep, Ops[0]->getType()); + return Builder.CreateCall(F, Ops); + } + case X86::BI__builtin_ia32_pext_si: + case X86::BI__builtin_ia32_pext_di: { + Function *F = CGM.getIntrinsic(Intrinsic::pext, Ops[0]->getType()); + return Builder.CreateCall(F, Ops); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c index 1b2cb9048adb2..c83cc43d9fc3f 100644 --- a/clang/test/CodeGen/X86/bmi2-builtins.c +++ b/clang/test/CodeGen/X86/bmi2-builtins.c @@ -17,12 +17,12 @@ unsigned int test_bzhi_u32(unsigned int __X, unsigned int __Y) { } unsigned int test_pdep_u32(unsigned int __X, unsigned int __Y) { - // CHECK: @llvm.x86.bmi.pdep.32 + // CHECK: @llvm.pdep.i32 return _pdep_u32(__X, __Y); } unsigned int test_pext_u32(unsigned int __X, unsigned int __Y) { - // CHECK: @llvm.x86.bmi.pext.32 + // CHECK: @llvm.pext.i32 return _pext_u32(__X, __Y); } @@ -41,12 +41,12 @@ unsigned long long test_bzhi_u64(unsigned long long __X, unsigned long long __Y) } unsigned long long test_pdep_u64(unsigned long long __X, unsigned long long __Y) { - // CHECK: @llvm.x86.bmi.pdep.64 + // CHECK: @llvm.pdep.i64 return _pdep_u64(__X, __Y); } unsigned long long test_pext_u64(unsigned long long __X, unsigned long long __Y) { - // CHECK: @llvm.x86.bmi.pext.64 + // CHECK: @llvm.pext.i64 return _pext_u64(__X, __Y); } diff --git 
a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index b75a0485d6263..5c7785731111c 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2575,18 +2575,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_bmi_bzhi_64 : ClangBuiltin<"__builtin_ia32_bzhi_di">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_bmi_pdep_32 : ClangBuiltin<"__builtin_ia32_pdep_si">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_bmi_pdep_64 : ClangBuiltin<"__builtin_ia32_pdep_di">, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem]>; - def int_x86_bmi_pext_32 : ClangBuiltin<"__builtin_ia32_pext_si">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_bmi_pext_64 : ClangBuiltin<"__builtin_ia32_pext_di">, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem]>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5a4ae64cb98af..122b7f89c9d6c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12245,12 +12245,18 @@ SDValue DAGCombiner::visitPDEP(SDNode *N) { // pdep(x, 0) -> 0 if (isNullOrNullSplat(N1)) return DAG.getConstant(0, DL, VT); + // pdep(x, -1) -> x (all positions selected, bits deposited at identity) if (isAllOnesOrAllOnesSplat(N1)) return N0; + // fold pdep(c1, c2) -> expandBits(c1, c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::PDEP, DL, VT, {N0, N1})) return C; + + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 
b32c16fe4300f..44120cceed2a3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3952,6 +3952,24 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } + case ISD::PDEP: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // Zeros are retained from the mask operand. But not ones. + Known.One.clearAllBits(); + // The result will have at least as many trailing zeros as the non-mask + // operand since bits can only map to the same or higher bit position. + Known.Zero.setLowBits(Known2.countMinTrailingZeros()); + break; + } + case ISD::PEXT: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + // The result has as many leading zeros as the number of zeroes in the mask. + unsigned Count = Known.Zero.popcount(); + Known.Zero = APInt::getHighBitsSet(BitWidth, Count); + Known.One.clearAllBits(); + break; + } case ISD::CLMUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5999e7a9c9fb2..0bd636d19065f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2463,6 +2463,34 @@ bool TargetLowering::SimplifyDemandedBits( Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); break; } + case ISD::PDEP: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + unsigned DemandedBitsLZ = OriginalDemandedBits.countl_zero(); + APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); + + // If the demanded bits has leading zeroes, we don't demand those from the + // mask. 
+ if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1)) + return true; + + // The number of possible 1s in the mask determines the number of LSBs of + // operand 0 used. Undemanded bits from the mask don't matter so filter + // them before counting. + KnownBits Known2; + uint64_t Count = (~Known.Zero & LoMask).popcount(); + APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count)); + if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1)) + return true; + + // Zeroes are retained from the mask, but not ones. + Known.One.clearAllBits(); + // The result will have at least as many trailing zeros as the non-mask + // operand since bits can only map to the same or higher bit position. + Known.Zero.setLowBits(Known2.countMinTrailingZeros()); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 814e985ebf7be..9422fc6129efd 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -533,6 +533,10 @@ static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0 Name.starts_with("vprot")); // Added in 8.0 + if (Name.consume_front("bmi.")) + return (Name.starts_with("pdep.") || // Added in 23.0 + Name.starts_with("pext.")); // Added in 23.0 + return (Name == "addcarry.u32" || // Added in 8.0 Name == "addcarry.u64" || // Added in 8.0 Name == "addcarryx.u32" || // Added in 8.0 @@ -4616,6 +4620,10 @@ static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, } else if (Name.starts_with("avx512.mask.") && upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { // Rep will be updated by the call in the condition. 
+ } else if (Name.starts_with("bmi.pdep.")) { + Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pdep); + } else if (Name.starts_with("bmi.pext.")) { + Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pext); } else reportFatalUsageErrorWithCI("Unexpected intrinsic", CI); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b9a65e2671aa9..1bc4bfd4251cf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39748,25 +39748,6 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.One.clearAllBits(); break; } - case X86ISD::PDEP: { - KnownBits Known2; - Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Zeros are retained from the mask operand. But not ones. - Known.One.clearAllBits(); - // The result will have at least as many trailing zeros as the non-mask - // operand since bits can only map to the same or higher bit position. - Known.Zero.setLowBits(Known2.countMinTrailingZeros()); - break; - } - case X86ISD::PEXT: { - Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - // The result has as many leading zeros as the number of zeroes in the mask. - unsigned Count = Known.Zero.popcount(); - Known.Zero = APInt::getHighBitsSet(BitWidth, Count); - Known.One.clearAllBits(); - break; - } case X86ISD::VTRUNC: case X86ISD::VTRUNCS: case X86ISD::VTRUNCUS: @@ -46015,34 +45996,6 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( break; } - case X86ISD::PDEP: { - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - - unsigned DemandedBitsLZ = OriginalDemandedBits.countl_zero(); - APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); - - // If the demanded bits has leading zeroes, we don't demand those from the - // mask. 
- if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1)) - return true; - - // The number of possible 1s in the mask determines the number of LSBs of - // operand 0 used. Undemanded bits from the mask don't matter so filter - // them before counting. - KnownBits Known2; - uint64_t Count = (~Known.Zero & LoMask).popcount(); - APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count)); - if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1)) - return true; - - // Zeroes are retained from the mask, but not ones. - Known.One.clearAllBits(); - // The result will have at least as many trailing zeros as the non-mask - // operand since bits can only map to the same or higher bit position. - Known.Zero.setLowBits(Known2.countMinTrailingZeros()); - return false; - } case X86ISD::VPMADD52L: case X86ISD::VPMADD52H: { KnownBits KnownOp0, KnownOp1, KnownOp2; @@ -63423,8 +63376,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG); case X86ISD::BEXTR: case X86ISD::BEXTRI: - case X86ISD::BZHI: - case X86ISD::PDEP: return combineBMI(N, DAG, DCI); + case X86ISD::BZHI: return combineBMI(N, DAG, DCI); case X86ISD::PCLMULQDQ: return combinePCLMULQDQ(N, DAG, DCI); case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI); case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI); diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 4999581489e82..ad1c171428671 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -2259,94 +2259,6 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { // TODO should we convert this to an AND if the RHS is constant? 
} break; - case Intrinsic::x86_bmi_pext_32: - case Intrinsic::x86_bmi_pext_64: - if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { - if (MaskC->isNullValue()) { - return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); - } - if (MaskC->isAllOnesValue()) { - return IC.replaceInstUsesWith(II, II.getArgOperand(0)); - } - - unsigned MaskIdx, MaskLen; - if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { - // any single contingous sequence of 1s anywhere in the mask simply - // describes a subset of the input bits shifted to the appropriate - // position. Replace with the straight forward IR. - Value *Input = II.getArgOperand(0); - Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1)); - Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); - Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt); - return IC.replaceInstUsesWith(II, Shifted); - } - - if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { - uint64_t Src = SrcC->getZExtValue(); - uint64_t Mask = MaskC->getZExtValue(); - uint64_t Result = 0; - uint64_t BitToSet = 1; - - while (Mask) { - // Isolate lowest set bit. - uint64_t BitToTest = Mask & -Mask; - if (BitToTest & Src) - Result |= BitToSet; - - BitToSet <<= 1; - // Clear lowest set bit. - Mask &= Mask - 1; - } - - return IC.replaceInstUsesWith(II, - ConstantInt::get(II.getType(), Result)); - } - } - break; - case Intrinsic::x86_bmi_pdep_32: - case Intrinsic::x86_bmi_pdep_64: - if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { - if (MaskC->isNullValue()) { - return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); - } - if (MaskC->isAllOnesValue()) { - return IC.replaceInstUsesWith(II, II.getArgOperand(0)); - } - - unsigned MaskIdx, MaskLen; - if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { - // any single contingous sequence of 1s anywhere in the mask simply - // describes a subset of the input bits shifted to the appropriate - // position. 
Replace with the straight forward IR. - Value *Input = II.getArgOperand(0); - Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); - Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt); - Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1)); - return IC.replaceInstUsesWith(II, Masked); - } - - if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { - uint64_t Src = SrcC->getZExtValue(); - uint64_t Mask = MaskC->getZExtValue(); - uint64_t Result = 0; - uint64_t BitToTest = 1; - - while (Mask) { - // Isolate lowest set bit. - uint64_t BitToSet = Mask & -Mask; - if (BitToTest & Src) - Result |= BitToSet; - - BitToTest <<= 1; - // Clear lowest set bit; - Mask &= Mask - 1; - } - - return IC.replaceInstUsesWith(II, - ConstantInt::get(II.getType(), Result)); - } - } - break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index 9316360c5e02a..923b968382866 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -424,10 +424,6 @@ def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>; // Zero High Bits Starting with Specified Bit Position. def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>; -// Parallel extract and deposit. -def X86pdep : SDNode<"X86ISD::PDEP", SDTIntBinOp>; -def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>; - // X86-specific multiply by immediate. 
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 613a431fe365a..c6acaa697fdc7 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1391,55 +1391,17 @@ multiclass PdepPext<string m, X86TypeInfo t, SDPatternOperator node, } let Predicates = [HasBMI2, NoEGPR] in { - defm PDEP32 : PdepPext<"pdep", Xi32, X86pdep>, XD, VEX; - defm PDEP64 : PdepPext<"pdep", Xi64, X86pdep>, XD, REX_W, VEX; - defm PEXT32 : PdepPext<"pext", Xi32, X86pext>, XS, VEX; - defm PEXT64 : PdepPext<"pext", Xi64, X86pext>, XS, REX_W, VEX; + defm PDEP32 : PdepPext<"pdep", Xi32, pdep>, XD, VEX; + defm PDEP64 : PdepPext<"pdep", Xi64, pdep>, XD, REX_W, VEX; + defm PEXT32 : PdepPext<"pext", Xi32, pext>, XS, VEX; + defm PEXT64 : PdepPext<"pext", Xi64, pext>, XS, REX_W, VEX; } let Predicates = [HasBMI2, HasEGPR] in { - defm PDEP32 : PdepPext<"pdep", Xi32, X86pdep, "_EVEX">, XD, EVEX; - defm PDEP64 : PdepPext<"pdep", Xi64, X86pdep, "_EVEX">, XD, REX_W, EVEX; - defm PEXT32 : PdepPext<"pext", Xi32, X86pext, "_EVEX">, XS, EVEX; - defm PEXT64 : PdepPext<"pext", Xi64, X86pext, "_EVEX">, XS, REX_W, EVEX; -} - -let Predicates = [HasBMI2, NoEGPR] in { - def : Pat<(i32 (pext GR32:$src, GR32:$mask)), - (PEXT32rr GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pext GR32:$src, (loadi32 addr:$mask))), - (PEXT32rm GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pext GR64:$src, GR64:$mask)), - (PEXT64rr GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pext GR64:$src, (loadi64 addr:$mask))), - (PEXT64rm GR64:$src, i64mem:$mask)>; - def : Pat<(i32 (pdep GR32:$src, GR32:$mask)), - (PDEP32rr GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pdep GR32:$src, (loadi32 addr:$mask))), - (PDEP32rm GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pdep GR64:$src, GR64:$mask)), - (PDEP64rr GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pdep GR64:$src, (loadi64 addr:$mask))), - (PDEP64rm GR64:$src, i64mem:$mask)>; -} - -let 
Predicates = [HasBMI2, HasEGPR] in { - def : Pat<(i32 (pext GR32:$src, GR32:$mask)), - (PEXT32rr_EVEX GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pext GR32:$src, (loadi32 addr:$mask))), - (PEXT32rm_EVEX GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pext GR64:$src, GR64:$mask)), - (PEXT64rr_EVEX GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pext GR64:$src, (loadi64 addr:$mask))), - (PEXT64rm_EVEX GR64:$src, i64mem:$mask)>; - def : Pat<(i32 (pdep GR32:$src, GR32:$mask)), - (PDEP32rr_EVEX GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pdep GR32:$src, (loadi32 addr:$mask))), - (PDEP32rm_EVEX GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pdep GR64:$src, GR64:$mask)), - (PDEP64rr_EVEX GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pdep GR64:$src, (loadi64 addr:$mask))), - (PDEP64rm_EVEX GR64:$src, i64mem:$mask)>; + defm PDEP32 : PdepPext<"pdep", Xi32, pdep, "_EVEX">, XD, EVEX; + defm PDEP64 : PdepPext<"pdep", Xi64, pdep, "_EVEX">, XD, REX_W, EVEX; + defm PEXT32 : PdepPext<"pext", Xi32, pext, "_EVEX">, XS, EVEX; + defm PEXT64 : PdepPext<"pext", Xi64, pext, "_EVEX">, XS, REX_W, EVEX; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 9e32ca23dafe2..a6b0db0230cf3 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1837,10 +1837,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0), X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0), - X86_INTRINSIC_DATA(bmi_pdep_32, INTR_TYPE_2OP, X86ISD::PDEP, 0), - X86_INTRINSIC_DATA(bmi_pdep_64, INTR_TYPE_2OP, X86ISD::PDEP, 0), - X86_INTRINSIC_DATA(bmi_pext_32, INTR_TYPE_2O... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/204144 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
