Author: Ramkumar Ramachandra
Date: 2026-05-30T21:42:51Z
New Revision: 223ef1f31274c5dc0f176789fa09e9e467fb49fe

URL: 
https://github.com/llvm/llvm-project/commit/223ef1f31274c5dc0f176789fa09e9e467fb49fe
DIFF: 
https://github.com/llvm/llvm-project/commit/223ef1f31274c5dc0f176789fa09e9e467fb49fe.diff

LOG: [IRBuilder] ConstFold unary intrinsics (#200496)

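Extend TargetFolder and InstSimplifyFolder to fold unary intrinsics.
CreateUnaryIntrinsic now returns a Value * instead of a CallInst *, similar
to CreateBinaryIntrinsic, because the result may be a folded value rather
than a newly created call. This has necessitated more changes: callers that
relied on the CallInst return type (including users of CreateFAbs) are
updated to handle a non-instruction result.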

Added: 
    

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/builtins-nvptx-native-half-type.c
    clang/test/CodeGen/promoted-complex-div.c
    llvm/include/llvm/Analysis/ConstantFolding.h
    llvm/include/llvm/Analysis/InstSimplifyFolder.h
    llvm/include/llvm/Analysis/TargetFolder.h
    llvm/include/llvm/IR/ConstantFolder.h
    llvm/include/llvm/IR/IRBuilder.h
    llvm/include/llvm/IR/IRBuilderFolder.h
    llvm/include/llvm/IR/NoFolder.h
    llvm/lib/Analysis/ConstantFolding.cpp
    llvm/lib/IR/IRBuilder.cpp
    llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
    llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
    llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
    
llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
    
llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
    llvm/test/Transforms/SLPVectorizer/SystemZ/reorder-same-node.ll
    llvm/test/Transforms/SLPVectorizer/X86/const-reduced-vals-resized.ll
    llvm/test/Transforms/SLPVectorizer/X86/constant-based-reductions.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll
    llvm/test/Transforms/SLPVectorizer/X86/ctpop-non-power-of-2-reduction.ll
    llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll
    llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll
    llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
    llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduce-with-folded-to-consts.ll
    
llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction-value-in-tree.ll
    llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
    
llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll
    
llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll
    llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9ceda41c69da5..04bd92bcc1c9f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -757,8 +757,9 @@ static llvm::Value *emitModfBuiltin(CodeGenFunction &CGF, 
const CallExpr *E,
 
 /// EmitFAbs - Emit a call to @llvm.fabs().
 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
-  llvm::CallInst *Call = CGF.Builder.CreateFAbs(V);
-  Call->setDoesNotAccessMemory();
+  llvm::Value *Call = CGF.Builder.CreateFAbs(V);
+  if (auto *CallI = dyn_cast<llvm::CallInst>(Call))
+    CallI->setDoesNotAccessMemory();
   return Call;
 }
 

diff  --git a/clang/test/CodeGen/builtins-nvptx-native-half-type.c 
b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
index 1f16c7e54b85d..6092054feb96f 100644
--- a/clang/test/CodeGen/builtins-nvptx-native-half-type.c
+++ b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
@@ -174,16 +174,16 @@ __device__ void nvvm_min_max_sm86() {
 }
 
 // CHECK-LABEL: nvvm_fabs_f16
-__device__ void nvvm_fabs_f16() {
+__device__ void nvvm_fabs_f16(const __fp16 *p, const __fp16 *q) {
 #if __CUDA_ARCH__ >= 530
   // CHECK: call half @llvm.nvvm.fabs.f16
-  __nvvm_fabs_f16(0.1f16);
+  __nvvm_fabs_f16(*p);
   // CHECK: call half @llvm.nvvm.fabs.ftz.f16
-  __nvvm_fabs_ftz_f16(0.1f16);
+  __nvvm_fabs_ftz_f16(*p);
   // CHECK: call <2 x half> @llvm.nvvm.fabs.v2f16
-  __nvvm_fabs_f16x2({0.1f16, 0.7f16});
+  __nvvm_fabs_f16x2({*p, *q});
   // CHECK: call <2 x half> @llvm.nvvm.fabs.ftz.v2f16
-  __nvvm_fabs_ftz_f16x2({0.1f16, 0.7f16});
+  __nvvm_fabs_ftz_f16x2({*p, *q});
 #endif
   // CHECK: ret void
 }

diff  --git a/clang/test/CodeGen/promoted-complex-div.c 
b/clang/test/CodeGen/promoted-complex-div.c
index 006b5e334e6ea..e68c5f5ac85ad 100644
--- a/clang/test/CodeGen/promoted-complex-div.c
+++ b/clang/test/CodeGen/promoted-complex-div.c
@@ -89,7 +89,7 @@ _Complex double divf(_Complex double a, _Complex double b) {
 typedef double a;
 _Complex double *b;
 // CHECK-LABEL: define dso_local void @DivideByComplexZero
-void DivideByComplexZero() {
+void DivideByComplexZero(_Complex double p) {
   // CHECK: fpext double {{.*}} to x86_fp80
   // CHECK: fpext double {{.*}} to x86_fp80
   // CHECK: fmul x86_fp80
@@ -108,6 +108,9 @@ void DivideByComplexZero() {
   // NOX87-NEXT: fcmp ugt double {{.*}}, {{.*}}
   // NOX87-NEXT: br i1 {{.*}}, label
   // NOX87: abs_rhsr_greater_or_equal_abs_rhsi:
+  // NOX87-NEXT: fdiv double
+  // NOX87-NEXT: fmul double
+  // NOX87-NEXT: fadd double
   // NOX87-NEXT: fmul double
   // NOX87-NEXT: fadd double
   // NOX87-NEXT: fdiv double
@@ -116,6 +119,9 @@ void DivideByComplexZero() {
   // NOX87-NEXT: fdiv double
   // NOX87-NEXT: br label {{.*}}
   // NOX87: abs_rhsr_less_than_abs_rhsi:
+  // NOX87-NEXT: fdiv double
+  // NOX87-NEXT: fmul double
+  // NOX87-NEXT: fadd double
   // NOX87-NEXT: fmul double
   // NOX87-NEXT: fadd double
   // NOX87-NEXT: fdiv double
@@ -131,5 +137,5 @@ void DivideByComplexZero() {
   // NOX87-NEXT: store double
   // NOX87-NEXT: store double
 
-  *b /= 1.0iF * (a)0;
+  *b /= p * (a)0;
 }

diff  --git a/llvm/include/llvm/Analysis/ConstantFolding.h 
b/llvm/include/llvm/Analysis/ConstantFolding.h
index 8b7769a93df0a..2f7a327e1652a 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -169,6 +169,9 @@ LLVM_ABI Constant *ConstantFoldCall(const CallBase *Call, 
Function *F,
                                     const TargetLibraryInfo *TLI = nullptr,
                                     bool AllowNonDeterministic = true);
 
+LLVM_ABI Constant *ConstantFoldUnaryIntrinsic(Intrinsic::ID ID, Constant *Op,
+                                              Type *Ty);
+
 LLVM_ABI Constant *ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                                Constant *RHS, Type *Ty);
 

diff  --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h 
b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
index b0b8dde3b2967..a8dff839de214 100644
--- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h
+++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
@@ -120,6 +120,12 @@ class LLVM_ABI InstSimplifyFolder final : public 
IRBuilderFolder {
     return simplifyCastInst(Op, V, DestTy, SQ);
   }
 
+  Value *
+  FoldUnaryIntrinsic(Intrinsic::ID ID, Value *Op, Type *Ty,
+                     FastMathFlags FMF = FastMathFlags()) const override {
+    return simplifyUnaryIntrinsic(ID, Op, FMF, SQ);
+  }
+
   Value *
   FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
                       FastMathFlags FMF = FastMathFlags()) const override {

diff  --git a/llvm/include/llvm/Analysis/TargetFolder.h 
b/llvm/include/llvm/Analysis/TargetFolder.h
index 596e3600f7306..0a9d9c3d88111 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -191,6 +191,13 @@ class LLVM_ABI TargetFolder final : public IRBuilderFolder 
{
     return nullptr;
   }
 
+  Value *FoldUnaryIntrinsic(Intrinsic::ID ID, Value *Op, Type *Ty,
+                            FastMathFlags FMF) const override {
+    if (auto *OpC = dyn_cast<Constant>(Op))
+      return ConstantFoldUnaryIntrinsic(ID, OpC, Ty);
+    return nullptr;
+  }
+
   Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type 
*Ty,
                              FastMathFlags FMF) const override {
     auto *C1 = dyn_cast<Constant>(LHS);

diff  --git a/llvm/include/llvm/IR/ConstantFolder.h 
b/llvm/include/llvm/IR/ConstantFolder.h
index 384773027fb65..b2937c2339ca7 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -182,6 +182,12 @@ class LLVM_ABI ConstantFolder final : public 
IRBuilderFolder {
     return nullptr;
   }
 
+  Value *FoldUnaryIntrinsic(Intrinsic::ID ID, Value *Op, Type *Ty,
+                            FastMathFlags FMF) const override {
+    // Use TargetFolder or InstSimplifyFolder instead.
+    return nullptr;
+  }
+
   Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type 
*Ty,
                              FastMathFlags FMF) const override {
     // Use TargetFolder or InstSimplifyFolder instead.

diff  --git a/llvm/include/llvm/IR/IRBuilder.h 
b/llvm/include/llvm/IR/IRBuilder.h
index a10a59a9ae9e7..7c3eb46cb1cbb 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1008,9 +1008,9 @@ class IRBuilderBase {
 
   /// Create a call to intrinsic \p ID with 1 operand which is mangled on its
   /// type.
-  LLVM_ABI CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
-                                          FMFSource FMFSource = {},
-                                          const Twine &Name = "");
+  LLVM_ABI Value *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op,
+                                       FMFSource FMFSource = {},
+                                       const Twine &Name = "");
 
   /// Create a call to intrinsic \p ID with 2 operands which is mangled on the
   /// first type.
@@ -1045,8 +1045,8 @@ class IRBuilderBase {
   }
 
   /// Create call to the fabs intrinsic.
-  CallInst *CreateFAbs(Value *V, FMFSource FMFSource = {},
-                       const Twine &Name = "") {
+  Value *CreateFAbs(Value *V, FMFSource FMFSource = {},
+                    const Twine &Name = "") {
     return CreateUnaryIntrinsic(Intrinsic::fabs, V, FMFSource, Name);
   }
 

diff  --git a/llvm/include/llvm/IR/IRBuilderFolder.h 
b/llvm/include/llvm/IR/IRBuilderFolder.h
index 3cf3bcd3d9522..e68fff29b7165 100644
--- a/llvm/include/llvm/IR/IRBuilderFolder.h
+++ b/llvm/include/llvm/IR/IRBuilderFolder.h
@@ -76,6 +76,10 @@ class LLVM_ABI IRBuilderFolder {
   virtual Value *FoldCast(Instruction::CastOps Op, Value *V,
                           Type *DestTy) const = 0;
 
+  virtual Value *
+  FoldUnaryIntrinsic(Intrinsic::ID ID, Value *Op, Type *Ty,
+                     FastMathFlags FMF = FastMathFlags()) const = 0;
+
   virtual Value *
   FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
                       FastMathFlags FMF = FastMathFlags()) const = 0;

diff  --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h
index a8dbbf5eeb1c2..a86cbf724e69f 100644
--- a/llvm/include/llvm/IR/NoFolder.h
+++ b/llvm/include/llvm/IR/NoFolder.h
@@ -114,6 +114,11 @@ class LLVM_ABI NoFolder final : public IRBuilderFolder {
     return nullptr;
   }
 
+  Value *FoldUnaryIntrinsic(Intrinsic::ID ID, Value *Op, Type *Ty,
+                            FastMathFlags FMF) const override {
+    return nullptr;
+  }
+
   Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type 
*Ty,
                              FastMathFlags FMF) const override {
     return nullptr;

diff  --git a/llvm/lib/Analysis/ConstantFolding.cpp 
b/llvm/lib/Analysis/ConstantFolding.cpp
index 7846e66896d5e..959b46f8eff46 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -4691,6 +4691,11 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID 
IntrinsicID,
 
 } // end anonymous namespace
 
+Constant *llvm::ConstantFoldUnaryIntrinsic(Intrinsic::ID ID, Constant *Op,
+                                           Type *Ty) {
+  return ConstantFoldScalarCall1("", ID, Ty, Op, nullptr, nullptr);
+}
+
 Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                             Constant *RHS, Type *Ty) {
   return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, nullptr);

diff  --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 7cdcb60a78aa3..a52c3db9dad97 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -909,12 +909,15 @@ CallInst *IRBuilderBase::CreateGCGetPointerOffset(Value 
*DerivedPtr,
                          {DerivedPtr}, {}, Name);
 }
 
-CallInst *IRBuilderBase::CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
-                                              FMFSource FMFSource,
-                                              const Twine &Name) {
+Value *IRBuilderBase::CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op,
+                                           FMFSource FMFSource,
+                                           const Twine &Name) {
   Module *M = BB->getModule();
-  Function *Fn = Intrinsic::getOrInsertDeclaration(M, ID, {V->getType()});
-  return createCallHelper(Fn, {V}, Name, FMFSource);
+  Function *Fn = Intrinsic::getOrInsertDeclaration(M, ID, Op->getType());
+  if (Value *V = Folder.FoldUnaryIntrinsic(ID, Op, Fn->getReturnType(),
+                                           FMFSource.get(FMF)))
+    return V;
+  return createCallHelper(Fn, Op, Name, FMFSource);
 }
 
 Value *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS,

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index ffc4b484de072..1bcfc1da3b84e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1113,8 +1113,10 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
   Value *FQM = Builder.CreateFMul(FA, RCP);
 
   // fq = trunc(fqm);
-  CallInst *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
-  FQ->copyFastMathFlags(Builder.getFastMathFlags());
+  Value *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
+  auto *FQI = dyn_cast<Instruction>(FQ);
+  if (FQI)
+    FQI->copyFastMathFlags(Builder.getFastMathFlags());
 
   // float fqneg = -fq;
   Value *FQNeg = Builder.CreateFNeg(FQ);
@@ -1123,18 +1125,18 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
   auto FMAD = !ST.hasMadMacF32Insts()
                   ? Intrinsic::fma
                   : (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
-  Value *FR = Builder.CreateIntrinsic(FMAD,
-                                      {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);
+  Value *FR =
+      Builder.CreateIntrinsic(FMAD, {FQNeg->getType()}, {FQNeg, FB, FA}, FQI);
 
   // int iq = (int)fq;
   Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
                        : Builder.CreateFPToUI(FQ, I32Ty);
 
   // fr = fabs(fr);
-  FR = Builder.CreateFAbs(FR, FQ);
+  FR = Builder.CreateFAbs(FR, FQI);
 
   // fb = fabs(fb);
-  FB = Builder.CreateFAbs(FB, FQ);
+  FB = Builder.CreateFAbs(FB, FQI);
 
   // int cv = fr >= fb;
   Value *CV = Builder.CreateFCmpOGE(FR, FB);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index dc4225b94b466..1e92028b0fff3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1176,14 +1176,15 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, 
IRBuilder<> &B,
     // rootn(x, 2) = sqrt(x)
     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << 
")\n");
 
-    CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
+    Value *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
     NewCall->takeName(CI);
 
     // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
     // metadata.
     MDBuilder MDHelper(M->getContext());
     MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 
2.0f));
-    NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);
+    if (auto *NewCallI = dyn_cast<Instruction>(NewCall))
+      NewCallI->setMetadata(LLVMContext::MD_fpmath, FPMD);
 
     replaceCall(CI, NewCall);
     return true;
@@ -1222,10 +1223,11 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, 
IRBuilder<> &B,
     FastMathFlags FMF = FPOp->getFastMathFlags();
     FMF.setAllowContract(true);
 
-    CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
+    Value *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
     Instruction *RSqrt = cast<Instruction>(
         B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
-    Sqrt->setFastMathFlags(FMF);
+    if (auto *SqrtI = dyn_cast<Instruction>(Sqrt))
+      SqrtI->setFastMathFlags(FMF);
     RSqrt->setFastMathFlags(FMF);
     RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);
 

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index dd1444197cf5d..aeb40939cd10e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3138,7 +3138,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst 
&CI) {
     Value *X;
     // fabs (-X) --> fabs (X)
     if (match(Arg, m_FNeg(m_Value(X)))) {
-      CallInst *Fabs = Builder.CreateFAbs(X, II);
+      Value *Fabs = Builder.CreateFAbs(X, II);
       return replaceInstUsesWith(CI, Fabs);
     }
 
@@ -3168,7 +3168,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst 
&CI) {
     if (match(II->getArgOperand(0),
               m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
       // fabs (copysign x, y) -> (fabs x)
-      CallInst *AbsSign = Builder.CreateFAbs(Magnitude, II);
+      Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
       return replaceInstUsesWith(*II, AbsSign);
     }
 

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 97e1a6555eac4..7b6d380acffe1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5896,7 +5896,7 @@ static Instruction *foldICmpPow2Test(ICmpInst &I,
 
   if (A) {
     Type *Ty = A->getType();
-    CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
+    Value *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
     return CheckIs ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop,
                                   ConstantInt::get(Ty, 2))
                    : new ICmpInst(ICmpInst::ICMP_UGT, CtPop,

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 021a850860b34..17ac09c10f41c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1082,10 +1082,10 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator 
&I) {
       match(&I,
             m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::tan>(m_Value(X))),
                      m_OneUse(m_Intrinsic<Intrinsic::cos>(m_Deferred(X)))))) {
-    auto *Sin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, &I);
-    if (auto *Metadata = I.getMetadata(LLVMContext::MD_fpmath)) {
-      Sin->setMetadata(LLVMContext::MD_fpmath, Metadata);
-    }
+    Value *Sin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, &I);
+    if (auto *Metadata = I.getMetadata(LLVMContext::MD_fpmath))
+      if (auto *SinI = dyn_cast<Instruction>(Sin))
+        SinI->setMetadata(LLVMContext::MD_fpmath, Metadata);
     return replaceInstUsesWith(I, Sin);
   }
 

diff  --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp 
b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 7feab4baa3de0..1cbc8d0bef5ae 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1937,7 +1937,7 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, 
IRBuilderBase &B,
 // Replace a libcall \p CI with a call to intrinsic \p IID
 static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
                                Intrinsic::ID IID) {
-  CallInst *NewCall = B.CreateUnaryIntrinsic(IID, CI->getArgOperand(0), CI);
+  Value *NewCall = B.CreateUnaryIntrinsic(IID, CI->getArgOperand(0), CI);
   NewCall->takeName(CI);
   return copyFlags(*CI, NewCall);
 }
@@ -2640,9 +2640,12 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, 
IRBuilderBase &B) {
           Known.isKnownNeverLogicalZero(F->getDenormalMode(FltSem));
     }
     if (IsKnownNoErrno) {
-      auto *NewLog = B.CreateUnaryIntrinsic(LogID, Log->getArgOperand(0), Log);
-      NewLog->copyMetadata(*Log);
-      return copyFlags(*Log, NewLog);
+      Value *NewLog = B.CreateUnaryIntrinsic(LogID, Log->getArgOperand(0), 
Log);
+      if (auto *I = dyn_cast<Instruction>(NewLog)) {
+        I->copyMetadata(*Log);
+        return copyFlags(*Log, I);
+      }
+      return NewLog;
     }
   } else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
              LogID == Intrinsic::log10) {

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
 
b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
index 54c31285c194e..bdecbd0aa1053 100644
--- 
a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
+++ 
b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
@@ -4,8 +4,7 @@
 define i1 @degenerate() {
 ; CHECK-LABEL: define i1 @degenerate() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[OR_COND30:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x 
i1> zeroinitializer)
-; CHECK-NEXT:    ret i1 [[OR_COND30]]
+; CHECK-NEXT:    ret i1 false
 ;
 entry:
   %0 = extractelement <4 x fp128> zeroinitializer, i32 0

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
 
b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
index c89a82dd35963..623a69ef9170a 100644
--- 
a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
+++ 
b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
@@ -6,9 +6,7 @@ define i32 @test(ptr %0, ptr %1) {
 ; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> 
splat (i8 1))
-; CHECK-NEXT:    [[TMP3:%.*]] = sext i8 [[TMP2]] to i32
-; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 1, [[LOAD_5]]
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll 
b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
index d71e4f8f5906e..bc05cc40bd549 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
@@ -364,11 +364,10 @@ define void @reduce_or_2() {
 ; ZVFHDEFAULT-NEXT:    ret void
 ;
 ; ZVFH256-LABEL: @reduce_or_2(
-; ZVFH256-NEXT:    [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x 
i1> zeroinitializer)
-; ZVFH256-NEXT:    br i1 [[TMP1]], label [[TMP3:%.*]], label [[TMP2:%.*]]
-; ZVFH256:       2:
+; ZVFH256-NEXT:    br i1 false, label [[TMP2:%.*]], label [[TMP1:%.*]]
+; ZVFH256:       1:
 ; ZVFH256-NEXT:    ret void
-; ZVFH256:       3:
+; ZVFH256:       2:
 ; ZVFH256-NEXT:    ret void
 ;
 ; ZVFH512-LABEL: @reduce_or_2(

diff  --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/reorder-same-node.ll 
b/llvm/test/Transforms/SLPVectorizer/SystemZ/reorder-same-node.ll
index b4c8a1c587694..f4ed85d74aa1b 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/reorder-same-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/reorder-same-node.ll
@@ -4,8 +4,7 @@
 define void @test() {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.xor.v8i64(<8 x 
i64> zeroinitializer)
-; CHECK-NEXT:    store i64 [[TMP1]], ptr null, align 8
+; CHECK-NEXT:    store i64 0, ptr null, align 8
 ; CHECK-NEXT:    ret void
 ;
   %1 = zext i8 0 to i32

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/const-reduced-vals-resized.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/const-reduced-vals-resized.ll
index dc8c1c420bf80..fddc0ce41f38e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/const-reduced-vals-resized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/const-reduced-vals-resized.ll
@@ -4,8 +4,7 @@
 define i32 @test() {
 ; CHECK-LABEL: define i32 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x 
i32> zeroinitializer)
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 0
 ;
 entry:
   %cond = zext i1 false to i32

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/constant-based-reductions.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/constant-based-reductions.ll
index 405e44dfabf9b..02aa48ff177a1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/constant-based-reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/constant-based-reductions.ll
@@ -7,9 +7,7 @@ define void @test() {
 ; CHECK-NEXT:  [[BB:.*]]:
 ; CHECK-NEXT:    br label %[[BB8:.*]]
 ; CHECK:       [[BB8]]:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[OP_RDX:%.*]], %[[BB8]] ], [ 0, 
%[[BB]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x 
i32> zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX]] = add i32 0, [[TMP0]]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB8]] ], [ 0, %[[BB]] ]
 ; CHECK-NEXT:    br label %[[BB8]]
 ;
 bb:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll
index fded7a4f3f0c7..fac532fe1a142 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll
@@ -4,9 +4,7 @@
 define i32 @crash_reordering_undefs() {
 ; CHECK-LABEL: @crash_reordering_undefs(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ADD0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x 
i32> splat (i32 65537))
-; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 undef, [[ADD0]]
-; CHECK-NEXT:    ret i32 [[OP_RDX]]
+; CHECK-NEXT:    ret i32 undef
 ;
 entry:
   %or0 = or i64 undef, undef

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/ctpop-non-power-of-2-reduction.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/ctpop-non-power-of-2-reduction.ll
index cabb241226466..17377a5a424da 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ctpop-non-power-of-2-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ctpop-non-power-of-2-reduction.ll
@@ -4,9 +4,7 @@
 define i8 @test() {
 ; CHECK-LABEL: define i8 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = call i12 @llvm.ctpop.i12(i12 256)
-; CHECK-NEXT:    [[OP_RDX:%.*]] = trunc i12 [[TMP0]] to i8
-; CHECK-NEXT:    ret i8 [[OP_RDX]]
+; CHECK-NEXT:    ret i8 1
 ;
 entry:
   %inc.1.i.i = zext i1 false to i8

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll
index 4a0a5c56960b3..68c0a7df33830 100644
--- 
a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll
+++ 
b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll
@@ -23,8 +23,7 @@ define i32 @test() {
 ; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[RDX_OP2]], <4 x i8> 
poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <32 x i8> [[TMP33]], <32 x i8> 
[[TMP35]], <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 
17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 
27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:    [[TMP37:%.*]] = call i8 @llvm.vector.reduce.xor.v32i8(<32 x 
i8> [[TMP36]])
-; CHECK-NEXT:    [[TMP38:%.*]] = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> 
splat (i8 1))
-; CHECK-NEXT:    [[TMP23:%.*]] = xor i8 [[TMP38]], [[TMP37]]
+; CHECK-NEXT:    [[TMP23:%.*]] = xor i8 0, [[TMP37]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = sext i8 [[TMP23]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP24]]
 ;

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll
index 8e4b280271051..81200132a2dc9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll
@@ -24,8 +24,7 @@ define i1 @test() {
 ; CHECK-NEXT:    [[CMP3_I_25_I355_I_I:%.*]] = icmp ugt i32 
[[CONV85_25_I354_I_I]], 0
 ; CHECK-NEXT:    [[SHL_I111_25_I356_I_I:%.*]] = select i1 
[[CMP3_I_25_I355_I_I]], i32 0, i32 0
 ; CHECK-NEXT:    [[C25_I357_I_I:%.*]] = shl i32 [[CONV85_25_I354_I_I]], 
[[SHL_I111_25_I356_I_I]]
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x 
i32> zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP4]], [[C22_I336_I_I]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 0, [[C22_I336_I_I]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = and i32 [[C23_I343_I_I]], [[C24_I350_I_I]]
 ; CHECK-NEXT:    [[OP_RDX2:%.*]] = and i32 [[OP_RDX]], [[OP_RDX1]]
 ; CHECK-NEXT:    [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[C25_I357_I_I]]

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
index d7c63457bf5c1..9a8cd736836de 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
@@ -8,12 +8,11 @@ define i8 @test() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 0 to i8
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 0 to i8
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 0 to i8
-; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> 
zeroinitializer)
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i8 [[TMP0]], [[TMP2]]
 ; CHECK-NEXT:    [[OP_RDX2:%.*]] = or i8 [[OP_RDX1]], [[TMP0]]
 ; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i8 [[OP_RDX2]], [[TMP1]]
 ; CHECK-NEXT:    [[OP_RDX5:%.*]] = or i8 [[OP_RDX3]], [[TMP3]]
-; CHECK-NEXT:    [[OP_RDX4:%.*]] = or i8 [[OP_RDX5]], [[TMP4]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = or i8 [[OP_RDX5]], 0
 ; CHECK-NEXT:    ret i8 [[OP_RDX4]]
 ;
 entry:

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll
index ddcd038c2b220..99016bb3b3cc5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll
@@ -9,12 +9,10 @@ define i1 @foo() {
 ; CHECK-NEXT:    [[TOBOOL_NOT_NOT509_I_2329_I_I1:%.*]] = icmp ne i32 0, 0
 ; CHECK-NEXT:    [[STOREMERGE_2333_I_I:%.*]] = select i1 
[[TOBOOL_NOT_NOT509_I_2329_I_I1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TOBOOL_NOT_NOT509_I_1_2_I_I:%.*]] = icmp ne i32 
[[STOREMERGE_2333_I_I]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> 
zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX4:%.*]] = select i1 [[TMP0]], i1 
[[TOBOOL_NOT_NOT509_I_1_2_I_I]], i1 false
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = select i1 false, i1 
[[TOBOOL_NOT_NOT509_I_1_2_I_I]], i1 false
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 
[[TOBOOL_NOT_NOT509_I_2329_I_I1]], i1 [[TOBOOL_NOT_NOT509_I_1_1_I_I]], i1 false
-; CHECK-NEXT:    [[OP_RDX2:%.*]] = select i1 [[TOBOOL_NOT_NOT509_I_2329_I_I]], 
i1 false, i1 false
 ; CHECK-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX4]], i1 [[OP_RDX1]], i1 
false
-; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[OP_RDX3]], i1 [[OP_RDX2]], i1 
false
+; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[OP_RDX3]], i1 
[[TOBOOL_NOT_NOT509_I_2329_I_I]], i1 false
 ; CHECK-NEXT:    ret i1 [[OP_RDX]]
 ;
 entry:

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/reduce-with-folded-to-consts.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/reduce-with-folded-to-consts.ll
index 6ab010919ffe8..91a9e4368a52b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduce-with-folded-to-consts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduce-with-folded-to-consts.ll
@@ -5,9 +5,7 @@ define void @test() {
 ; CHECK-LABEL: define void @test
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x 
i32> zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX2:%.*]] = add i32 0, [[TMP0]]
-; CHECK-NEXT:    store i32 [[OP_RDX2]], ptr null, align 4
+; CHECK-NEXT:    store i32 0, ptr null, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
 
b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
index 5cbf78435233b..b4e5d0968346d 100644
--- 
a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
+++ 
b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll
@@ -9,9 +9,7 @@ define void @test() {
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ 
[[TMP4:%.*]], %[[BB1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> 
zeroinitializer)
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
-; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 0, [[TMP1]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TRUNC]]
 ; CHECK-NEXT:    [[TMP4]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 
[[OP_RDX1]], i32 1
 ; CHECK-NEXT:    br label %[[BB1]]

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll
index e35491823cc55..417678c9949e7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll
@@ -4,8 +4,7 @@
 define i16 @test() {
 ; CHECK-LABEL: define i16 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[OP_RDX:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x 
i16> zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i16 [[OP_RDX]], 0
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i16 0, 0
 ; CHECK-NEXT:    ret i16 [[OP_RDX1]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-value-in-tree.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/reduction-value-in-tree.ll
index 6d6dd502415e5..f83ad05f08b2e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-value-in-tree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-value-in-tree.ll
@@ -5,9 +5,7 @@ define void @test() {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br i1 false, label [[PH:%.*]], label [[EXIT:%.*]]
 ; CHECK:       ph:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> 
zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX:%.*]] = and i8 0, [[TMP0]]
-; CHECK-NEXT:    [[OP_RDX1:%.*]] = and i8 [[OP_RDX]], 0
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = and i8 0, 0
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ [[OP_RDX1]], [[PH]] ], [ 0, [[BB:%.*]] 
]

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
index 0925a7713f392..ce308692fbb2e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
@@ -21,9 +21,7 @@ define i16 @test() {
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <28 x i1> [[RDX_OP]] to i28
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i28 @llvm.ctpop.i28(i28 [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = trunc i28 [[TMP12]] to i16
-; CHECK-NEXT:    [[TMP14:%.*]] = call i4 @llvm.ctpop.i4(i4 -8)
-; CHECK-NEXT:    [[TMP15:%.*]] = zext i4 [[TMP14]] to i16
-; CHECK-NEXT:    [[OP_RDX4:%.*]] = add i16 [[TMP15]], [[TMP13]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = add i16 1, [[TMP13]]
 ; CHECK-NEXT:    ret i16 [[OP_RDX4]]
 ;
 entry:

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll
 
b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll
index bdd5971c0c91a..9c86e81f0df5b 100644
--- 
a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll
+++ 
b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll
@@ -9,9 +9,7 @@ define void @test() {
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ 
[[TMP4:%.*]], %[[BB1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> 
zeroinitializer)
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
-; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 0, [[TMP1]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TRUNC]]
 ; CHECK-NEXT:    [[TMP4]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 
[[OP_RDX1]], i32 1
 ; CHECK-NEXT:    br label %[[BB1]]

diff  --git 
a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll
 
b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll
index 57b46274f4a13..4305ad171c4b9 100644
--- 
a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll
+++ 
b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll
@@ -4,16 +4,7 @@
 define <4 x i16> @test() {
 ; CHECK-LABEL: define <4 x i16> @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP37:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x 
i16> zeroinitializer)
-; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i16> poison, i16 
[[TMP37]], i64 0
-; CHECK-NEXT:    [[TMP40:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x 
i16> zeroinitializer)
-; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <4 x i16> [[TMP38]], i16 
[[TMP40]], i64 1
-; CHECK-NEXT:    [[TMP43:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x 
i16> zeroinitializer)
-; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i16> [[TMP41]], i16 
[[TMP43]], i64 2
-; CHECK-NEXT:    [[TMP46:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x 
i16> zeroinitializer)
-; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i16> [[TMP44]], i16 
[[TMP46]], i64 3
-; CHECK-NEXT:    [[OP_RDX9:%.*]] = or <4 x i16> [[TMP47]], zeroinitializer
-; CHECK-NEXT:    ret <4 x i16> [[OP_RDX9]]
+; CHECK-NEXT:    ret <4 x i16> zeroinitializer
 ;
 entry:
   %subi = add <4 x i16> zeroinitializer, zeroinitializer

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
index 4c295355617e4..8eec4142f2a2f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
@@ -4,8 +4,7 @@
 define i16 @test() {
 ; CHECK-LABEL: define i16 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x 
i16> zeroinitializer)
-; CHECK-NEXT:    ret i16 [[TMP9]]
+; CHECK-NEXT:    ret i16 0
 ;
 entry:
   %conv73 = xor i64 0, 0


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to