https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/176160
>From 1acc5a45f5e86b3f7e1b484ef37cf0619080082e Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <[email protected]> Date: Wed, 14 Jan 2026 17:53:39 +0000 Subject: [PATCH 1/3] [Clang] Add `__builtin_reduce_addf` for ordered/unordered fp reductions This adds `__builtin_reduce_addf` to expose the `llvm.vector.reduce.fadd.*` intrinsic directly in Clang, for the full range of supported FP types. Given a floating-point vector `vec` and a scalar floating-point value `acc`: - `__builtin_reduce_addf(vec)` corresponds to an unordered/fast reduction * i.e., the lanes can be summed in any order - `__builtin_reduce_addf(vec, acc)` corresponds to an ordered reduction * i.e., the result is as-if an accumulator was initialized with `acc` and each lane was added to it in-order, starting from lane 0 The `acc` is only used for ordered reductions as the original motivation for adding the "start_value/acc" in the intrinsic was to distinguish between ordered/unordered reductions, see: https://reviews.llvm.org/D30086. --- clang/docs/LanguageExtensions.rst | 4 ++ clang/include/clang/Basic/Builtins.td | 6 +++ clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 22 ++++++++ clang/lib/Sema/SemaChecking.cpp | 53 +++++++++++++++++--- clang/test/CodeGen/builtins-reduction-math.c | 23 +++++++++ clang/test/Sema/builtins-reduction-math.c | 17 +++++++ 7 files changed, 119 insertions(+), 7 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 0adfaebf24581..332c1cebfb47f 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -946,6 +946,10 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``. semantics, see `LangRef <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ for the comparison. + ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. 
floating point types + ET __builtin_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types + with `(ET)s`, then adding each lane of the `a` in-order, starting from + lane 0. ======================================= ====================================================================== ================================== *Masked Builtins* diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index bc8f1474493b0..cf4869b3dbd89 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1658,6 +1658,12 @@ def ReduceAdd : Builtin { let Prototype = "void(...)"; } +def ReduceAddf : Builtin { + let Spellings = ["__builtin_reduce_addf"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def ReduceMul : Builtin { let Spellings = ["__builtin_reduce_mul"]; let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 0e5a5b531df78..f136ba6ed9139 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -1258,6 +1258,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_reduce_xor: case Builtin::BI__builtin_reduce_or: case Builtin::BI__builtin_reduce_and: + case Builtin::BI__builtin_reduce_addf: case Builtin::BI__builtin_reduce_maximum: case Builtin::BI__builtin_reduce_minimum: case Builtin::BI__builtin_matrix_transpose: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 339d6cff0a386..3ba3e46fd820a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4197,6 +4197,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_reduce_minimum: return 
RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum")); + case Builtin::BI__builtin_reduce_addf: { + llvm::Value *Vector = EmitScalarExpr(E->getArg(0)); + llvm::Type *ScalarTy = Vector->getType()->getScalarType(); + llvm::Value *StartValue = nullptr; + if (E->getNumArgs() == 2) + StartValue = Builder.CreateFPCast(EmitScalarExpr(E->getArg(1)), ScalarTy); + llvm::Value *Args[] = {/*start_value=*/StartValue + ? StartValue + : llvm::ConstantFP::get(ScalarTy, -0.0F), + /*vector=*/Vector}; + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType()); + llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf"); + if (!StartValue) { + // No start value means an unordered reduction, which requires the reassoc + // FMF flag. + llvm::FastMathFlags FMF; + FMF.setAllowReassoc(); + cast<llvm::CallBase>(Reduce)->setFastMathFlags(FMF); + } + return RValue::get(Reduce); + } case Builtin::BI__builtin_matrix_transpose: { auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>(); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e2e1b37572364..38aeac9cc2a93 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2761,6 +2761,14 @@ static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) { return S.UsualUnaryFPConversions(Res.get()); } +static QualType GetVectorElementType(ASTContext &Context, QualType VecTy) { + if (const auto *TyA = VecTy->getAs<VectorType>()) + return TyA->getElementType(); + if (VecTy->isSizelessVectorType()) + return VecTy->getSizelessVectorEltType(Context); + return QualType(); +} + ExprResult Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, CallExpr *TheCall) { @@ -3609,14 +3617,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); const Expr *Arg = TheCall->getArg(0); - const auto *TyA = 
Arg->getType()->getAs<VectorType>(); - - QualType ElTy; - if (TyA) - ElTy = TyA->getElementType(); - else if (Arg->getType()->isSizelessVectorType()) - ElTy = Arg->getType()->getSizelessVectorEltType(Context); + QualType ElTy = GetVectorElementType(Context, Arg->getType()); if (ElTy.isNull() || !ElTy->isIntegerType()) { Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* vector of */ 4 << /* int */ 1 << /* no fp */ 0 @@ -3628,6 +3630,43 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } + case Builtin::BI__builtin_reduce_addf: { + if (checkArgCountRange(TheCall, 1, 2)) + return ExprError(); + + ExprResult Vec = UsualUnaryConversions(TheCall->getArg(0)); + if (Vec.isInvalid()) + return ExprError(); + + TheCall->setArg(0, Vec.get()); + + QualType ElTy = GetVectorElementType(Context, Vec.get()->getType()); + if (ElTy.isNull() || !ElTy->isRealFloatingType()) { + Diag(Vec.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* vector of */ 4 << /* no int */ 0 << /* fp */ 1 + << Vec.get()->getType(); + return ExprError(); + } + + if (TheCall->getNumArgs() == 2) { + ExprResult StartValue = UsualUnaryConversions(TheCall->getArg(1)); + if (StartValue.isInvalid()) + return ExprError(); + + if (!StartValue.get()->getType()->isRealFloatingType()) { + Diag(StartValue.get()->getBeginLoc(), + diag::err_builtin_invalid_arg_type) + << 2 << /* scalar */ 1 << /* no int */ 0 << /* fp */ 1 + << StartValue.get()->getType(); + return ExprError(); + } + TheCall->setArg(1, StartValue.get()); + } + + TheCall->setType(ElTy); + break; + } + case Builtin::BI__builtin_matrix_transpose: return BuiltinMatrixTranspose(TheCall, TheCallResult); diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c index e12fd729c84c0..bde6e9a4f9868 100644 --- a/clang/test/CodeGen/builtins-reduction-math.c +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -4,6 +4,8 @@ // RUN: %clang_cc1 -O1 
-triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half8 __attribute__((ext_vector_type(8))); + typedef short int si8 __attribute__((ext_vector_type(8))); typedef unsigned int u4 __attribute__((ext_vector_type(4))); @@ -162,6 +164,27 @@ void test_builtin_reduce_minimum(float4 vf1) { const double r4 = __builtin_reduce_minimum(vf1_as_one); } +void test_builtin_reduce_addf(float4 vf1, half8 vf2) { + // CHECK-LABEL: define void @test_builtin_reduce_addf( + + // CHECK: [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 + // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V0]]) + float r1 = __builtin_reduce_addf(vf1); + + // CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 + // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[V1]]) + float r2 = __builtin_reduce_addf(vf1, 0.0f); + + // CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16 + // CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]]) + _Float16 r3 = __builtin_reduce_addf(vf2); + + // CHECK: [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16 + // CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V3]]) + // CHECK-NEXT: fpext half [[RDX]] to float + float r4 = __builtin_reduce_addf(vf2, -0.0f); +} + #if defined(__ARM_FEATURE_SVE) #include <arm_sve.h> diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c index 74f09d501198b..d4562d967e0e9 100644 --- a/clang/test/Sema/builtins-reduction-math.c +++ b/clang/test/Sema/builtins-reduction-math.c @@ -148,3 +148,20 @@ void test_builtin_reduce_minimum(int i, float4 v, int3 iv) { i = __builtin_reduce_minimum(i); // expected-error@-1 {{1st argument must be a vector of floating-point types (was 
'int')}} } + +void test_builtin_reduce_addf(float f, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_addf(v); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}} + + f = __builtin_reduce_addf(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + f = __builtin_reduce_addf(v, f, v); + // expected-error@-1 {{too many arguments to function call, expected at most 2, have 3}} + + f = __builtin_reduce_addf(iv); + // expected-error@-1 {{1st argument must be a vector of floating-point types (was 'int3' (vector of 3 'int' values))}} + + f = __builtin_reduce_addf(v, (int)121); + // expected-error@-1 {{2nd argument must be a scalar floating-point type (was 'int')}} +} >From 0a4ee9a805e6996ed9ba194c5bd68c139cbc4d9e Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <[email protected]> Date: Thu, 15 Jan 2026 13:37:34 +0000 Subject: [PATCH 2/3] Try to fix docs --- clang/docs/LanguageExtensions.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 332c1cebfb47f..79bec04435c15 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -948,8 +948,8 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``. for the comparison. ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types ET __builtin_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types - with `(ET)s`, then adding each lane of the `a` in-order, starting from - lane 0. + with `(ET)s`, then adding each lane of the `a` in-order, starting + from lane 0. 
======================================= ====================================================================== ================================== *Masked Builtins* >From e4da729cf819da7352dbaa217d070c7056a0737e Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <[email protected]> Date: Mon, 26 Jan 2026 13:58:12 +0000 Subject: [PATCH 3/3] Fixups --- clang/docs/LanguageExtensions.rst | 50 ++++++++++---------- clang/include/clang/Basic/Builtins.td | 6 +++ clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 9 ++-- clang/lib/Sema/SemaChecking.cpp | 9 ++-- clang/test/CodeGen/builtins-reduction-math.c | 10 ++-- clang/test/Sema/builtins-reduction-math.c | 8 ++-- 7 files changed, 51 insertions(+), 42 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 79bec04435c15..64c8575f68116 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -926,31 +926,31 @@ Example: Let ``VT`` be a vector type and ``ET`` the element type of ``VT``. -======================================= ====================================================================== ================================== - Name Operation Supported element types -======================================= ====================================================================== ================================== - ET __builtin_reduce_max(VT a) return the largest element of the vector. The floating point result integer and floating point types - will always be a number unless all elements of the vector are NaN. - ET __builtin_reduce_min(VT a) return the smallest element of the vector. The floating point result integer and floating point types - will always be a number unless all elements of the vector are NaN. 
- ET __builtin_reduce_add(VT a) \+ integer types - ET __builtin_reduce_mul(VT a) \* integer types - ET __builtin_reduce_and(VT a) & integer types - ET __builtin_reduce_or(VT a) \| integer types - ET __builtin_reduce_xor(VT a) ^ integer types - ET __builtin_reduce_maximum(VT a) return the largest element of the vector. Follows IEEE 754-2019 floating point types - semantics, see `LangRef - <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ - for the comparison. - ET __builtin_reduce_minimum(VT a) return the smallest element of the vector. Follows IEEE 754-2019 floating point types - semantics, see `LangRef - <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ - for the comparison. - ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types - ET __builtin_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types - with `(ET)s`, then adding each lane of the `a` in-order, starting - from lane 0. -======================================= ====================================================================== ================================== +============================================= ====================================================================== ================================== + Name Operation Supported element types +============================================= ====================================================================== ================================== + ET __builtin_reduce_max(VT a) return the largest element of the vector. The floating point result integer and floating point types + will always be a number unless all elements of the vector are NaN. + ET __builtin_reduce_min(VT a) return the smallest element of the vector. The floating point result integer and floating point types + will always be a number unless all elements of the vector are NaN. 
+ ET __builtin_reduce_add(VT a) \+ integer types + ET __builtin_reduce_mul(VT a) \* integer types + ET __builtin_reduce_and(VT a) & integer types + ET __builtin_reduce_or(VT a) \| integer types + ET __builtin_reduce_xor(VT a) ^ integer types + ET __builtin_reduce_maximum(VT a) return the largest element of the vector. Follows IEEE 754-2019 floating point types + semantics, see `LangRef + <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ + for the comparison. + ET __builtin_reduce_minimum(VT a) return the smallest element of the vector. Follows IEEE 754-2019 floating point types + semantics, see `LangRef + <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ + for the comparison. + ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types + ET __builtin_ordered_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types + with `(ET)s`, then adding each lane of the `a` in-order, starting + from lane 0. 
+============================================= ====================================================================== ================================== *Masked Builtins* diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index cf4869b3dbd89..db037290f5ec1 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1664,6 +1664,12 @@ def ReduceAddf : Builtin { let Prototype = "void(...)"; } +def OrderedReduceAddf : Builtin { + let Spellings = ["__builtin_ordered_reduce_addf"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def ReduceMul : Builtin { let Spellings = ["__builtin_reduce_mul"]; let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index f136ba6ed9139..533662fec8c60 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -1259,6 +1259,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_reduce_or: case Builtin::BI__builtin_reduce_and: case Builtin::BI__builtin_reduce_addf: + case Builtin::BI__builtin_ordered_reduce_addf: case Builtin::BI__builtin_reduce_maximum: case Builtin::BI__builtin_reduce_minimum: case Builtin::BI__builtin_matrix_transpose: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3ba3e46fd820a..2bb5fda9697aa 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4197,7 +4197,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_reduce_minimum: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum")); - case Builtin::BI__builtin_reduce_addf: { + case Builtin::BI__builtin_reduce_addf: + case 
Builtin::BI__builtin_ordered_reduce_addf: { llvm::Value *Vector = EmitScalarExpr(E->getArg(0)); llvm::Type *ScalarTy = Vector->getType()->getScalarType(); llvm::Value *StartValue = nullptr; @@ -4210,9 +4211,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType()); llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf"); - if (!StartValue) { - // No start value means an unordered reduction, which requires the reassoc - // FMF flag. + if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_addf) { + // `__builtin_reduce_addf` an unordered reduction, which requires the + // reassoc FMF flag. llvm::FastMathFlags FMF; FMF.setAllowReassoc(); cast<llvm::CallBase>(Reduce)->setFastMathFlags(FMF); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 38aeac9cc2a93..3506020cde7f9 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2761,7 +2761,7 @@ static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) { return S.UsualUnaryFPConversions(Res.get()); } -static QualType GetVectorElementType(ASTContext &Context, QualType VecTy) { +static QualType getVectorElementType(ASTContext &Context, QualType VecTy) { if (const auto *TyA = VecTy->getAs<VectorType>()) return TyA->getElementType(); if (VecTy->isSizelessVectorType()) @@ -3618,7 +3618,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, const Expr *Arg = TheCall->getArg(0); - QualType ElTy = GetVectorElementType(Context, Arg->getType()); + QualType ElTy = getVectorElementType(Context, Arg->getType()); if (ElTy.isNull() || !ElTy->isIntegerType()) { Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* vector of */ 4 << /* int */ 1 << /* no fp */ 0 @@ -3630,7 +3630,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } - case Builtin::BI__builtin_reduce_addf: { 
+ case Builtin::BI__builtin_reduce_addf: + case Builtin::BI__builtin_ordered_reduce_addf: { if (checkArgCountRange(TheCall, 1, 2)) return ExprError(); @@ -3640,7 +3641,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, TheCall->setArg(0, Vec.get()); - QualType ElTy = GetVectorElementType(Context, Vec.get()->getType()); + QualType ElTy = getVectorElementType(Context, Vec.get()->getType()); if (ElTy.isNull() || !ElTy->isRealFloatingType()) { Diag(Vec.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* vector of */ 4 << /* no int */ 0 << /* fp */ 1 diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c index bde6e9a4f9868..2c69315419882 100644 --- a/clang/test/CodeGen/builtins-reduction-math.c +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -168,12 +168,12 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) { // CHECK-LABEL: define void @test_builtin_reduce_addf( // CHECK: [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 - // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V0]]) - float r1 = __builtin_reduce_addf(vf1); + // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 1.000000e+00, <4 x float> [[V0]]) + float r1 = __builtin_reduce_addf(vf1, 1.0f); // CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 - // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[V1]]) - float r2 = __builtin_reduce_addf(vf1, 0.0f); + // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V1]]) + float r2 = __builtin_ordered_reduce_addf(vf1); // CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16 // CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]]) @@ -182,7 +182,7 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) { // CHECK: [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, 
align 16 // CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V3]]) // CHECK-NEXT: fpext half [[RDX]] to float - float r4 = __builtin_reduce_addf(vf2, -0.0f); + float r4 = __builtin_ordered_reduce_addf(vf2, -0.0f); } #if defined(__ARM_FEATURE_SVE) diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c index d4562d967e0e9..3ca5b5755a53e 100644 --- a/clang/test/Sema/builtins-reduction-math.c +++ b/clang/test/Sema/builtins-reduction-math.c @@ -153,15 +153,15 @@ void test_builtin_reduce_addf(float f, float4 v, int3 iv) { struct Foo s = __builtin_reduce_addf(v); // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}} + f = __builtin_ordered_reduce_addf(v, f, f); + // expected-error@-1 {{too many arguments to function call, expected at most 2, have 3}} + f = __builtin_reduce_addf(); // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} - f = __builtin_reduce_addf(v, f, v); - // expected-error@-1 {{too many arguments to function call, expected at most 2, have 3}} - f = __builtin_reduce_addf(iv); // expected-error@-1 {{1st argument must be a vector of floating-point types (was 'int3' (vector of 3 'int' values))}} - f = __builtin_reduce_addf(v, (int)121); + f = __builtin_ordered_reduce_addf(v, (int)121); // expected-error@-1 {{2nd argument must be a scalar floating-point type (was 'int')}} } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
