[clang] [Clang] Add `__builtin_reduce_addf` for ordered/unordered fp reductions (PR #176160)

Benjamin Maxwell via cfe-commits Mon, 26 Jan 2026 06:12:44 -0800

https://github.com/MacDue updated 
https://github.com/llvm/llvm-project/pull/176160


>From 1acc5a45f5e86b3f7e1b484ef37cf0619080082e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Wed, 14 Jan 2026 17:53:39 +0000
Subject: [PATCH 1/3] [Clang] Add `__builtin_reduce_addf` for ordered/unordered
 fp reductions

This adds `__builtin_reduce_addf` to expose the `llvm.vector.reduce.fadd.*`
intrinsic directly in Clang, for the full range of supported FP types.

Given a floating-point vector `vec` and a scalar floating-point value `acc`:

- `__builtin_reduce_addf(vec)` corresponds to an unordered/fast reduction
  * i.e., the lanes can be summed in any order
- `__builtin_reduce_addf(vec, acc)` corresponds to an ordered reduction
  * i.e., the result is as if an accumulator was initialized with `acc`
    and each lane was added to it in-order, starting from lane 0

The `acc` is only used for ordered reductions as the original motivation
for adding the "start_value/acc" in the intrinsic was to distinguish
between ordered/unordered reductions, see: https://reviews.llvm.org/D30086.
---
 clang/docs/LanguageExtensions.rst            |  4 ++
 clang/include/clang/Basic/Builtins.td        |  6 +++
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp      |  1 +
 clang/lib/CodeGen/CGBuiltin.cpp              | 22 ++++++++
 clang/lib/Sema/SemaChecking.cpp              | 53 +++++++++++++++++---
 clang/test/CodeGen/builtins-reduction-math.c | 23 +++++++++
 clang/test/Sema/builtins-reduction-math.c    | 17 +++++++
 7 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 0adfaebf24581..332c1cebfb47f 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -946,6 +946,10 @@ Let ``VT`` be a vector type and ``ET`` the element type of 
``VT``.
                                          semantics, see `LangRef
                                          
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
                                          for the comparison.
+ ET __builtin_reduce_addf(VT a)          unordered floating-point add 
reduction.                                floating point types
+ ET __builtin_reduce_addf(VT a, ET s)    ordered floating-point add reduction, 
initializing the accumulator     floating point types
+                                         with `(ET)s`, then adding each lane 
of the `a` in-order, starting from
+                                         lane 0.
 ======================================= 
====================================================================== 
==================================
 
 *Masked Builtins*
diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index bc8f1474493b0..cf4869b3dbd89 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1658,6 +1658,12 @@ def ReduceAdd : Builtin {
   let Prototype = "void(...)";
 }
 
+def ReduceAddf : Builtin {
+  let Spellings = ["__builtin_reduce_addf"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ReduceMul : Builtin {
   let Spellings = ["__builtin_reduce_mul"];
   let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 0e5a5b531df78..f136ba6ed9139 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1258,6 +1258,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl 
&gd, unsigned builtinID,
   case Builtin::BI__builtin_reduce_xor:
   case Builtin::BI__builtin_reduce_or:
   case Builtin::BI__builtin_reduce_and:
+  case Builtin::BI__builtin_reduce_addf:
   case Builtin::BI__builtin_reduce_maximum:
   case Builtin::BI__builtin_reduce_minimum:
   case Builtin::BI__builtin_matrix_transpose:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 339d6cff0a386..3ba3e46fd820a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4197,6 +4197,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   case Builtin::BI__builtin_reduce_minimum:
     return RValue::get(emitBuiltinWithOneOverloadedType<1>(
         *this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
+  case Builtin::BI__builtin_reduce_addf: {
+    llvm::Value *Vector = EmitScalarExpr(E->getArg(0));
+    llvm::Type *ScalarTy = Vector->getType()->getScalarType();
+    llvm::Value *StartValue = nullptr;
+    if (E->getNumArgs() == 2)
+      StartValue = Builder.CreateFPCast(EmitScalarExpr(E->getArg(1)), 
ScalarTy);
+    llvm::Value *Args[] = {/*start_value=*/StartValue
+                               ? StartValue
+                               : llvm::ConstantFP::get(ScalarTy, -0.0F),
+                           /*vector=*/Vector};
+    llvm::Function *F =
+        CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
+    llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
+    if (!StartValue) {
+      // No start value means an unordered reduction, which requires the 
reassoc
+      // FMF flag.
+      llvm::FastMathFlags FMF;
+      FMF.setAllowReassoc();
+      cast<llvm::CallBase>(Reduce)->setFastMathFlags(FMF);
+    }
+    return RValue::get(Reduce);
+  }
 
   case Builtin::BI__builtin_matrix_transpose: {
     auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e2e1b37572364..38aeac9cc2a93 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2761,6 +2761,14 @@ static ExprResult BuiltinVectorMathConversions(Sema &S, 
Expr *E) {
   return S.UsualUnaryFPConversions(Res.get());
 }
 
+static QualType GetVectorElementType(ASTContext &Context, QualType VecTy) {
+  if (const auto *TyA = VecTy->getAs<VectorType>())
+    return TyA->getElementType();
+  if (VecTy->isSizelessVectorType())
+    return VecTy->getSizelessVectorEltType(Context);
+  return QualType();
+}
+
 ExprResult
 Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
                                CallExpr *TheCall) {
@@ -3609,14 +3617,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
       return ExprError();
 
     const Expr *Arg = TheCall->getArg(0);
-    const auto *TyA = Arg->getType()->getAs<VectorType>();
-
-    QualType ElTy;
-    if (TyA)
-      ElTy = TyA->getElementType();
-    else if (Arg->getType()->isSizelessVectorType())
-      ElTy = Arg->getType()->getSizelessVectorEltType(Context);
 
+    QualType ElTy = GetVectorElementType(Context, Arg->getType());
     if (ElTy.isNull() || !ElTy->isIntegerType()) {
       Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
           << 1 << /* vector of */ 4 << /* int */ 1 << /* no fp */ 0
@@ -3628,6 +3630,43 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
     break;
   }
 
+  case Builtin::BI__builtin_reduce_addf: {
+    if (checkArgCountRange(TheCall, 1, 2))
+      return ExprError();
+
+    ExprResult Vec = UsualUnaryConversions(TheCall->getArg(0));
+    if (Vec.isInvalid())
+      return ExprError();
+
+    TheCall->setArg(0, Vec.get());
+
+    QualType ElTy = GetVectorElementType(Context, Vec.get()->getType());
+    if (ElTy.isNull() || !ElTy->isRealFloatingType()) {
+      Diag(Vec.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+          << 1 << /* vector of */ 4 << /* no int */ 0 << /* fp */ 1
+          << Vec.get()->getType();
+      return ExprError();
+    }
+
+    if (TheCall->getNumArgs() == 2) {
+      ExprResult StartValue = UsualUnaryConversions(TheCall->getArg(1));
+      if (StartValue.isInvalid())
+        return ExprError();
+
+      if (!StartValue.get()->getType()->isRealFloatingType()) {
+        Diag(StartValue.get()->getBeginLoc(),
+             diag::err_builtin_invalid_arg_type)
+            << 2 << /* scalar */ 1 << /* no int */ 0 << /* fp */ 1
+            << StartValue.get()->getType();
+        return ExprError();
+      }
+      TheCall->setArg(1, StartValue.get());
+    }
+
+    TheCall->setType(ElTy);
+    break;
+  }
+
   case Builtin::BI__builtin_matrix_transpose:
     return BuiltinMatrixTranspose(TheCall, TheCallResult);
 
diff --git a/clang/test/CodeGen/builtins-reduction-math.c 
b/clang/test/CodeGen/builtins-reduction-math.c
index e12fd729c84c0..bde6e9a4f9868 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -4,6 +4,8 @@
 // RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve  %s -emit-llvm 
-disable-llvm-passes -o - | FileCheck --check-prefixes=SVE   %s
 
 typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half8 __attribute__((ext_vector_type(8)));
+
 typedef short int si8 __attribute__((ext_vector_type(8)));
 typedef unsigned int u4 __attribute__((ext_vector_type(4)));
 
@@ -162,6 +164,27 @@ void test_builtin_reduce_minimum(float4 vf1) {
   const double r4 = __builtin_reduce_minimum(vf1_as_one);
 }
 
+void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
+  // CHECK-LABEL: define void @test_builtin_reduce_addf(
+
+  // CHECK:      [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 
-0.000000e+00, <4 x float> [[V0]])
+  float r1 = __builtin_reduce_addf(vf1);
+
+  // CHECK:      [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, 
<4 x float> [[V1]])
+  float r2 = __builtin_reduce_addf(vf1, 0.0f);
+
+  // CHECK:      [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 
0xH8000, <8 x half> [[V2:%.+]])
+  _Float16 r3 = __builtin_reduce_addf(vf2);
+
+  // CHECK:      [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 
0xH8000, <8 x half> [[V3]])
+  // CHECK-NEXT: fpext half [[RDX]] to float
+  float r4 = __builtin_reduce_addf(vf2, -0.0f);
+}
+
 #if defined(__ARM_FEATURE_SVE)
 #include <arm_sve.h>
 
diff --git a/clang/test/Sema/builtins-reduction-math.c 
b/clang/test/Sema/builtins-reduction-math.c
index 74f09d501198b..d4562d967e0e9 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -148,3 +148,20 @@ void test_builtin_reduce_minimum(int i, float4 v, int3 iv) 
{
   i = __builtin_reduce_minimum(i);
   // expected-error@-1 {{1st argument must be a vector of floating-point types 
(was 'int')}}
 }
+
+void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_addf(v);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of 
incompatible type 'float'}}
+
+  f = __builtin_reduce_addf();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 
0}}
+
+  f = __builtin_reduce_addf(v, f, v);
+  // expected-error@-1 {{too many arguments to function call, expected at most 
2, have 3}}
+
+  f = __builtin_reduce_addf(iv);
+  // expected-error@-1 {{1st argument must be a vector of floating-point types 
(was 'int3' (vector of 3 'int' values))}}
+
+  f = __builtin_reduce_addf(v, (int)121);
+  // expected-error@-1 {{2nd argument must be a scalar floating-point type 
(was 'int')}}
+}

>From 0a4ee9a805e6996ed9ba194c5bd68c139cbc4d9e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Thu, 15 Jan 2026 13:37:34 +0000
Subject: [PATCH 2/3] Try to fix docs

---
 clang/docs/LanguageExtensions.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 332c1cebfb47f..79bec04435c15 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -948,8 +948,8 @@ Let ``VT`` be a vector type and ``ET`` the element type of 
``VT``.
                                          for the comparison.
  ET __builtin_reduce_addf(VT a)          unordered floating-point add 
reduction.                                floating point types
  ET __builtin_reduce_addf(VT a, ET s)    ordered floating-point add reduction, 
initializing the accumulator     floating point types
-                                         with `(ET)s`, then adding each lane 
of the `a` in-order, starting from
-                                         lane 0.
+                                         with `(ET)s`, then adding each lane 
of the `a` in-order, starting
+                                         from lane 0.
 ======================================= 
====================================================================== 
==================================
 
 *Masked Builtins*

>From e4da729cf819da7352dbaa217d070c7056a0737e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Mon, 26 Jan 2026 13:58:12 +0000
Subject: [PATCH 3/3] Fixups

---
 clang/docs/LanguageExtensions.rst            | 50 ++++++++++----------
 clang/include/clang/Basic/Builtins.td        |  6 +++
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp      |  1 +
 clang/lib/CodeGen/CGBuiltin.cpp              |  9 ++--
 clang/lib/Sema/SemaChecking.cpp              |  9 ++--
 clang/test/CodeGen/builtins-reduction-math.c | 10 ++--
 clang/test/Sema/builtins-reduction-math.c    |  8 ++--
 7 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 79bec04435c15..64c8575f68116 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -926,31 +926,31 @@ Example:
 
 Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
 
-======================================= 
====================================================================== 
==================================
-         Name                            Operation                             
                                 Supported element types
-======================================= 
====================================================================== 
==================================
- ET __builtin_reduce_max(VT a)           return the largest element of the 
vector. The floating point result    integer and floating point types
-                                         will always be a number unless all 
elements of the vector are NaN.
- ET __builtin_reduce_min(VT a)           return the smallest element of the 
vector. The floating point result   integer and floating point types
-                                         will always be a number unless all 
elements of the vector are NaN.
- ET __builtin_reduce_add(VT a)           \+                                    
                                 integer types
- ET __builtin_reduce_mul(VT a)           \*                                    
                                 integer types
- ET __builtin_reduce_and(VT a)           &                                     
                                 integer types
- ET __builtin_reduce_or(VT a)            \|                                    
                                 integer types
- ET __builtin_reduce_xor(VT a)           ^                                     
                                 integer types
- ET __builtin_reduce_maximum(VT a)       return the largest element of the 
vector. Follows IEEE 754-2019        floating point types
-                                         semantics, see `LangRef
-                                         
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
-                                         for the comparison.
- ET __builtin_reduce_minimum(VT a)       return the smallest element of the 
vector. Follows IEEE 754-2019       floating point types
-                                         semantics, see `LangRef
-                                         
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
-                                         for the comparison.
- ET __builtin_reduce_addf(VT a)          unordered floating-point add 
reduction.                                floating point types
- ET __builtin_reduce_addf(VT a, ET s)    ordered floating-point add reduction, 
initializing the accumulator     floating point types
-                                         with `(ET)s`, then adding each lane 
of the `a` in-order, starting
-                                         from lane 0.
-======================================= 
====================================================================== 
==================================
+============================================= 
====================================================================== 
==================================
+         Name                                 Operation                        
                                      Supported element types
+============================================= 
====================================================================== 
==================================
+ ET __builtin_reduce_max(VT a)                 return the largest element of 
the vector. The floating point result    integer and floating point types
+                                               will always be a number unless 
all elements of the vector are NaN.
+ ET __builtin_reduce_min(VT a)                 return the smallest element of 
the vector. The floating point result   integer and floating point types
+                                               will always be a number unless 
all elements of the vector are NaN.
+ ET __builtin_reduce_add(VT a)                 \+                              
                                       integer types
+ ET __builtin_reduce_mul(VT a)                 \*                              
                                       integer types
+ ET __builtin_reduce_and(VT a)                 &                               
                                       integer types
+ ET __builtin_reduce_or(VT a)                  \|                              
                                       integer types
+ ET __builtin_reduce_xor(VT a)                 ^                               
                                       integer types
+ ET __builtin_reduce_maximum(VT a)             return the largest element of 
the vector. Follows IEEE 754-2019        floating point types
+                                               semantics, see `LangRef
+                                               
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
+                                               for the comparison.
+ ET __builtin_reduce_minimum(VT a)             return the smallest element of 
the vector. Follows IEEE 754-2019       floating point types
+                                               semantics, see `LangRef
+                                               
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
+                                               for the comparison.
+ ET __builtin_reduce_addf(VT a)                unordered floating-point add 
reduction.                                floating point types
+ ET __builtin_ordered_reduce_addf(VT a, ET s)  ordered floating-point add 
reduction, initializing the accumulator     floating point types
+                                               with `(ET)s`, then adding each 
lane of the `a` in-order, starting
+                                               from lane 0.
+============================================= 
====================================================================== 
==================================
 
 *Masked Builtins*
 
diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index cf4869b3dbd89..db037290f5ec1 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1664,6 +1664,12 @@ def ReduceAddf : Builtin {
   let Prototype = "void(...)";
 }
 
+def OrderedReduceAddf : Builtin {
+  let Spellings = ["__builtin_ordered_reduce_addf"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ReduceMul : Builtin {
   let Spellings = ["__builtin_reduce_mul"];
   let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index f136ba6ed9139..533662fec8c60 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1259,6 +1259,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl 
&gd, unsigned builtinID,
   case Builtin::BI__builtin_reduce_or:
   case Builtin::BI__builtin_reduce_and:
   case Builtin::BI__builtin_reduce_addf:
+  case Builtin::BI__builtin_ordered_reduce_addf:
   case Builtin::BI__builtin_reduce_maximum:
   case Builtin::BI__builtin_reduce_minimum:
   case Builtin::BI__builtin_matrix_transpose:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3ba3e46fd820a..2bb5fda9697aa 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4197,7 +4197,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   case Builtin::BI__builtin_reduce_minimum:
     return RValue::get(emitBuiltinWithOneOverloadedType<1>(
         *this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
-  case Builtin::BI__builtin_reduce_addf: {
+  case Builtin::BI__builtin_reduce_addf:
+  case Builtin::BI__builtin_ordered_reduce_addf: {
     llvm::Value *Vector = EmitScalarExpr(E->getArg(0));
     llvm::Type *ScalarTy = Vector->getType()->getScalarType();
     llvm::Value *StartValue = nullptr;
@@ -4210,9 +4211,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
     llvm::Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
     llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
-    if (!StartValue) {
-      // No start value means an unordered reduction, which requires the 
reassoc
-      // FMF flag.
+    if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_addf) {
+      // `__builtin_reduce_addf` is an unordered reduction, which requires the
+      // reassoc FMF flag.
       llvm::FastMathFlags FMF;
       FMF.setAllowReassoc();
       cast<llvm::CallBase>(Reduce)->setFastMathFlags(FMF);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 38aeac9cc2a93..3506020cde7f9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2761,7 +2761,7 @@ static ExprResult BuiltinVectorMathConversions(Sema &S, 
Expr *E) {
   return S.UsualUnaryFPConversions(Res.get());
 }
 
-static QualType GetVectorElementType(ASTContext &Context, QualType VecTy) {
+static QualType getVectorElementType(ASTContext &Context, QualType VecTy) {
   if (const auto *TyA = VecTy->getAs<VectorType>())
     return TyA->getElementType();
   if (VecTy->isSizelessVectorType())
@@ -3618,7 +3618,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
 
     const Expr *Arg = TheCall->getArg(0);
 
-    QualType ElTy = GetVectorElementType(Context, Arg->getType());
+    QualType ElTy = getVectorElementType(Context, Arg->getType());
     if (ElTy.isNull() || !ElTy->isIntegerType()) {
       Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
           << 1 << /* vector of */ 4 << /* int */ 1 << /* no fp */ 0
@@ -3630,7 +3630,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
     break;
   }
 
-  case Builtin::BI__builtin_reduce_addf: {
+  case Builtin::BI__builtin_reduce_addf:
+  case Builtin::BI__builtin_ordered_reduce_addf: {
     if (checkArgCountRange(TheCall, 1, 2))
       return ExprError();
 
@@ -3640,7 +3641,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
 
     TheCall->setArg(0, Vec.get());
 
-    QualType ElTy = GetVectorElementType(Context, Vec.get()->getType());
+    QualType ElTy = getVectorElementType(Context, Vec.get()->getType());
     if (ElTy.isNull() || !ElTy->isRealFloatingType()) {
       Diag(Vec.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type)
           << 1 << /* vector of */ 4 << /* no int */ 0 << /* fp */ 1
diff --git a/clang/test/CodeGen/builtins-reduction-math.c 
b/clang/test/CodeGen/builtins-reduction-math.c
index bde6e9a4f9868..2c69315419882 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -168,12 +168,12 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
   // CHECK-LABEL: define void @test_builtin_reduce_addf(
 
   // CHECK:      [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
-  // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 
-0.000000e+00, <4 x float> [[V0]])
-  float r1 = __builtin_reduce_addf(vf1);
+  // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 
1.000000e+00, <4 x float> [[V0]])
+  float r1 = __builtin_reduce_addf(vf1, 1.0f);
 
   // CHECK:      [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
-  // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, 
<4 x float> [[V1]])
-  float r2 = __builtin_reduce_addf(vf1, 0.0f);
+  // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 
-0.000000e+00, <4 x float> [[V1]])
+  float r2 = __builtin_ordered_reduce_addf(vf1);
 
   // CHECK:      [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
   // CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 
0xH8000, <8 x half> [[V2:%.+]])
@@ -182,7 +182,7 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
   // CHECK:      [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
   // CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 
0xH8000, <8 x half> [[V3]])
   // CHECK-NEXT: fpext half [[RDX]] to float
-  float r4 = __builtin_reduce_addf(vf2, -0.0f);
+  float r4 = __builtin_ordered_reduce_addf(vf2, -0.0f);
 }
 
 #if defined(__ARM_FEATURE_SVE)
diff --git a/clang/test/Sema/builtins-reduction-math.c 
b/clang/test/Sema/builtins-reduction-math.c
index d4562d967e0e9..3ca5b5755a53e 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -153,15 +153,15 @@ void test_builtin_reduce_addf(float f, float4 v, int3 iv) 
{
   struct Foo s = __builtin_reduce_addf(v);
   // expected-error@-1 {{initializing 'struct Foo' with an expression of 
incompatible type 'float'}}
 
+  f = __builtin_ordered_reduce_addf(v, f, f);
+  // expected-error@-1 {{too many arguments to function call, expected at most 
2, have 3}}
+
   f = __builtin_reduce_addf();
   // expected-error@-1 {{too few arguments to function call, expected 1, have 
0}}
 
-  f = __builtin_reduce_addf(v, f, v);
-  // expected-error@-1 {{too many arguments to function call, expected at most 
2, have 3}}
-
   f = __builtin_reduce_addf(iv);
   // expected-error@-1 {{1st argument must be a vector of floating-point types 
(was 'int3' (vector of 3 'int' values))}}
 
-  f = __builtin_reduce_addf(v, (int)121);
+  f = __builtin_ordered_reduce_addf(v, (int)121);
   // expected-error@-1 {{2nd argument must be a scalar floating-point type 
(was 'int')}}
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang] Add `__builtin_reduce_addf` for ordered/unordered fp reductions (PR #176160)

Reply via email to