fmin (PR #171633)

Jasmine Tang via cfe-commits Wed, 10 Dec 2025 22:53:55 -0800

https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/171633


>From 83f30f03132f2279567b889f9cb4022e4b8b9856 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <[email protected]>
Date: Wed, 10 Dec 2025 05:11:12 -0800
Subject: [PATCH 1/5] [CIR] Implement reduce fadd

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 29 +++++++++++++-
 .../CIR/CodeGen/X86/avx512-reduceIntrin.c     | 40 +++++++++++++++++++
 .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 17 +++++++-
 .../X86/avx512vlfp16-builtins.c               | 38 ++++++++++++++++++
 4 files changed, 122 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 62836ce0f7537..eaf55fdae6b47 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1492,11 +1492,38 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_vpshrdw128:
   case X86::BI__builtin_ia32_vpshrdw256:
   case X86::BI__builtin_ia32_vpshrdw512:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_reduce_fadd_pd512:
   case X86::BI__builtin_ia32_reduce_fadd_ps512:
   case X86::BI__builtin_ia32_reduce_fadd_ph512:
   case X86::BI__builtin_ia32_reduce_fadd_ph256:
-  case X86::BI__builtin_ia32_reduce_fadd_ph128:
+  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
+    StringRef intrinsicName = "";
+    switch (builtinID) {
+    case X86::BI__builtin_ia32_reduce_fadd_pd512:
+      intrinsicName = "vector.reduce.fadd.v8f64";
+      break;
+    case X86::BI__builtin_ia32_reduce_fadd_ps512:
+      intrinsicName = "vector.reduce.fadd.v16f32";
+      break;
+    case X86::BI__builtin_ia32_reduce_fadd_ph512:
+      intrinsicName = "vector.reduce.fadd.v32f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fadd_ph256:
+      intrinsicName = "vector.reduce.fadd.v16f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fadd_ph128:
+      intrinsicName = "vector.reduce.fadd.v8f16";
+      break;
+    }
+    assert(!cir::MissingFeatures::fastMathFlags());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               intrinsicName, ops[0].getType(),
+                               mlir::ValueRange{ops[0], ops[1]});
+  }
   case X86::BI__builtin_ia32_reduce_fmul_pd512:
   case X86::BI__builtin_ia32_reduce_fmul_ps512:
   case X86::BI__builtin_ia32_reduce_fmul_ph512:
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c 
b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
new file mode 100644
index 0000000000000..a9ba2ba688b22
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck 
%s --check-prefixes=CIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck 
%s --check-prefixes=LLVM
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s 
--check-prefixes=OGCG
+
+#include <immintrin.h>
+
+double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
+
+  // CIR-LABEL: _mm512_reduce_add_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f64" %[[R:.*]], 
%[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_add_pd
+  // CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_add_pd
+  // LLVM: call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 
x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_add_pd
+  // OGCG-NOT: reassoc
+  // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double 
-0.000000e+00, <8 x double> %{{.*}})
+  // OGCG-NOT: reassoc
+  return _mm512_reduce_add_pd(__W) + ExtraAddOp;
+}
+
+
+float test_mm512_reduce_add_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_add_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f32" %[[R:.*]], 
%[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_add_ps
+  // CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_add_ps
+  // LLVM: call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 
x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_add_ps
+  // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float 
-0.000000e+00, <16 x float> %{{.*}})
+  return _mm512_reduce_add_ps(__W);
+}
+
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
index 161fc45b2a32d..b67d51dd3cb11 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
@@ -63,4 +63,19 @@ __m512h test_mm512_undefined_ph(void) {
   // OGCG-LABEL: test_mm512_undefined_ph
   // OGCG: ret <32 x half> zeroinitializer
   return _mm512_undefined_ph();
-}
\ No newline at end of file
+}
+
+_Float16 test_mm512_reduce_add_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_add_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v32f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_add_ph
+  // CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_add_ph
+  // LLVM: call half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_add_ph
+  // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 
0xH8000, <32 x half> %{{.*}})
+  return _mm512_reduce_add_ph(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
new file mode 100644
index 0000000000000..6c042beef88fd
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -fclangir -emit-llvm -o %t.ll  -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+#include <immintrin.h>
+
+_Float16 test_mm256_reduce_add_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_add_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_add_ph
+  // CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_add_ph
+  // LLVM: call half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_add_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v16f16(half 0xH8000, 
<16 x half> %{{.*}})
+  return _mm256_reduce_add_ph(__W);
+}
+
+_Float16 test_mm_reduce_add_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_add_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_add_ph
+  // CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_add_ph
+  // LLVM: call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_add_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 
x half> %{{.*}})
+  return _mm_reduce_add_ph(__W);
+}

>From f216010a54aeed70b1ac605d9bdd812b91a41586 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <[email protected]>
Date: Wed, 10 Dec 2025 05:40:24 -0800
Subject: [PATCH 2/5] [CIR] Implement reduce fmul

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 25 ++++++++++++++-
 .../CIR/CodeGen/X86/avx512-reduceIntrin.c     | 31 +++++++++++++++++++
 .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 15 +++++++++
 .../X86/avx512vlfp16-builtins.c               | 31 +++++++++++++++++++
 4 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index eaf55fdae6b47..c0055293c5a13 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1528,7 +1528,30 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_reduce_fmul_ps512:
   case X86::BI__builtin_ia32_reduce_fmul_ph512:
   case X86::BI__builtin_ia32_reduce_fmul_ph256:
-  case X86::BI__builtin_ia32_reduce_fmul_ph128:
+  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
+    StringRef intrinsicName = "";
+    switch (builtinID) {
+    case X86::BI__builtin_ia32_reduce_fmul_pd512:
+      intrinsicName = "vector.reduce.fmul.v8f64";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmul_ps512:
+      intrinsicName = "vector.reduce.fmul.v16f32";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmul_ph512:
+      intrinsicName = "vector.reduce.fmul.v32f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmul_ph256:
+      intrinsicName = "vector.reduce.fmul.v16f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmul_ph128:
+      intrinsicName = "vector.reduce.fmul.v8f16";
+      break;
+    }
+    assert(!cir::MissingFeatures::fastMathFlags());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               intrinsicName, ops[0].getType(),
+                               mlir::ValueRange{ops[0], ops[1]});
+  }
   case X86::BI__builtin_ia32_reduce_fmax_pd512:
   case X86::BI__builtin_ia32_reduce_fmax_ps512:
   case X86::BI__builtin_ia32_reduce_fmax_ph512:
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c 
b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
index a9ba2ba688b22..9b956b80adf8c 100644
--- a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
@@ -22,6 +22,23 @@ double test_mm512_reduce_add_pd(__m512d __W, double 
ExtraAddOp){
   return _mm512_reduce_add_pd(__W) + ExtraAddOp;
 }
 
+double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
+  // CIR-LABEL: _mm512_reduce_mul_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f64" %[[R:.*]], 
%[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_mul_pd
+  // CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_mul_pd
+  // LLVM: call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 
x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_mul_pd
+  // OGCG-NOT: reassoc
+  // OGCG:    call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 
1.000000e+00, <8 x double> %{{.*}})
+  // OGCG-NOT: reassoc
+  return _mm512_reduce_mul_pd(__W) * ExtraMulOp;
+}
+
 
 float test_mm512_reduce_add_ps(__m512 __W){
   // CIR-LABEL: _mm512_reduce_add_ps
@@ -38,3 +55,17 @@ float test_mm512_reduce_add_ps(__m512 __W){
   return _mm512_reduce_add_ps(__W);
 }
 
+float test_mm512_reduce_mul_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_mul_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f32" %[[R:.*]], 
%[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_mul_ps
+  // CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_mul_ps
+  // LLVM: call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 
x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_mul_ps
+  // OGCG:    call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 
1.000000e+00, <16 x float> %{{.*}})
+  return _mm512_reduce_mul_ps(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
index b67d51dd3cb11..0c2234fd9dd15 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
@@ -79,3 +79,18 @@ _Float16 test_mm512_reduce_add_ph(__m512h __W) {
   // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 
0xH8000, <32 x half> %{{.*}})
   return _mm512_reduce_add_ph(__W);
 }
+
+_Float16 test_mm512_reduce_mul_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_mul_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v32f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_mul_ph
+  // CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_mul_ph
+  // LLVM: call half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_mul_ph
+  // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 
0xH3C00, <32 x half> %{{.*}})
+  return _mm512_reduce_mul_ph(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
index 6c042beef88fd..f7156427a5ad8 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
@@ -22,6 +22,21 @@ _Float16 test_mm256_reduce_add_ph(__m256h __W) {
   return _mm256_reduce_add_ph(__W);
 }
 
+_Float16 test_mm256_reduce_mul_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_mul_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_mul_ph
+  // CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_mul_ph
+  // LLVM: call half @llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_mul_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v16f16(half 0xH3C00, 
<16 x half> %{{.*}})
+  return _mm256_reduce_mul_ph(__W);
+}
+
 _Float16 test_mm_reduce_add_ph(__m128h __W) {
   // CIR-LABEL: _mm_reduce_add_ph
   // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
@@ -36,3 +51,19 @@ _Float16 test_mm_reduce_add_ph(__m128h __W) {
   // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 
x half> %{{.*}})
   return _mm_reduce_add_ph(__W);
 }
+
+_Float16 test_mm_reduce_mul_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_mul_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_mul_ph
+  // CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_mul_ph
+  // LLVM: call half @llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_mul_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 
x half> %{{.*}})
+  return _mm_reduce_mul_ph(__W);
+}
+

>From 8bed48d7f6f20999c6b249c5885defc1b01a5fd1 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <[email protected]>
Date: Wed, 10 Dec 2025 06:35:14 -0800
Subject: [PATCH 3/5] [CIR] Implement reduce fmax

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 26 ++++++++++++-
 .../CodeGen/X86/avx512-reduceMinMaxIntrin.c   | 37 +++++++++++++++++++
 .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 15 ++++++++
 .../X86/avx512vlfp16-builtins.c               | 30 +++++++++++++++
 4 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index c0055293c5a13..5d4b81751bc0f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1556,7 +1556,31 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_reduce_fmax_ps512:
   case X86::BI__builtin_ia32_reduce_fmax_ph512:
   case X86::BI__builtin_ia32_reduce_fmax_ph256:
-  case X86::BI__builtin_ia32_reduce_fmax_ph128:
+  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
+    StringRef intrinsicName = "";
+    switch (builtinID) {
+    case X86::BI__builtin_ia32_reduce_fmax_pd512:
+      intrinsicName = "vector.reduce.fmax.v8f64";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmax_ps512:
+      intrinsicName = "vector.reduce.fmax.v16f32";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmax_ph512:
+      intrinsicName = "vector.reduce.fmax.v32f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmax_ph256:
+      intrinsicName = "vector.reduce.fmax.v16f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmax_ph128:
+      intrinsicName = "vector.reduce.fmax.v8f16";
+      break;
+    }
+    assert(!cir::MissingFeatures::fastMathFlags());
+    cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               intrinsicName, vecTy.getElementType(),
+                               mlir::ValueRange{ops[0]});
+  }
   case X86::BI__builtin_ia32_reduce_fmin_pd512:
   case X86::BI__builtin_ia32_reduce_fmin_ps512:
   case X86::BI__builtin_ia32_reduce_fmin_ph512:
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c 
b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
new file mode 100644
index 0000000000000..548182ef7e74c
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck 
%s --check-prefixes=CIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck 
%s --check-prefixes=LLVM
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s 
--check-prefixes=OGCG
+
+#include <immintrin.h>
+
+double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
+  // CIR-LABEL: _mm512_reduce_max_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f64" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_max_pd
+  // CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_max_pd
+  // LLVM: call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_max_pd
+  // OGCG-NOT: nnan
+  // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> 
%{{.*}})
+  // OGCG-NOT: nnan
+  return _mm512_reduce_max_pd(__W) + ExtraAddOp;
+}
+
+float test_mm512_reduce_max_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_max_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f32" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_max_ps
+  // CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_max_ps
+  // LLVM: call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_max_ps
+  // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> 
%{{.*}})
+  return _mm512_reduce_max_ps(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
index 0c2234fd9dd15..d1a2be041a2a4 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
@@ -94,3 +94,18 @@ _Float16 test_mm512_reduce_mul_ph(__m512h __W) {
   // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 
0xH3C00, <32 x half> %{{.*}})
   return _mm512_reduce_mul_ph(__W);
 }
+
+_Float16 test_mm512_reduce_max_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_max_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v32f16" %[[V:.*]] : 
(!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_max_ph
+  // CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_max_ph
+  // LLVM: call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_max_ph
+  // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> 
%{{.*}})
+  return _mm512_reduce_max_ph(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
index f7156427a5ad8..12ff135037cf3 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
@@ -37,6 +37,21 @@ _Float16 test_mm256_reduce_mul_ph(__m256h __W) {
   return _mm256_reduce_mul_ph(__W);
 }
 
+_Float16 test_mm256_reduce_max_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_max_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f16" %[[V:.*]] : 
(!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_max_ph
+  // CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_max_ph
+  // LLVM: call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_max_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
+  return _mm256_reduce_max_ph(__W);
+}
+
 _Float16 test_mm_reduce_add_ph(__m128h __W) {
   // CIR-LABEL: _mm_reduce_add_ph
   // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
@@ -67,3 +82,18 @@ _Float16 test_mm_reduce_mul_ph(__m128h __W) {
   return _mm_reduce_mul_ph(__W);
 }
 
+_Float16 test_mm_reduce_max_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_max_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f16" %[[V:.*]] : 
(!cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_max_ph
+  // CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_max_ph
+  // LLVM: call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_max_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
+  return _mm_reduce_max_ph(__W);
+}
+

>From d54e553430e0e07bd77175f2e4e06955f3caca2d Mon Sep 17 00:00:00 2001
From: Jasmine Tang <[email protected]>
Date: Wed, 10 Dec 2025 07:08:27 -0800
Subject: [PATCH 4/5] [CIR] Implement reduce fmin

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 27 +++++++++++++++-
 .../CodeGen/X86/avx512-reduceMinMaxIntrin.c   | 32 +++++++++++++++++++
 .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 15 +++++++++
 .../X86/avx512vlfp16-builtins.c               | 30 +++++++++++++++++
 4 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 5d4b81751bc0f..9ffbbc03edb11 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1585,7 +1585,32 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_reduce_fmin_ps512:
   case X86::BI__builtin_ia32_reduce_fmin_ph512:
   case X86::BI__builtin_ia32_reduce_fmin_ph256:
-  case X86::BI__builtin_ia32_reduce_fmin_ph128:
+  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
+    StringRef intrinsicName = "";
+    switch (builtinID) {
+    case X86::BI__builtin_ia32_reduce_fmin_pd512:
+      intrinsicName = "vector.reduce.fmin.v8f64";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmin_ps512:
+      intrinsicName = "vector.reduce.fmin.v16f32";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmin_ph512:
+      intrinsicName = "vector.reduce.fmin.v32f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmin_ph256:
+      intrinsicName = "vector.reduce.fmin.v16f16";
+      break;
+    case X86::BI__builtin_ia32_reduce_fmin_ph128:
+      intrinsicName = "vector.reduce.fmin.v8f16";
+      break;
+    }
+
+    assert(!cir::MissingFeatures::fastMathFlags());
+    cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               intrinsicName, vecTy.getElementType(),
+                               mlir::ValueRange{ops[0]});
+  }
   case X86::BI__builtin_ia32_rdrand16_step:
   case X86::BI__builtin_ia32_rdrand32_step:
   case X86::BI__builtin_ia32_rdrand64_step:
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c 
b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
index 548182ef7e74c..be2865f9a4934 100644
--- a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -21,6 +21,23 @@ double test_mm512_reduce_max_pd(__m512d __W, double 
ExtraAddOp){
   return _mm512_reduce_max_pd(__W) + ExtraAddOp;
 }
 
+double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
+  // CIR-LABEL: _mm512_reduce_min_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f64" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_min_pd
+  // CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_min_pd
+  // LLVM: call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_min_pd
+  // OGCG-NOT: nnan
+  // OGCG:    call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x 
double> %{{.*}})
+  // OGCG-NOT: nnan
+  return _mm512_reduce_min_pd(__W) * ExtraMulOp;
+}
+
 float test_mm512_reduce_max_ps(__m512 __W){
   // CIR-LABEL: _mm512_reduce_max_ps
   // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f32" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
@@ -35,3 +52,18 @@ float test_mm512_reduce_max_ps(__m512 __W){
   // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> 
%{{.*}})
   return _mm512_reduce_max_ps(__W);
 }
+
+float test_mm512_reduce_min_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_min_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f32" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_min_ps
+  // CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_min_ps
+  // LLVM: call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_min_ps
+  // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> 
%{{.*}})
+  return _mm512_reduce_min_ps(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
index d1a2be041a2a4..94a3834f09a01 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
@@ -109,3 +109,18 @@ _Float16 test_mm512_reduce_max_ph(__m512h __W) {
   // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> 
%{{.*}})
   return _mm512_reduce_max_ph(__W);
 }
+
+_Float16 test_mm512_reduce_min_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_min_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v32f16" %[[V:.*]] : 
(!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_min_ph
+  // CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_min_ph
+  // LLVM: call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_min_ph
+  // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmin.v32f16(<32 x half> 
%{{.*}})
+  return _mm512_reduce_min_ph(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
index 12ff135037cf3..f1dfe56861eec 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
@@ -52,6 +52,21 @@ _Float16 test_mm256_reduce_max_ph(__m256h __W) {
   return _mm256_reduce_max_ph(__W);
 }
 
+_Float16 test_mm256_reduce_min_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_min_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f16" %[[V:.*]] : 
(!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_min_ph
+  // CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_min_ph
+  // LLVM: call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_min_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
+  return _mm256_reduce_min_ph(__W);
+}
+
 _Float16 test_mm_reduce_add_ph(__m128h __W) {
   // CIR-LABEL: _mm_reduce_add_ph
   // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
@@ -97,3 +112,18 @@ _Float16 test_mm_reduce_max_ph(__m128h __W) {
   return _mm_reduce_max_ph(__W);
 }
 
+_Float16 test_mm_reduce_min_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_min_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f16" %[[V:.*]] : 
(!cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_min_ph
+  // CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_min_ph
+  // LLVM: call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_min_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})
+  return _mm_reduce_min_ph(__W);
+}
+

>From 7dda4392cbb9484cd23e48036499703ac34db310 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <[email protected]>
Date: Wed, 10 Dec 2025 22:47:21 -0800
Subject: [PATCH 5/5] Remove suffix from string intrinsics

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 77 +------------------
 .../CIR/CodeGen/X86/avx512-reduceIntrin.c     |  8 +-
 .../CodeGen/X86/avx512-reduceMinMaxIntrin.c   |  8 +-
 .../CodeGenBuiltins/X86/avx512fp16-builtins.c |  8 +-
 .../X86/avx512vlfp16-builtins.c               | 16 ++--
 5 files changed, 24 insertions(+), 93 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 9ffbbc03edb11..51e2a9de000cb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1501,24 +1501,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_reduce_fadd_ph512:
   case X86::BI__builtin_ia32_reduce_fadd_ph256:
   case X86::BI__builtin_ia32_reduce_fadd_ph128: {
-    StringRef intrinsicName = "";
-    switch (builtinID) {
-    case X86::BI__builtin_ia32_reduce_fadd_pd512:
-      intrinsicName = "vector.reduce.fadd.v8f64";
-      break;
-    case X86::BI__builtin_ia32_reduce_fadd_ps512:
-      intrinsicName = "vector.reduce.fadd.v16f32";
-      break;
-    case X86::BI__builtin_ia32_reduce_fadd_ph512:
-      intrinsicName = "vector.reduce.fadd.v32f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fadd_ph256:
-      intrinsicName = "vector.reduce.fadd.v16f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fadd_ph128:
-      intrinsicName = "vector.reduce.fadd.v8f16";
-      break;
-    }
+    StringRef intrinsicName = "vector.reduce.fadd";
     assert(!cir::MissingFeatures::fastMathFlags());
     return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                                intrinsicName, ops[0].getType(),
@@ -1529,24 +1512,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_reduce_fmul_ph512:
   case X86::BI__builtin_ia32_reduce_fmul_ph256:
   case X86::BI__builtin_ia32_reduce_fmul_ph128: {
-    StringRef intrinsicName = "";
-    switch (builtinID) {
-    case X86::BI__builtin_ia32_reduce_fmul_pd512:
-      intrinsicName = "vector.reduce.fmul.v8f64";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmul_ps512:
-      intrinsicName = "vector.reduce.fmul.v16f32";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmul_ph512:
-      intrinsicName = "vector.reduce.fmul.v32f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmul_ph256:
-      intrinsicName = "vector.reduce.fmul.v16f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmul_ph128:
-      intrinsicName = "vector.reduce.fmul.v8f16";
-      break;
-    }
+    StringRef intrinsicName = "vector.reduce.fmul";
     assert(!cir::MissingFeatures::fastMathFlags());
     return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                                intrinsicName, ops[0].getType(),
@@ -1557,24 +1523,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_reduce_fmax_ph512:
   case X86::BI__builtin_ia32_reduce_fmax_ph256:
   case X86::BI__builtin_ia32_reduce_fmax_ph128: {
-    StringRef intrinsicName = "";
-    switch (builtinID) {
-    case X86::BI__builtin_ia32_reduce_fmax_pd512:
-      intrinsicName = "vector.reduce.fmax.v8f64";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmax_ps512:
-      intrinsicName = "vector.reduce.fmax.v16f32";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmax_ph512:
-      intrinsicName = "vector.reduce.fmax.v32f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmax_ph256:
-      intrinsicName = "vector.reduce.fmax.v16f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmax_ph128:
-      intrinsicName = "vector.reduce.fmax.v8f16";
-      break;
-    }
+    StringRef intrinsicName = "vector.reduce.fmax";
     assert(!cir::MissingFeatures::fastMathFlags());
     cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
     return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
@@ -1586,25 +1535,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_reduce_fmin_ph512:
   case X86::BI__builtin_ia32_reduce_fmin_ph256:
   case X86::BI__builtin_ia32_reduce_fmin_ph128: {
-    StringRef intrinsicName = "";
-    switch (builtinID) {
-    case X86::BI__builtin_ia32_reduce_fmin_pd512:
-      intrinsicName = "vector.reduce.fmin.v8f64";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmin_ps512:
-      intrinsicName = "vector.reduce.fmin.v16f32";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmin_ph512:
-      intrinsicName = "vector.reduce.fmin.v32f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmin_ph256:
-      intrinsicName = "vector.reduce.fmin.v16f16";
-      break;
-    case X86::BI__builtin_ia32_reduce_fmin_ph128:
-      intrinsicName = "vector.reduce.fmin.v8f16";
-      break;
-    }
-
+    StringRef intrinsicName = "vector.reduce.fmin";
     assert(!cir::MissingFeatures::fastMathFlags());
     cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
     return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
index 9b956b80adf8c..bc4249ffd25fc 100644
--- a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
@@ -7,7 +7,7 @@
 double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
 
   // CIR-LABEL: _mm512_reduce_add_pd
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f64" %[[R:.*]], 
%[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
 
   // CIR-LABEL: test_mm512_reduce_add_pd
   // CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
@@ -24,7 +24,7 @@ double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
 
 double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
   // CIR-LABEL: _mm512_reduce_mul_pd
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f64" %[[R:.*]], 
%[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
 
   // CIR-LABEL: test_mm512_reduce_mul_pd
   // CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
@@ -42,7 +42,7 @@ double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
 
 float test_mm512_reduce_add_ps(__m512 __W){
   // CIR-LABEL: _mm512_reduce_add_ps
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f32" %[[R:.*]], 
%[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
 
   // CIR-LABEL: test_mm512_reduce_add_ps
   // CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
@@ -57,7 +57,7 @@ float test_mm512_reduce_add_ps(__m512 __W){
 
 float test_mm512_reduce_mul_ps(__m512 __W){
   // CIR-LABEL: _mm512_reduce_mul_ps
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f32" %[[R:.*]], 
%[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
 
   // CIR-LABEL: test_mm512_reduce_mul_ps
   // CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
index be2865f9a4934..104e76fa6ad03 100644
--- a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -6,7 +6,7 @@
 
 double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
   // CIR-LABEL: _mm512_reduce_max_pd
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f64" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
 
   // CIR-LABEL: test_mm512_reduce_max_pd
   // CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
@@ -23,7 +23,7 @@ double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
 
 double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
   // CIR-LABEL: _mm512_reduce_min_pd
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f64" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
 
   // CIR-LABEL: test_mm512_reduce_min_pd
   // CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
@@ -40,7 +40,7 @@ double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
 
 float test_mm512_reduce_max_ps(__m512 __W){
   // CIR-LABEL: _mm512_reduce_max_ps
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f32" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
 
   // CIR-LABEL: test_mm512_reduce_max_ps
   // CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
@@ -55,7 +55,7 @@ float test_mm512_reduce_max_ps(__m512 __W){
 
 float test_mm512_reduce_min_ps(__m512 __W){
   // CIR-LABEL: _mm512_reduce_min_ps
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f32" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
 
   // CIR-LABEL: test_mm512_reduce_min_ps
   // CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
index 94a3834f09a01..464fa29fffc20 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
@@ -67,7 +67,7 @@ __m512h test_mm512_undefined_ph(void) {
 
 _Float16 test_mm512_reduce_add_ph(__m512h __W) {
   // CIR-LABEL: _mm512_reduce_add_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v32f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm512_reduce_add_ph
   // CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
@@ -82,7 +82,7 @@ _Float16 test_mm512_reduce_add_ph(__m512h __W) {
 
 _Float16 test_mm512_reduce_mul_ph(__m512h __W) {
   // CIR-LABEL: _mm512_reduce_mul_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v32f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm512_reduce_mul_ph
   // CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
@@ -97,7 +97,7 @@ _Float16 test_mm512_reduce_mul_ph(__m512h __W) {
 
 _Float16 test_mm512_reduce_max_ph(__m512h __W) {
   // CIR-LABEL: _mm512_reduce_max_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v32f16" %[[V:.*]] 
(!cir.vector<32 x !cir.f16>) -> !cir.f16 
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] 
(!cir.vector<32 x !cir.f16>) -> !cir.f16 
 
   // CIR-LABEL: test_mm512_reduce_max_ph
   // CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
@@ -112,7 +112,7 @@ _Float16 test_mm512_reduce_max_ph(__m512h __W) {
 
 _Float16 test_mm512_reduce_min_ph(__m512h __W) {
   // CIR-LABEL: _mm512_reduce_min_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v32f16" %[[V:.*]] 
(!cir.vector<32 x !cir.f16>) -> !cir.f16 
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] 
(!cir.vector<32 x !cir.f16>) -> !cir.f16 
 
   // CIR-LABEL: test_mm512_reduce_min_ph
   // CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
index f1dfe56861eec..994fdfec23c2c 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
@@ -9,7 +9,7 @@
 
 _Float16 test_mm256_reduce_add_ph(__m256h __W) {
   // CIR-LABEL: _mm256_reduce_add_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm256_reduce_add_ph
   // CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
@@ -24,7 +24,7 @@ _Float16 test_mm256_reduce_add_ph(__m256h __W) {
 
 _Float16 test_mm256_reduce_mul_ph(__m256h __W) {
   // CIR-LABEL: _mm256_reduce_mul_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm256_reduce_mul_ph
   // CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
@@ -39,7 +39,7 @@ _Float16 test_mm256_reduce_mul_ph(__m256h __W) {
 
 _Float16 test_mm256_reduce_max_ph(__m256h __W) {
   // CIR-LABEL: _mm256_reduce_max_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f16" %[[V:.*]] 
(!cir.vector<16 x !cir.f16>) -> !cir.f16 
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] 
(!cir.vector<16 x !cir.f16>) -> !cir.f16 
 
   // CIR-LABEL: test_mm256_reduce_max_ph
   // CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
@@ -54,7 +54,7 @@ _Float16 test_mm256_reduce_max_ph(__m256h __W) {
 
 _Float16 test_mm256_reduce_min_ph(__m256h __W) {
   // CIR-LABEL: _mm256_reduce_min_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f16" %[[V:.*]] : 
(!cir.vector<16 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<16 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm256_reduce_min_ph
   // CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
@@ -69,7 +69,7 @@ _Float16 test_mm256_reduce_min_ph(__m256h __W) {
 
 _Float16 test_mm_reduce_add_ph(__m128h __W) {
   // CIR-LABEL: _mm_reduce_add_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm_reduce_add_ph
   // CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
@@ -84,7 +84,7 @@ _Float16 test_mm_reduce_add_ph(__m128h __W) {
 
 _Float16 test_mm_reduce_mul_ph(__m128h __W) {
   // CIR-LABEL: _mm_reduce_mul_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f16" %[[R:.*]], 
%[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm_reduce_mul_ph
   // CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
@@ -99,7 +99,7 @@ _Float16 test_mm_reduce_mul_ph(__m128h __W) {
 
 _Float16 test_mm_reduce_max_ph(__m128h __W) {
   // CIR-LABEL: _mm_reduce_max_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f16" %[[V:.*]] 
(!cir.vector<8 x !cir.f16>) -> !cir.f16 
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] 
(!cir.vector<8 x !cir.f16>) -> !cir.f16 
 
   // CIR-LABEL: test_mm_reduce_max_ph
   // CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
@@ -114,7 +114,7 @@ _Float16 test_mm_reduce_max_ph(__m128h __W) {
 
 _Float16 test_mm_reduce_min_ph(__m128h __W) {
   // CIR-LABEL: _mm_reduce_min_ph
-  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f16" %[[V:.*]] : 
(!cir.vector<8 x !cir.f16>) -> !cir.f16
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<8 x !cir.f16>) -> !cir.f16
 
   // CIR-LABEL: test_mm_reduce_min_ph
   // CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR] Implement builtin reduce fadd/fmul/fmax/fmin (PR #171633)

Reply via email to