https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/181148
From 7eaaf905be04a69a4e5ea880cf5a0208f4881ef7 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Thu, 12 Feb 2026 14:16:03 +0000
Subject: [PATCH 1/3] [CIR][NEON] Add lowering for `vfmah_f16`

As with other NEON builtins, reuse the existing default-lowering tests
to validate the CIR lowering path.
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp       | 21 +++++++++++++++++++
 clang/test/CodeGen/AArch64/neon/fullfp16.c         | 10 +++++++++
 .../CodeGen/AArch64/v8.2a-fp16-intrinsics.c        |  7 -------
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 71cf896aede10..0d4ed51920093 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -123,6 +123,17 @@ emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
   return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
 }
 
+// Emit an intrinsic where all operands are of the same type as the result.
+// Depending on mode, this may be a constrained floating-point intrinsic.
+static mlir::Value
+emitCallMaybeConstrainedBuiltin(CIRGenBuilderTy &builder, mlir::Location loc,
+                                StringRef intrName, mlir::Type retTy,
+                                llvm::SmallVector<mlir::Value> &ops) {
+  assert(!cir::MissingFeatures::emitConstrainedFPCall());
+
+  return builder.emitIntrinsicCallOp(loc, intrName, retTy, ops);
+}
+
 bool CIRGenFunction::getAArch64SVEProcessedOperands(
     unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops,
     SVETypeFlags typeFlags) {
@@ -1508,7 +1519,17 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vsubh_f16:
   case NEON::BI__builtin_neon_vmulh_f16:
   case NEON::BI__builtin_neon_vdivh_f16:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vfmah_f16:
+    ops.push_back(emitScalarExpr(expr->getArg(1)));
+    ops.push_back(emitScalarExpr(expr->getArg(2)));
+    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
+                                           convertType(expr->getType()), ops);
+    break;
   case NEON::BI__builtin_neon_vfmsh_f16:
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index f3268df2f4165..3a96107a3a0f6 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -50,3 +50,13 @@ float16_t test_vnegh_f16(float16_t a) {
 // LLVM: ret half [[NEG]]
   return vnegh_f16(a);
 }
+
+// ALL-LABEL: test_vfmah_f16
+float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
+// CIR: cir.call_llvm_intrinsic "fma" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+
+// LLVM-SAME: half{{.*}} [[A:%.*]], half{{.*}} [[B:%.*]], half{{.*}} [[C:%.*]])
+// LLVM: [[FMA:%.*]] = call half @llvm.fma.f16(half [[B]], half [[C]], half [[A]])
+// LLVM: ret half [[FMA]]
+  return vfmah_f16(a, b, c);
+}
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index 353f02195721f..d6bfb1c607f81 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -620,13 +620,6 @@ float16_t test_vsubh_f16(float16_t a, float16_t b) {
   return vsubh_f16(a, b);
 }
 
-// CHECK-LABEL: test_vfmah_f16
-// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half %c, half %a)
-// CHECK: ret half [[FMA]]
-float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
-  return vfmah_f16(a, b, c);
-}
-
 // CHECK-LABEL: test_vfmsh_f16
 // CHECK: [[SUB:%.*]] = fneg half %b
 // CHECK: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)

From 8188d17ebd6bb73370314df38773aa1cebefd1d1 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Thu, 12 Feb 2026 20:01:48 +0000
Subject: [PATCH 2/3] Add vfmsh_f16

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp     |  5 +++++
 clang/test/CodeGen/AArch64/neon/fullfp16.c         | 12 ++++++++++++
 clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c |  9 ---------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 0d4ed51920093..bf0b6ab90a165 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1531,6 +1531,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                                            convertType(expr->getType()), ops);
     break;
   case NEON::BI__builtin_neon_vfmsh_f16:
+    ops.push_back(builder.createFNeg(emitScalarExpr(expr->getArg(1))));
+    ops.push_back(emitScalarExpr(expr->getArg(2)));
+    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
+                                           convertType(expr->getType()), ops);
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:
   case NEON::BI__builtin_neon_vsubd_s64:
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index 3a96107a3a0f6..ab424fc08f176 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -60,3 +60,15 @@ float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
 // LLVM: ret half [[FMA]]
   return vfmah_f16(a, b, c);
 }
+
+// ALL-LABEL: test_vfmsh_f16
+float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
+// CIR: [[SUB:%.*]] = cir.unary(minus, %{{.*}}) : !cir.f16, !cir.f16
+// CIR: cir.call_llvm_intrinsic "fma" [[SUB]], {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+
+// LLVM-SAME: half{{.*}} [[A:%.*]], half{{.*}} [[B:%.*]], half{{.*}} [[C:%.*]])
+// LLVM: [[SUB:%.*]] = fneg half [[B]]
+// LLVM: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[C]], half [[A]])
+// LLVM: ret half [[ADD]]
+  return vfmsh_f16(a, b, c);
+}
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index d6bfb1c607f81..080e2351ff1e7 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -619,12 +619,3 @@ float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
 float16_t test_vsubh_f16(float16_t a, float16_t b) {
   return vsubh_f16(a, b);
 }
-
-// CHECK-LABEL: test_vfmsh_f16
-// CHECK: [[SUB:%.*]] = fneg half %b
-// CHECK: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
-// CHECK: ret half [[ADD]]
-float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
-  return vfmsh_f16(a, b, c);
-}
-
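A note for readers skimming the two patches above: the scalar ACLE intrinsics take the accumulator first, so vfmah_f16(a, b, c) computes a + b * c and vfmsh_f16(a, b, c) computes a - b * c, whereas llvm.fma takes the addend last. That is why the operands are pushed in the order arg(1), arg(2), arg(0), and why vfmsh negates the first multiplicand. A minimal standalone sketch of those reference semantics, with float standing in for float16_t (illustration only, not part of the patch):

// Illustration only -- not part of the patch. Reference semantics of the two
// scalar intrinsics, with float standing in for float16_t.
#include <cassert>
#include <cmath>

static float ref_vfmah(float a, float b, float c) {
  // vfmah_f16(a, b, c) == a + b * c == fma(b, c, a): accumulator moves last.
  return std::fma(b, c, a);
}

static float ref_vfmsh(float a, float b, float c) {
  // vfmsh_f16(a, b, c) == a - b * c == fma(-b, c, a): negate one multiplicand.
  return std::fma(-b, c, a);
}

int main() {
  assert(ref_vfmah(1.0f, 2.0f, 3.0f) == 7.0f);  // 1 + 2 * 3
  assert(ref_vfmsh(1.0f, 2.0f, 3.0f) == -5.0f); // 1 - 2 * 3
  return 0;
}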
From 7b04cb2280759c932718de7d84587cf71946cc74 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Mon, 16 Feb 2026 19:09:44 +0000
Subject: [PATCH 3/3] Avoid repeating

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 48 ++++++++++++++-----
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index bf0b6ab90a165..699fee5a3a358 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1355,10 +1355,41 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   // Find out if any arguments are required to be integer constant
   // expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
+  unsigned iceArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &iceArguments);
+  assert(error == ASTContext::GE_None && "Should not codegen an error");
+  llvm::SmallVector<mlir::Value> ops;
+  for (auto [idx, arg] : llvm::enumerate(expr->arguments())) {
+    if (idx == 0) {
+      switch (builtinID) {
+      case NEON::BI__builtin_neon_vld1_v:
+      case NEON::BI__builtin_neon_vld1q_v:
+      case NEON::BI__builtin_neon_vld1_dup_v:
+      case NEON::BI__builtin_neon_vld1q_dup_v:
+      case NEON::BI__builtin_neon_vld1_lane_v:
+      case NEON::BI__builtin_neon_vld1q_lane_v:
+      case NEON::BI__builtin_neon_vst1_v:
+      case NEON::BI__builtin_neon_vst1q_v:
+      case NEON::BI__builtin_neon_vst1_lane_v:
+      case NEON::BI__builtin_neon_vst1q_lane_v:
+      case NEON::BI__builtin_neon_vldap1_lane_s64:
+      case NEON::BI__builtin_neon_vldap1q_lane_s64:
+      case NEON::BI__builtin_neon_vstl1_lane_s64:
+      case NEON::BI__builtin_neon_vstl1q_lane_s64:
+        // Get the alignment for the argument in addition to the value;
+        // we'll use it later.
+        cgm.errorNYI(
+            expr->getSourceRange(),
+            std::string("unimplemented AArch64 builtin argument handling ") +
+                getContext().BuiltinInfo.getName(builtinID));
+      }
+    }
+    ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
+  }
 
   assert(!cir::MissingFeatures::neonSISDIntrinsics());
 
-  llvm::SmallVector<mlir::Value> ops;
   mlir::Location loc = getLoc(expr->getExprLoc());
 
   // Handle non-overloaded intrinsics first.
@@ -1366,7 +1397,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   default:
     break;
   case NEON::BI__builtin_neon_vabsh_f16: {
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return cir::FAbsOp::create(builder, loc, ops);
   }
   case NEON::BI__builtin_neon_vaddq_p128:
@@ -1408,7 +1438,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vceqzd_s64:
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return emitAArch64CompareBuiltinExpr(
         *this, builder, loc, ops[0],
         convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq);
@@ -1462,11 +1491,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vnegd_s64: {
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return builder.createNeg(ops[0]);
   }
   case NEON::BI__builtin_neon_vnegh_f16: {
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return builder.createFNeg(ops[0]);
   }
   case NEON::BI__builtin_neon_vtstd_s64:
@@ -1524,16 +1551,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vfmah_f16:
-    ops.push_back(emitScalarExpr(expr->getArg(1)));
-    ops.push_back(emitScalarExpr(expr->getArg(2)));
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+    std::rotate(ops.begin(), ops.begin() + 1, ops.end());
     return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
                                            convertType(expr->getType()), ops);
     break;
   case NEON::BI__builtin_neon_vfmsh_f16:
-    ops.push_back(builder.createFNeg(emitScalarExpr(expr->getArg(1))));
-    ops.push_back(emitScalarExpr(expr->getArg(2)));
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+    std::rotate(ops.begin(), ops.begin() + 1, ops.end());
+    ops[0] = builder.createFNeg(ops[0]);
     return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
                                            convertType(expr->getType()), ops);
   case NEON::BI__builtin_neon_vaddd_s64:
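The std::rotate call in PATCH 3/3 performs the same reordering generically: with all operands now collected up front in source order {a, b, c}, a single left rotation yields the accumulator-last order {b, c, a} that llvm.fma expects, and the vfmsh case then only needs to negate ops[0]. A standalone sketch with placeholder values (illustration only, not part of the patch):

// Illustration only -- not part of the patch. The same one-step rotation,
// applied to placeholder characters instead of mlir::Value operands.
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<char> ops = {'a', 'b', 'c'}; // NEON order: accumulator first.
  std::rotate(ops.begin(), ops.begin() + 1, ops.end());
  // llvm.fma order: multiplicands first, accumulator last.
  assert((ops == std::vector<char>{'b', 'c', 'a'}));
  return 0;
}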
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
