https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/181148
From 7eaaf905be04a69a4e5ea880cf5a0208f4881ef7 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Thu, 12 Feb 2026 14:16:03 +0000
Subject: [PATCH 1/3] [CIR][NEON] Add lowering for `vfmah_f16`

As with other NEON builtins, reuse the existing default-lowering tests
to validate the CIR lowering path.
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp       | 21 +++++++++++++++++++
 clang/test/CodeGen/AArch64/neon/fullfp16.c         | 10 +++++++++
 .../CodeGen/AArch64/v8.2a-fp16-intrinsics.c        |  7 -------
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 71cf896aede10..0d4ed51920093 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -123,6 +123,17 @@ emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
   return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
 }
 
+// Emit an intrinsic where all operands are of the same type as the result.
+// Depending on mode, this may be a constrained floating-point intrinsic.
+static mlir::Value
+emitCallMaybeConstrainedBuiltin(CIRGenBuilderTy &builder, mlir::Location loc,
+                                StringRef intrName, mlir::Type retTy,
+                                llvm::SmallVector<mlir::Value> &ops) {
+  assert(!cir::MissingFeatures::emitConstrainedFPCall());
+
+  return builder.emitIntrinsicCallOp(loc, intrName, retTy, ops);
+}
+
 bool CIRGenFunction::getAArch64SVEProcessedOperands(
     unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops,
     SVETypeFlags typeFlags) {
@@ -1508,7 +1519,17 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vsubh_f16:
   case NEON::BI__builtin_neon_vmulh_f16:
   case NEON::BI__builtin_neon_vdivh_f16:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vfmah_f16:
+    ops.push_back(emitScalarExpr(expr->getArg(1)));
+    ops.push_back(emitScalarExpr(expr->getArg(2)));
+    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
+                                           convertType(expr->getType()), ops);
+    break;
   case NEON::BI__builtin_neon_vfmsh_f16:
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index f3268df2f4165..3a96107a3a0f6 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -50,3 +50,13 @@ float16_t test_vnegh_f16(float16_t a) {
 // LLVM: ret half [[NEG]]
   return vnegh_f16(a);
 }
+
+// ALL-LABEL: test_vfmah_f16
+float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
+// CIR: cir.call_llvm_intrinsic "fma" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+
+// LLVM-SAME: half{{.*}} [[A:%.*]], half{{.*}} [[B:%.*]], half{{.*}} [[C:%.*]])
+// LLVM: [[FMA:%.*]] = call half @llvm.fma.f16(half [[B]], half [[C]], half [[A]])
+// LLVM: ret half [[FMA]]
+  return vfmah_f16(a, b, c);
+}
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index 353f02195721f..d6bfb1c607f81 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -620,13 +620,6 @@ float16_t test_vsubh_f16(float16_t a, float16_t b) {
   return vsubh_f16(a, b);
 }
 
-// CHECK-LABEL: test_vfmah_f16
-// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half %c, half %a)
-// CHECK: ret half [[FMA]]
-float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
-  return vfmah_f16(a, b, c);
-}
-
 // CHECK-LABEL: test_vfmsh_f16
 // CHECK: [[SUB:%.*]] = fneg half %b
 // CHECK: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)

From 8188d17ebd6bb73370314df38773aa1cebefd1d1 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Thu, 12 Feb 2026 20:01:48 +0000
Subject: [PATCH 2/3] Add vfmsh_f16

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp     |  5 +++++
 clang/test/CodeGen/AArch64/neon/fullfp16.c         | 12 ++++++++++++
 clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c |  9 ---------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 0d4ed51920093..bf0b6ab90a165 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1531,6 +1531,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                                            convertType(expr->getType()), ops);
     break;
   case NEON::BI__builtin_neon_vfmsh_f16:
+    ops.push_back(builder.createFNeg(emitScalarExpr(expr->getArg(1))));
+    ops.push_back(emitScalarExpr(expr->getArg(2)));
+    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
+                                           convertType(expr->getType()), ops);
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:
   case NEON::BI__builtin_neon_vsubd_s64:
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index 3a96107a3a0f6..ab424fc08f176 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -60,3 +60,15 @@ float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
 // LLVM: ret half [[FMA]]
   return vfmah_f16(a, b, c);
 }
+
+// ALL-LABEL: test_vfmsh_f16
+float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
+// CIR: [[SUB:%.*]] = cir.unary(minus, %{{.*}}) : !cir.f16, !cir.f16
+// CIR: cir.call_llvm_intrinsic "fma" [[SUB]], {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+
+// LLVM-SAME: half{{.*}} [[A:%.*]], half{{.*}} [[B:%.*]], half{{.*}} [[C:%.*]])
+// LLVM: [[SUB:%.*]] = fneg half [[B]]
+// LLVM: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[C]], half [[A]])
+// LLVM: ret half [[ADD]]
+  return vfmsh_f16(a, b, c);
+}
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index d6bfb1c607f81..080e2351ff1e7 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -619,12 +619,3 @@ float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
 float16_t test_vsubh_f16(float16_t a, float16_t b) {
   return vsubh_f16(a, b);
 }
-
-// CHECK-LABEL: test_vfmsh_f16
-// CHECK: [[SUB:%.*]] = fneg half %b
-// CHECK: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
-// CHECK: ret half [[ADD]]
-float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
-  return vfmsh_f16(a, b, c);
-}
-
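A note for readers skimming the two patches above: the scalar ACLE intrinsics take the accumulator first, so vfmah_f16(a, b, c) computes a + b * c and vfmsh_f16(a, b, c) computes a - b * c, whereas llvm.fma takes the addend last. That is why the operands are pushed in the order arg(1), arg(2), arg(0), and why vfmsh negates the first multiplicand. A minimal standalone sketch of those reference semantics, with float standing in for float16_t (illustration only, not part of the patch):

// Illustration only -- not part of the patch. Reference semantics of the two
// scalar intrinsics, with float standing in for float16_t.
#include <cassert>
#include <cmath>

static float ref_vfmah(float a, float b, float c) {
  // vfmah_f16(a, b, c) == a + b * c == fma(b, c, a): accumulator moves last.
  return std::fma(b, c, a);
}

static float ref_vfmsh(float a, float b, float c) {
  // vfmsh_f16(a, b, c) == a - b * c == fma(-b, c, a): negate one multiplicand.
  return std::fma(-b, c, a);
}

int main() {
  assert(ref_vfmah(1.0f, 2.0f, 3.0f) == 7.0f);  // 1 + 2 * 3
  assert(ref_vfmsh(1.0f, 2.0f, 3.0f) == -5.0f); // 1 - 2 * 3
  return 0;
}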
From 7b04cb2280759c932718de7d84587cf71946cc74 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Mon, 16 Feb 2026 19:09:44 +0000
Subject: [PATCH 3/3] Avoid repeating

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 48 ++++++++++++++-----
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index bf0b6ab90a165..699fee5a3a358 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1355,10 +1355,41 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   // Find out if any arguments are required to be integer constant
   // expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
+  unsigned iceArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &iceArguments);
+  assert(error == ASTContext::GE_None && "Should not codegen an error");
+  llvm::SmallVector<mlir::Value> ops;
+  for (auto [idx, arg] : llvm::enumerate(expr->arguments())) {
+    if (idx == 0) {
+      switch (builtinID) {
+      case NEON::BI__builtin_neon_vld1_v:
+      case NEON::BI__builtin_neon_vld1q_v:
+      case NEON::BI__builtin_neon_vld1_dup_v:
+      case NEON::BI__builtin_neon_vld1q_dup_v:
+      case NEON::BI__builtin_neon_vld1_lane_v:
+      case NEON::BI__builtin_neon_vld1q_lane_v:
+      case NEON::BI__builtin_neon_vst1_v:
+      case NEON::BI__builtin_neon_vst1q_v:
+      case NEON::BI__builtin_neon_vst1_lane_v:
+      case NEON::BI__builtin_neon_vst1q_lane_v:
+      case NEON::BI__builtin_neon_vldap1_lane_s64:
+      case NEON::BI__builtin_neon_vldap1q_lane_s64:
+      case NEON::BI__builtin_neon_vstl1_lane_s64:
+      case NEON::BI__builtin_neon_vstl1q_lane_s64:
+        // Get the alignment for the argument in addition to the value;
+        // we'll use it later.
+        cgm.errorNYI(
+            expr->getSourceRange(),
+            std::string("unimplemented AArch64 builtin argument handling ") +
+                getContext().BuiltinInfo.getName(builtinID));
+      }
+    }
+    ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
+  }
 
   assert(!cir::MissingFeatures::neonSISDIntrinsics());
 
-  llvm::SmallVector<mlir::Value> ops;
   mlir::Location loc = getLoc(expr->getExprLoc());
 
   // Handle non-overloaded intrinsics first.
@@ -1366,7 +1397,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   default:
     break;
   case NEON::BI__builtin_neon_vabsh_f16: {
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return cir::FAbsOp::create(builder, loc, ops);
   }
   case NEON::BI__builtin_neon_vaddq_p128:
@@ -1408,7 +1438,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vceqzd_s64:
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return emitAArch64CompareBuiltinExpr(
         *this, builder, loc, ops[0],
         convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq);
@@ -1462,11 +1491,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vnegd_s64: {
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return builder.createNeg(ops[0]);
   }
   case NEON::BI__builtin_neon_vnegh_f16: {
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
     return builder.createFNeg(ops[0]);
   }
   case NEON::BI__builtin_neon_vtstd_s64:
@@ -1524,16 +1551,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vfmah_f16:
-    ops.push_back(emitScalarExpr(expr->getArg(1)));
-    ops.push_back(emitScalarExpr(expr->getArg(2)));
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+    std::rotate(ops.begin(), ops.begin() + 1, ops.end());
     return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
                                            convertType(expr->getType()), ops);
     break;
   case NEON::BI__builtin_neon_vfmsh_f16:
-    ops.push_back(builder.createFNeg(emitScalarExpr(expr->getArg(1))));
-    ops.push_back(emitScalarExpr(expr->getArg(2)));
-    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+    std::rotate(ops.begin(), ops.begin() + 1, ops.end());
+    ops[0] = builder.createFNeg(ops[0]);
     return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
                                            convertType(expr->getType()), ops);
   case NEON::BI__builtin_neon_vaddd_s64:
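The std::rotate call in PATCH 3/3 performs the same reordering generically: with all operands now collected up front in source order {a, b, c}, a single left rotation yields the accumulator-last order {b, c, a} that llvm.fma expects, and the vfmsh case then only needs to negate ops[0]. A standalone sketch with placeholder values (illustration only, not part of the patch):

// Illustration only -- not part of the patch. The same one-step rotation,
// applied to placeholder characters instead of mlir::Value operands.
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<char> ops = {'a', 'b', 'c'}; // NEON order: accumulator first.
  std::rotate(ops.begin(), ops.begin() + 1, ops.end());
  // llvm.fma order: multiplicands first, accumulator last.
  assert((ops == std::vector<char>{'b', 'c', 'a'}));
  return 0;
}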
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
