https://github.com/yairbenavraham created https://github.com/llvm/llvm-project/pull/188190
This PR implements the AArch64 NEON ClangIR lowering for the vfma lane/laneq builtins and adds CIR-enabled regression tests.

Covered scope:
- vector lane/laneq forms
- scalar lane/laneq forms
- includes the vfmaq_laneq_v family called out in #185382

Validation:
- clean build from scratch
- post-build sanity check
- focused llvm-lit validation for the touched AArch64 NEON tests

Part of #185382

>From 3aa2a1dcd459df3235b33430e02d99d9f76fe00d Mon Sep 17 00:00:00 2001 From: Yair Ben Avraham <[email protected]> Date: Sun, 22 Mar 2026 04:57:07 +0200 Subject: [PATCH 1/3] [CIR] Fix generated type constraint header dependencies Add the missing MLIRCIRTypeConstraintsIncGen dependencies in the CIR dialect and lowering CMake targets so clean CIR-enabled builds generate the required headers before the lowering libraries are compiled. --- clang/include/clang/CIR/Dialect/IR/CMakeLists.txt | 2 +- clang/lib/CIR/Lowering/CMakeLists.txt | 3 +++ clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt b/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt index 870f9e3f5d052..1388e5bc612f2 100644 --- a/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt +++ b/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt @@ -27,5 +27,5 @@ clang_tablegen(CIRLowering.inc -gen-cir-lowering set(LLVM_TARGET_DEFINITIONS CIRTypeConstraints.td) mlir_tablegen(CIRTypeConstraints.h.inc -gen-type-constraint-decls) mlir_tablegen(CIRTypeConstraints.cpp.inc -gen-type-constraint-defs) -add_public_tablegen_target(MLIRCIRTypeConstraintsIncGen) +add_mlir_generic_tablegen_target(MLIRCIRTypeConstraintsIncGen) add_dependencies(mlir-headers MLIRCIRTypeConstraintsIncGen) diff --git a/clang/lib/CIR/Lowering/CMakeLists.txt b/clang/lib/CIR/Lowering/CMakeLists.txt index 28ec3c551018c..77d28ef72d11d 100644 --- a/clang/lib/CIR/Lowering/CMakeLists.txt +++ b/clang/lib/CIR/Lowering/CMakeLists.txt @@ -9,6 +9,9 @@ 
add_clang_library(clangCIRLoweringCommon CIRPasses.cpp LoweringHelpers.cpp + DEPENDS + MLIRCIRTypeConstraintsIncGen + LINK_LIBS clangCIR ${dialect_libs} diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt index c7467fe40ba30..5b197ddca12c0 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt +++ b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt @@ -13,6 +13,7 @@ add_clang_library(clangCIRLoweringDirectToLLVM MLIRCIREnumsGen MLIRCIROpsIncGen MLIRCIROpInterfacesIncGen + MLIRCIRTypeConstraintsIncGen LINK_LIBS clangCIRLoweringCommon >From edf952a72486ae41b7720f92067638ee76b09251 Mon Sep 17 00:00:00 2001 From: Yair Ben Avraham <[email protected]> Date: Mon, 23 Mar 2026 17:55:19 +0200 Subject: [PATCH 2/3] [CIR][AArch64] Lower vfma lane builtins Lower the AArch64 vfma lane and laneq builtins in CIR codegen. This adds handling for the vector and scalar vfma lane forms, including the vfmaq_laneq_v family called out in the issue, and keeps the CIR builtin structure aligned with the existing AArch64 builtin lowering pattern. The patch also includes the required formatting adjustment so the implementation matches the repository clang-format style. 
--- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 67 +++++++++++++++---- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 5d7b8d839fa84..26560b2ab3447 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -801,11 +801,10 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16")); [[fallthrough]]; case NeonTypeFlags::Float16: - if (hasLegalHalfType) + if (!hasLegalHalfType) cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); - else - cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16")); - [[fallthrough]]; + return cir::VectorType::get(cgf->getCIRGenModule().fP16Ty, + v1Ty ? 1 : (4 << isQuad)); case NeonTypeFlags::Int32: return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty : cgf->sInt32Ty, @@ -2848,6 +2847,23 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return mlir::Value{}; } + switch (builtinID) { + case NEON::BI__builtin_neon_vfmah_lane_f16: + case NEON::BI__builtin_neon_vfmas_lane_f32: + case NEON::BI__builtin_neon_vfmah_laneq_f16: + case NEON::BI__builtin_neon_vfmas_laneq_f32: + case NEON::BI__builtin_neon_vfmad_lane_f64: + case NEON::BI__builtin_neon_vfmad_laneq_f64: { + mlir::Value lane = cir::VecExtractOp::create(builder, loc, ops[2], ops[3]); + mlir::Type scalarTy = convertType(expr->getType()); + llvm::SmallVector<mlir::Value> fmaOps = {ops[1], lane, ops[0]}; + return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", scalarTy, + fmaOps); + } + default: + break; + } + cir::VectorType ty = getNeonType(this, type, loc); if (!ty) return nullptr; @@ -2859,16 +2875,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return std::nullopt; case 
NEON::BI__builtin_neon_vbsl_v: case NEON::BI__builtin_neon_vbslq_v: - case NEON::BI__builtin_neon_vfma_lane_v: - case NEON::BI__builtin_neon_vfmaq_lane_v: - case NEON::BI__builtin_neon_vfma_laneq_v: - case NEON::BI__builtin_neon_vfmaq_laneq_v: - case NEON::BI__builtin_neon_vfmah_lane_f16: - case NEON::BI__builtin_neon_vfmas_lane_f32: - case NEON::BI__builtin_neon_vfmah_laneq_f16: - case NEON::BI__builtin_neon_vfmas_laneq_f32: - case NEON::BI__builtin_neon_vfmad_lane_f64: - case NEON::BI__builtin_neon_vfmad_laneq_f64: case NEON::BI__builtin_neon_vmull_v: case NEON::BI__builtin_neon_vmax_v: case NEON::BI__builtin_neon_vmaxq_v: @@ -2886,6 +2892,39 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, if (cir::isFPOrVectorOfFPType(ty)) intrName = "aarch64.neon.fabd"; return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); + case NEON::BI__builtin_neon_vfma_lane_v: + case NEON::BI__builtin_neon_vfmaq_lane_v: + case NEON::BI__builtin_neon_vfma_laneq_v: + case NEON::BI__builtin_neon_vfmaq_laneq_v: { + mlir::Value addend = ops[0]; + mlir::Value multiplicand = ops[1]; + mlir::Value laneSource = ops[2]; + auto vecTy = mlir::cast<cir::VectorType>(ty); + auto elemTy = vecTy.getElementType(); + auto numElts = vecTy.getSize(); + + if (addend.getType() != ty) + addend = builder.createBitcast(loc, addend, ty); + if (multiplicand.getType() != ty) + multiplicand = builder.createBitcast(loc, multiplicand, ty); + + cir::VectorType sourceTy = ty; + if (builtinID == NEON::BI__builtin_neon_vfmaq_lane_v) + sourceTy = cir::VectorType::get(elemTy, numElts / 2); + else if (builtinID == NEON::BI__builtin_neon_vfma_laneq_v) + sourceTy = cir::VectorType::get(elemTy, numElts * 2); + + if (laneSource.getType() != sourceTy) + laneSource = builder.createBitcast(loc, laneSource, sourceTy); + + int64_t lane = + expr->getArg(3)->EvaluateKnownConstInt(getContext()).getSExtValue(); + llvm::SmallVector<int64_t> mask(numElts, lane); + mlir::Value splat = 
builder.createVecShuffle(loc, laneSource, mask); + + llvm::SmallVector<mlir::Value> fmaOps = {multiplicand, splat, addend}; + return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", ty, fmaOps); + } case NEON::BI__builtin_neon_vpadal_v: case NEON::BI__builtin_neon_vpadalq_v: case NEON::BI__builtin_neon_vpmin_v: >From a7e2f678ea07679d73974bf3aa200f3248a7c1db Mon Sep 17 00:00:00 2001 From: Yair Ben Avraham <[email protected]> Date: Mon, 23 Mar 2026 17:55:19 +0200 Subject: [PATCH 3/3] [CIR][AArch64] Add vfma lane tests Add focused AArch64 NEON tests for the vfma lane and laneq builtins. The tests cover the vector and scalar forms used by this patch series and are placed under clang/test/CodeGen/AArch64/neon for CIR-enabled validation. --- clang/test/CodeGen/AArch64/neon/vfma-lane.c | 136 ++++++++++++++++++ .../CodeGen/AArch64/neon/vfma-scalar-lane.c | 77 ++++++++++ 2 files changed, 213 insertions(+) create mode 100644 clang/test/CodeGen/AArch64/neon/vfma-lane.c create mode 100644 clang/test/CodeGen/AArch64/neon/vfma-scalar-lane.c diff --git a/clang/test/CodeGen/AArch64/neon/vfma-lane.c b/clang/test/CodeGen/AArch64/neon/vfma-lane.c new file mode 100644 index 0000000000000..955ab411793b9 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/vfma-lane.c @@ -0,0 +1,136 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -target-feature +fullfp16 -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -emit-llvm -o - %s | \ +// RUN: opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefix=LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu \ +// RUN: -target-feature +neon -target-feature +fullfp16 \ +// RUN: -disable-O0-optnone -flax-vector-conversions=none \ +// RUN: -fclangir -emit-llvm -o - %s | \ +// RUN: opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefix=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu \ +// RUN: -target-feature +neon 
-target-feature +fullfp16 \ +// RUN: -disable-O0-optnone -flax-vector-conversions=none \ +// RUN: -fclangir -emit-cir -o - %s | FileCheck %s --check-prefix=CIR %} + +#include <arm_neon.h> + +// LLVM-LABEL: @test_vfma_lane_f16( +// LLVM: shufflevector <4 x half> +// LLVM: call <4 x half> @llvm.fma.v4f16( +// CIR-LABEL: @test_vfma_lane_f16( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) { + return vfma_lane_f16(a, b, c, 3); +} + +// LLVM-LABEL: @test_vfmaq_lane_f16( +// LLVM: shufflevector <4 x half> +// LLVM: call <8 x half> @llvm.fma.v8f16( +// CIR-LABEL: @test_vfmaq_lane_f16( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { + return vfmaq_lane_f16(a, b, c, 3); +} + +// LLVM-LABEL: @test_vfma_laneq_f16( +// LLVM: shufflevector <8 x half> +// LLVM: call <4 x half> @llvm.fma.v4f16( +// CIR-LABEL: @test_vfma_laneq_f16( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { + return vfma_laneq_f16(a, b, c, 7); +} + +// LLVM-LABEL: @test_vfmaq_laneq_f16( +// LLVM: shufflevector <8 x half> +// LLVM: call <8 x half> @llvm.fma.v8f16( +// CIR-LABEL: @test_vfmaq_laneq_f16( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { + return vfmaq_laneq_f16(a, b, c, 7); +} + +// LLVM-LABEL: @test_vfma_lane_f32( +// LLVM: shufflevector <2 x float> +// LLVM: call <2 x float> @llvm.fma.v2f32( +// CIR-LABEL: @test_vfma_lane_f32( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { + return vfma_lane_f32(a, b, v, 1); +} + +// LLVM-LABEL: @test_vfmaq_lane_f32( +// LLVM: shufflevector <2 x float> +// LLVM: call <4 x float> 
@llvm.fma.v4f32( +// CIR-LABEL: @test_vfmaq_lane_f32( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { + return vfmaq_lane_f32(a, b, v, 1); +} + +// LLVM-LABEL: @test_vfma_laneq_f32( +// LLVM: shufflevector <4 x float> +// LLVM: call <2 x float> @llvm.fma.v2f32( +// CIR-LABEL: @test_vfma_laneq_f32( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { + return vfma_laneq_f32(a, b, v, 3); +} + +// LLVM-LABEL: @test_vfmaq_laneq_f32( +// LLVM: shufflevector <4 x float> +// LLVM: call <4 x float> @llvm.fma.v4f32( +// CIR-LABEL: @test_vfmaq_laneq_f32( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { + return vfmaq_laneq_f32(a, b, v, 3); +} + +// LLVM-LABEL: @test_vfma_lane_f64( +// LLVM: shufflevector <1 x double> +// LLVM: call <1 x double> @llvm.fma.v1f64( +// CIR-LABEL: @test_vfma_lane_f64( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { + return vfma_lane_f64(a, b, v, 0); +} + +// LLVM-LABEL: @test_vfmaq_lane_f64( +// LLVM: shufflevector <1 x double> +// LLVM: call <2 x double> @llvm.fma.v2f64( +// CIR-LABEL: @test_vfmaq_lane_f64( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { + return vfmaq_lane_f64(a, b, v, 0); +} + +// LLVM-LABEL: @test_vfma_laneq_f64( +// LLVM: @llvm.fma +// CIR-LABEL: @test_vfma_laneq_f64( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { + return vfma_laneq_f64(a, b, v, 0); +} + +// LLVM-LABEL: @test_vfmaq_laneq_f64( +// LLVM: shufflevector <2 x double> +// LLVM: call <2 x double> 
@llvm.fma.v2f64( +// CIR-LABEL: @test_vfmaq_laneq_f64( +// CIR: cir.vec.shuffle +// CIR: cir.call_llvm_intrinsic "fma" +float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { + return vfmaq_laneq_f64(a, b, v, 1); +} diff --git a/clang/test/CodeGen/AArch64/neon/vfma-scalar-lane.c b/clang/test/CodeGen/AArch64/neon/vfma-scalar-lane.c new file mode 100644 index 0000000000000..53fc9761e01a0 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/vfma-scalar-lane.c @@ -0,0 +1,77 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -target-feature +fullfp16 -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -emit-llvm -o - %s | \ +// RUN: opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefix=LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu \ +// RUN: -target-feature +neon -target-feature +fullfp16 \ +// RUN: -disable-O0-optnone -flax-vector-conversions=none \ +// RUN: -fclangir -emit-llvm -o - %s | \ +// RUN: opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefix=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu \ +// RUN: -target-feature +neon -target-feature +fullfp16 \ +// RUN: -disable-O0-optnone -flax-vector-conversions=none \ +// RUN: -fclangir -emit-cir -o - %s | FileCheck %s --check-prefix=CIR %} + +#include <arm_neon.h> + +// LLVM-LABEL: @test_vfmah_lane_f16( +// LLVM: extractelement <4 x half> +// LLVM: call half @llvm.fma.f16( +// CIR-LABEL: @test_vfmah_lane_f16( +// CIR: cir.vec.extract +// CIR: cir.call_llvm_intrinsic "fma" +float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) { + return vfmah_lane_f16(a, b, c, 3); +} + +// LLVM-LABEL: @test_vfmah_laneq_f16( +// LLVM: extractelement <8 x half> +// LLVM: call half @llvm.fma.f16( +// CIR-LABEL: @test_vfmah_laneq_f16( +// CIR: cir.vec.extract +// CIR: cir.call_llvm_intrinsic "fma" +float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, 
float16x8_t c) { + return vfmah_laneq_f16(a, b, c, 7); +} + +// LLVM-LABEL: @test_vfmas_lane_f32( +// LLVM: extractelement <2 x float> +// LLVM: call float @llvm.fma.f32( +// CIR-LABEL: @test_vfmas_lane_f32( +// CIR: cir.vec.extract +// CIR: cir.call_llvm_intrinsic "fma" +float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) { + return vfmas_lane_f32(a, b, c, 1); +} + +// LLVM-LABEL: @test_vfmas_laneq_f32( +// LLVM: extractelement <4 x float> +// LLVM: call float @llvm.fma.f32( +// CIR-LABEL: @test_vfmas_laneq_f32( +// CIR: cir.vec.extract +// CIR: cir.call_llvm_intrinsic "fma" +float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t c) { + return vfmas_laneq_f32(a, b, c, 3); +} + +// LLVM-LABEL: @test_vfmad_lane_f64( +// LLVM: extractelement <1 x double> +// LLVM: call double @llvm.fma.f64( +// CIR-LABEL: @test_vfmad_lane_f64( +// CIR: cir.vec.extract +// CIR: cir.call_llvm_intrinsic "fma" +float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) { + return vfmad_lane_f64(a, b, c, 0); +} + +// LLVM-LABEL: @test_vfmad_laneq_f64( +// LLVM: extractelement <2 x double> +// LLVM: call double @llvm.fma.f64( +// CIR-LABEL: @test_vfmad_laneq_f64( +// CIR: cir.vec.extract +// CIR: cir.call_llvm_intrinsic "fma" +float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) { + return vfmad_laneq_f64(a, b, c, 1); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
