https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/185478
From b0df1da73a8039464b79dae6da4b0e6117b00d5d Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Mon, 9 Mar 2026 17:50:06 +0000 Subject: [PATCH 1/2] [CIR][AArch64] Add lowering for remaining `vabd_*` builtins Implement the missing CIR lowerings for the AdvSIMD (Neon) `vabd_*` (absolute difference) intrinsic group. Most `vabd` variants were already supported (see #183595); this patch completes the remaining cases listed in [1]. Move the corresponding tests from: * clang/test/CodeGen/AArch64/neon-intrinsics.c to: * clang/test/CodeGen/AArch64/neon/intrinsics.c The implementation mirrors the existing lowering in CodeGen/TargetBuiltins/ARM.cpp. To support this, add the `emitCommonNeonSISDBuiltinExpr` helper. Reference: [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference --- clang/include/clang/CIR/MissingFeatures.h | 1 - .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 353 +++++++++++++++++- clang/test/CodeGen/AArch64/neon-intrinsics.c | 20 - clang/test/CodeGen/AArch64/neon/intrinsics.c | 24 +- 4 files changed, 372 insertions(+), 26 deletions(-) diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 1e3a2c9af35d1..70a3dc3bdbfa5 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -311,7 +311,6 @@ struct MissingFeatures { static bool metaDataNode() { return false; } static bool moduleNameHash() { return false; } static bool msabi() { return false; } - static bool neonSISDIntrinsics() { return false; } static bool nrvo() { return false; } static bool objCBlocks() { return false; } static bool objCGC() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index ea215985f3032..9591c0f8f67c4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -409,6 +409,301 @@ static const 
ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), }; +// Single-Instruction-Single-Data (SISD) intrinsics. +// +// The name is somewhat misleading: not all intrinsics in this table are +// strictly SISD. While many builtins operate on scalars, +// * some take vector operands (e.g. reduction builtins such as +// `vminvq_u16` and `vaddvq_s32`), and +// * some take both scalar and vector operands (e.g. crypto builtins +// such as `vsha1cq_u32`). +// +// TODO: Either rename this table to better reflect its contents, or +// restrict it to true SISD intrinsics only. +static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { + NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), + NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u32, vector_reduce_add, 
Add1ArgType), + NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType), + NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP0(vcvth_bf16_f32), + NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | 
Add1ArgType), + NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + 
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), + NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType), + NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType), + NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType), + NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType), + 
NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType), + NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType), + NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType), + NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), + NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType), + NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType), + NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), + 
NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), + NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), + NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), + NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), + NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), + NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), + NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), + NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), + NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), + NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), + NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrdmulhs_s32, 
aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), + NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), + NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), + NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), + NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), + NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), + NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), + NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, 
Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), + NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), + NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), + NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), + NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), + NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), + NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), + NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), + NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), + 
NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), + NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), + NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), + NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), + NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), + NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), + NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), + NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), + NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), + NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), + NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), + NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), + NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), + NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), + NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), + // FP16 scalar intrinisics go here. 
+ NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s64_f16, 
aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), +}; +// clang-format on + #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ TypeModifier} @@ -422,6 +717,7 @@ static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = { }; static bool aarch64SIMDIntrinsicsProvenSorted = false; +static bool aarch64SISDIntrinsicsProvenSorted = false; static bool aarch64SVEIntrinsicsProvenSorted = false; // Check if Builtin `builtinId` is present in `intrinsicMap`. If yes, returns @@ -1055,6 +1351,56 @@ static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder, isConstrainedFPIntrinsic, shift, rightshift); } +static mlir::Value emitCommonNeonSISDBuiltinExpr( + CIRGenFunction &cgf, const ARMVectorIntrinsicInfo &info, + llvm::SmallVectorImpl<mlir::Value> &ops, const CallExpr *expr) { + assert(info.llvmIntrinsic && "Generic code assumes a valid intrinsic"); + + switch (info.builtinID) { + case NEON::BI__builtin_neon_vcled_s64: + case NEON::BI__builtin_neon_vcled_u64: + case NEON::BI__builtin_neon_vcles_f32: + case NEON::BI__builtin_neon_vcled_f64: + case NEON::BI__builtin_neon_vcltd_s64: + case NEON::BI__builtin_neon_vcltd_u64: + case NEON::BI__builtin_neon_vclts_f32: + case NEON::BI__builtin_neon_vcltd_f64: + case NEON::BI__builtin_neon_vcales_f32: + case NEON::BI__builtin_neon_vcaled_f64: + case NEON::BI__builtin_neon_vcalts_f32: + case NEON::BI__builtin_neon_vcaltd_f64: + cgf.cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented 
AArch64 builtin call: ") + + cgf.getContext().BuiltinInfo.getName(info.builtinID)); + break; + } + + llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix( + static_cast<llvm::Intrinsic::ID>(info.llvmIntrinsic)); + mlir::Location loc = cgf.getLoc(expr->getExprLoc()); + + // The switch stmt is intended to help catch NYI cases and will be removed + // once the CIR implementation is complete. Avoid adding specialized + // code in cases - that should only be required for a handful of examples. + switch (info.builtinID) { + default: + cgf.cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + cgf.getContext().BuiltinInfo.getName(info.builtinID)); + break; + case NEON::BI__builtin_neon_vabdd_f64: + case NEON::BI__builtin_neon_vabds_f32: + return emitNeonCall(cgf.cgm, cgf.getBuilder(), + {cgf.convertType(expr->getArg(0)->getType())}, ops, + llvmIntrName, cgf.convertType(expr->getType()), loc); + } + + return nullptr; +} + +//===----------------------------------------------------------------------===// +// SVE helpers +//===----------------------------------------------------------------------===// std::optional<mlir::Value> CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, const CallExpr *expr) { @@ -2218,7 +2564,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, emitScalarOrConstFoldImmArg(iceArguments, i, expr->getArg(i))); } - assert(!cir::MissingFeatures::neonSISDIntrinsics()); + auto theSISDMap = ArrayRef(AArch64SISDIntrinsicMap); + const ARMVectorIntrinsicInfo *builtin = findARMVectorIntrinsicInMap( + theSISDMap, builtinID, aarch64SISDIntrinsicsProvenSorted); + if (builtin) + return emitCommonNeonSISDBuiltinExpr(*this, *builtin, ops, expr); // Not all intrinsics handled by the common case work for AArch64 yet, so only // defer to common code if it's been added to our special map. 
@@ -2240,7 +2590,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, // Not all intrinsics handled by the common case work for AArch64 yet, so only // defer to common code if it's been added to our special map. - const ARMVectorIntrinsicInfo *builtin; builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID, aarch64SIMDIntrinsicsProvenSorted); diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 33b0b6bc55426..0d316afe0ecd7 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -22417,26 +22417,6 @@ poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) { return vreinterpretq_p64_p16(a); } -// CHECK-LABEL: define dso_local float @test_vabds_f32( -// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float [[A]], float [[B]]) -// CHECK-NEXT: ret float [[VABDS_F32_I]] -// -float32_t test_vabds_f32(float32_t a, float32_t b) { - return vabds_f32(a, b); -} - -// CHECK-LABEL: define dso_local double @test_vabdd_f64( -// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double [[A]], double [[B]]) -// CHECK-NEXT: ret double [[VABDD_F64_I]] -// -float64_t test_vabdd_f64(float64_t a, float64_t b) { - return vabdd_f64(a, b); -} - // CHECK-LABEL: define dso_local <16 x i8> @test_vuqaddq_s8( // CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index 94275599c334e..e2e6f00aee072 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ 
-676,9 +676,27 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { return vabdq_f64(v1, v2); } -// TODO SISD variants: -// TODO @vabdd_f64(a, b); -// TODO @test_vabds_f32( +// LLVM-LABEL: @test_vabds_f32( +// CIR-LABEL: @vabds_f32( +float32_t test_vabds_f32(float32_t a, float32_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.sisd.fabd" + +// LLVM-SAME: float {{.*}} [[A:%.*]], float noundef [[B:%.*]]) +// LLVM: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float [[A]], float [[B]]) +// LLVM-NEXT: ret float [[VABDS_F32_I]] + return vabds_f32(a, b); +} + +// LLVM-LABEL: @test_vabdd_f64( +// CIR-LABEL: @vabdd_f64( +float64_t test_vabdd_f64(float64_t a, float64_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.sisd.fabd" + +// LLVM-SAME: double {{.*}} [[A:%.*]], double noundef [[B:%.*]]) +// LLVM: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double [[A]], double [[B]]) +// LLVM-NEXT: ret double [[VABDD_F64_I]] + return vabdd_f64(a, b); +} //===------------------------------------------------------===// // 2.1.1.6.3. 
Absolute difference and accumulate From 2fac7a83bc22868415abdfa1906da5c51adfb8ed Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Tue, 10 Mar 2026 11:50:21 +0000 Subject: [PATCH 2/2] Minor simplification --- clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 9591c0f8f67c4..1614991ba6a09 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2564,9 +2564,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, emitScalarOrConstFoldImmArg(iceArguments, i, expr->getArg(i))); } - auto theSISDMap = ArrayRef(AArch64SISDIntrinsicMap); const ARMVectorIntrinsicInfo *builtin = findARMVectorIntrinsicInMap( - theSISDMap, builtinID, aarch64SISDIntrinsicsProvenSorted); + AArch64SISDIntrinsicMap, builtinID, aarch64SISDIntrinsicsProvenSorted); if (builtin) return emitCommonNeonSISDBuiltinExpr(*this, *builtin, ops, expr); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
