llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: Andrzej WarzyĆski (banach-space) <details> <summary>Changes</summary> - **Revert "[mlir] Avoid ASan ODR violation in mlir-src-sharder with LLVM dylib (#<!-- -->181045)"** - **[clang][Neon] Extract code shared by classic and CIR codegen (NFC)** --- Patch is 119.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/186448.diff 4 Files Affected: - (added) clang/include/clang/Basic/AArch64CodeGenUtils.h (+650) - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+14-676) - (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+2-627) - (modified) mlir/tools/mlir-src-sharder/CMakeLists.txt (+6-3) ``````````diff diff --git a/clang/include/clang/Basic/AArch64CodeGenUtils.h b/clang/include/clang/Basic/AArch64CodeGenUtils.h new file mode 100644 index 0000000000000..812914315c09f --- /dev/null +++ b/clang/include/clang/Basic/AArch64CodeGenUtils.h @@ -0,0 +1,650 @@ +//===--- AArch64CodeGenUtils.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Utilities used for generating code for AArch64 that are shared between the +/// classic and ClangIR code-gen. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_AARCH64CODEGENUTILS_H +#define LLVM_CLANG_BASIC_AARCH64CODEGENUTILS_H + +#include "clang/Basic/TargetBuiltins.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" + +namespace clang { +namespace aarch64 { + +//===----------------------------------------------------------------------===// +// Intrinsics maps +// +// Maps that help automate code-generation. +//===----------------------------------------------------------------------===// +enum { + AddRetType = (1 << 0), + Add1ArgType = (1 << 1), + Add2ArgTypes = (1 << 2), + + VectorizeRetType = (1 << 3), + VectorizeArgTypes = (1 << 4), + + InventFloatType = (1 << 5), + UnsignedAlts = (1 << 6), + + Use64BitVectors = (1 << 7), + Use128BitVectors = (1 << 8), + + Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, + VectorRet = AddRetType | VectorizeRetType, + VectorRetGetArgs01 = + AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, + FpCmpzModifiers = + AddRetType | VectorizeRetType | Add1ArgType | InventFloatType +}; + +struct ARMVectorIntrinsicInfo { + const char *NameHint; + unsigned BuiltinID; + unsigned LLVMIntrinsic; + unsigned AltLLVMIntrinsic; + uint64_t TypeModifier; + + bool operator<(unsigned RHSBuiltinID) const { + return BuiltinID < RHSBuiltinID; + } + bool operator<(const ARMVectorIntrinsicInfo &TE) const { + return BuiltinID < TE.BuiltinID; + } +}; + +#define NEONMAP0(NameBase) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0} + +#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, llvm::Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier} + +#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, llvm::Intrinsic::LLVMIntrinsic, \ + llvm::Intrinsic::AltLLVMIntrinsic, TypeModifier} + +// clang-format off +const inline ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap [] = { + NEONMAP0(splat_lane_v), + NEONMAP0(splat_laneq_v), + NEONMAP0(splatq_lane_v), + NEONMAP0(splatq_laneq_v), + NEONMAP1(vabs_v, aarch64_neon_abs, 0), + NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), + NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_v), + NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0), + NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0), + NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0), + NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0), + NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), + NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), + NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcage_v, aarch64_neon_facge, 0), + NEONMAP1(vcageq_v, aarch64_neon_facge, 0), + NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), + NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), + NEONMAP1(vcale_v, aarch64_neon_facge, 0), + NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), + NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), + NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), + NEONMAP0(vceqz_v), + NEONMAP0(vceqzq_v), + NEONMAP0(vcgez_v), + NEONMAP0(vcgezq_v), + NEONMAP0(vcgtz_v), + NEONMAP0(vcgtzq_v), + NEONMAP0(vclez_v), + NEONMAP0(vclezq_v), + NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), + NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), + NEONMAP0(vcltz_v), + NEONMAP0(vcltzq_v), + NEONMAP1(vclz_v, ctlz, Add1ArgType), + NEONMAP1(vclzq_v, ctlz, Add1ArgType), + NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcnt_v, ctpop, Add1ArgType), + NEONMAP1(vcntq_v, ctpop, Add1ArgType), + NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), + NEONMAP0(vcvt_f16_s16), + NEONMAP0(vcvt_f16_u16), + NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), + NEONMAP0(vcvt_f32_v), + NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), + NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvtq_f16_s16), + NEONMAP0(vcvtq_f16_u16), + NEONMAP0(vcvtq_f32_v), + NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), + NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), + NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdot_u32, aarch64_neon_udot, 0), + NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdotq_u32, aarch64_neon_udot, 0), + NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), + NEONMAP0(vext_v), + NEONMAP0(vextq_v), + NEONMAP0(vfma_v), + NEONMAP0(vfmaq_v), + NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0), + NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), + NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), + NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), + NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), + NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0), + NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0), + NEONMAP0(vmovl_v), + NEONMAP0(vmovn_v), + NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), + NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), + NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), + NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), + NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), + NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0), + NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), + NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), + NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), + NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), + NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), + NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), + NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), + NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), + NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), + NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), + NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), + NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), + NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0), + NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), + NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), + NEONMAP0(vrndi_v), + NEONMAP0(vrndiq_v), + NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), + NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), + NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), + NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0), + NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0), + NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0), + NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0), + NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0), + NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0), + NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0), + NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0), + NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0), + NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0), + NEONMAP0(vshl_n_v), + NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), + NEONMAP0(vshll_n_v), + NEONMAP0(vshlq_n_v), + NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), + NEONMAP0(vshr_n_v), + NEONMAP0(vshrn_n_v), + NEONMAP0(vshrq_n_v), + NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0), + NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0), + NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0), + NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0), + NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0), + NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0), + NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0), + NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0), + NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0), + NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), + NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), + NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0), + NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), + NEONMAP0(vsubhn_v), + NEONMAP0(vtst_v), + NEONMAP0(vtstq_v), + NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0), + NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), +}; + + +// Single-Instruction-Single-Data (SISD) intrinsics. +// +// The name is somewhat misleading: not all intrinsics in this table are +// strictly SISD. While many builtins operate on scalars, +// * some take vector operands (e.g. reduction builtins such as +// `vminvq_u16` and `vaddvq_s32`), and +// * some take both scalar and vector operands (e.g. crypto builtins +// such as `vsha1cq_u32`). +// +// TODO: Either rename this table to better reflect its contents, or +// restrict it to true SISD intrinsics only. +const inline ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { + NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), + NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType), + NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/186448 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
