https://github.com/MartinWehking updated https://github.com/llvm/llvm-project/pull/186807
>From 59676e350f96f414271588c300d40f099ca9ca11 Mon Sep 17 00:00:00 2001 From: Martin Wehking <[email protected]> Date: Mon, 16 Mar 2026 11:01:10 +0000 Subject: [PATCH 1/2] [AArch64] Add 9.7 data processing intrinsics Add Clang/LLVM intrinsics for scvtf, scvtflt, ucvtf, ucvtflt and fcvtzsn, fcvtzun. The Clang intrinsics are guarded by the sve2.3 and sme2.3 feature flags. ACLE Patch: https://github.com/ARM-software/acle/pull/428 Fix overload and address comments Fix intrinsic name and simplify CHECK lines Reintroduce overloaded short forms for intrinsics Adapt the test cases accordingly. Rename ACLE clang intrinsic A clang intrinsic was renamed in the ACLE patch. Change the name accordingly. Use existing pattern template Apply suggestions Apply suggestions --- clang/include/clang/Basic/arm_sve.td | 22 ++ clang/include/clang/Basic/arm_sve_sme_incl.td | 1 + clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 3 +- .../acle_sve2_fp_int_cvtn_x2.c | 113 ++++++++++ .../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c | 197 ++++++++++++++++++ clang/utils/TableGen/SveEmitter.cpp | 4 + llvm/include/llvm/IR/IntrinsicsAArch64.td | 30 +++ .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 2 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 16 +- .../AArch64/sve2p3-intrinsics-fp-converts.ll | 120 +++++++++++ .../sve2p3-intrinsics-fp-converts_x2.ll | 108 ++++++++++ 12 files changed, 615 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index ac9f9af30fce7..4e67d46ae8ce9 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ 
b/clang/include/clang/Basic/arm_sve.td @@ -1001,6 +1001,28 @@ def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "a def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>; } + +let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { +def SVCVTZN_S : SInst<"svcvtzn_{0}[_{1}_x2]", "y2.d", "hfd", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsReductionQV, VerifyRuntimeMode]>; +def SVCVTZN_U : SInst<"svcvtzn_{0}[_{1}_x2]", "e2.d", "hfd", MergeNone, "aarch64_sve_fcvtzun_x2", [IsReductionQV, VerifyRuntimeMode]>; + +def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, 
VerifyRuntimeMode]>; +def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +} + //////////////////////////////////////////////////////////////////////////////// // Permutations and selection diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index 7e60e87b12a4d..acc5159608b06 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -78,6 +78,7 @@ include "arm_immcheck_incl.td" // R: scalar of 1/2 width element type (splat to vector type) // r: scalar of 1/4 width element type (splat to vector type) // @: unsigned scalar of 1/4 width element type (splat to vector type) +// y: 1/2 width signed elements, 2x element count // e: 1/2 width unsigned elements, 2x element count // b: 1/4 width unsigned elements, 4x element count // h: 1/2 width elements, 2x element count diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index f8990ced2a577..8d5731c473bb1 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -3942,8 +3942,7 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, if (TypeFlags.isOverloadFirstandLast()) return {Ops[0]->getType(), Ops.back()->getType()}; - if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() && - ResultType->isVectorTy()) + if (TypeFlags.isReductionQV()) return {ResultType, Ops[1]->getType()}; assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads"); diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c new file mode 100644 index 0000000000000..ded4f3a02d2a0 --- /dev/null +++ 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c @@ -0,0 +1,113 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define 
SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: @test_svcvtzn_s8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svcvtzn_s8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svint8_t test_svcvtzn_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_s8,_f16_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_s16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_s16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svint16_t test_svcvtzn_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_s16,_f32_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_s32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_s32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: 
entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +svint32_t test_svcvtzn_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_s32,_f64_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_u8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svcvtzn_u8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svuint8_t test_svcvtzn_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_u8,_f16_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_u16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_u16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svuint16_t test_svcvtzn_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_u16,_f32_x2)(zn); +} + +// CHECK-LABEL: @test_svcvtzn_u32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svcvtzn_u32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +svuint32_t test_svcvtzn_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtzn_u32,_f64_x2)(zn); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c new file mode 100644 index 0000000000000..26e077d05c28b --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c @@ -0,0 +1,197 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// 
RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: @test_svcvtb_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f16,_s8)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale 
x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f32,_s16)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f64,_s32)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f16,_u8)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) 
MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f32,_u16)(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtb_f64,_u32)(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f16,_s8)(zn); +} + +// CHECK-LABEL: @test_svcvt_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f32,_s16)(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_s32( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f64,_s32)(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f16,_u8)(zn); +} + +// CHECK-LABEL: @test_svcvt_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f32,_u16)(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// 
CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR { + return SVE_ACLE_FUNC(svcvtt_f64,_u32)(zn); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index accb7b240288f..0f770a3202538 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -697,6 +697,10 @@ void SVEType::applyModifier(char Mod) { Kind = UInt; ElementBitwidth /= 2; break; + case 'y': + Kind = SInt; + ElementBitwidth /= 2; + break; case 'h': ElementBitwidth /= 2; break; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 48c38fb2c2c9f..0c9be7f5fa9bc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1048,6 +1048,7 @@ def llvm_nxv4i1_ty : LLVMType<nxv4i1>; def llvm_nxv8i1_ty : LLVMType<nxv8i1>; def llvm_nxv16i1_ty : LLVMType<nxv16i1>; def llvm_nxv16i8_ty : LLVMType<nxv16i8>; +def llvm_nxv8i16_ty : LLVMType<nxv8i16>; def llvm_nxv4i32_ty : LLVMType<nxv4i32>; def llvm_nxv2i64_ty : LLVMType<nxv2i64>; def llvm_nxv8f16_ty : LLVMType<nxv8f16>; @@ -2613,6 +2614,29 @@ def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; +// +// SVE2 - Unpredicated signed/unsigned integer convert to floating point +// + +class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN> + : DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>; + +def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_scvtfb_f32i16: 
Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + // // SVE2 - Floating-point integer binary logarithm // @@ -4018,6 +4042,12 @@ let TargetPrefix = "aarch64" in { LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + + // + // SVE2.3/SME2.3 - Multi-vector fp convert, narrow and interleave to integer + // + def int_aarch64_sve_fcvtzsn_x2: AdvSIMD_2Arg_FloatCompare_Intrinsic; + def int_aarch64_sve_fcvtzun_x2: AdvSIMD_2Arg_FloatCompare_Intrinsic; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 9df77f8e93c64..c95e0fbb301e3 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4858,14 +4858,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { defm UDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>; // SVE2 fp convert, narrow and interleave to integer, rounding toward zero - defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>; - defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>; + defm 
FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>; - defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>; + defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, int_aarch64_sve_fcvtzsn_x2>; + defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun_x2>; // SVE2 signed/unsigned integer convert to floating-point - defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>; - defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>; - defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01>; - defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>; + defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, "int_aarch64_sve_scvtfb">; + defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, "int_aarch64_sve_scvtflt">; + defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01, "int_aarch64_sve_ucvtfb">; + defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, "int_aarch64_sve_ucvtflt">; // SVE2 saturating shift right narrow by immediate and interleave defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 99836aeed7c0a..771c4c1fb2b6e 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -2558,7 +2558,7 @@ class sme2_cvt_vg2_single<string mnemonic, bits<5> op, multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt, ValueType in_vt, SDPatternOperator intrinsic> { def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>; - def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>; + def : SVE_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>; } // SME2 multi-vec FP8 down convert two registers diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 106986a64ffba..18b9008ee314c 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -724,7 +724,7 @@ class SVE_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; -class SVE2p1_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt> +class SVE_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt> : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)), (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>; @@ -10174,7 +10174,7 @@ class sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, bits<3> tsz> multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatternOperator intrinsic> { def NAME : sve2p1_multi_vec_extract_narrow<mnemonic, opc, 0b010>; - def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32>; + def : SVE_Cvt_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32>; } // SVE2 multi-vec shift narrow @@ -11485,10 +11485,14 @@ class sve2_fp_to_int_downcvt<string asm, ZPRRegOp ZdRC, RegisterOperand ZSrcOp, let Inst{4-0} = Zd; } -multiclass sve2_fp_to_int_downcvt<string asm, bit U> { +multiclass sve2_fp_to_int_downcvt<string asm, bit U, SDPatternOperator op> { def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8, ZZ_h_mul_r, 0b01, U>; def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>; def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>; + + def: SVE_Cvt_VG2_Pat<NAME # _HtoB, op, nxv16i8, nxv8f16>; + def: SVE_Cvt_VG2_Pat<NAME # _StoH, op, nxv8i16, nxv4f32>; + def: SVE_Cvt_VG2_Pat<NAME # _DtoS, op, nxv4i32, nxv2f64>; } //===----------------------------------------------------------------------===// @@ -11508,8 +11512,12 @@ class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC, let Inst{4-0} = Zd; } -multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> { 
+multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, string op> { def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>; def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>; def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>; + + def : SVE_1_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # "_f16i8"), nxv16i8, !cast<Instruction>(NAME # _BtoH)>; + def : SVE_1_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # "_f32i16"), nxv8i16, !cast<Instruction>(NAME # _HtoS)>; + def : SVE_1_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # "_f64i32"), nxv4i32, !cast<Instruction>(NAME # _StoD)>; } diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..b842571e1ef8e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s +; +; SVCVTB (SCVTFB / UCVTFB) +; + +define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) { +; CHECK-LABEL: scvtfb_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.h, z0.b +; CHECK-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) { +; CHECK-LABEL: scvtfb_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.s, z0.h +; CHECK-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) { +; CHECK-LABEL: scvtfb_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.d, 
z0.s +; CHECK-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) { +; CHECK-LABEL: ucvtfb_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.h, z0.b +; CHECK-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) { +; CHECK-LABEL: ucvtfb_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.s, z0.h +; CHECK-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) { +; CHECK-LABEL: ucvtfb_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.d, z0.s +; CHECK-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +; +; SVCVTT (SCVTFLT / UCVTFLT) +; + +define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) { +; CHECK-LABEL: scvtflt_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.h, z0.b +; CHECK-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) { +; CHECK-LABEL: scvtflt_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.s, z0.h +; CHECK-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) { +; CHECK-LABEL: scvtflt_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.d, z0.s +; CHECK-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x 
double> %res +} + +define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) { +; CHECK-LABEL: ucvtflt_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.h, z0.b +; CHECK-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) { +; CHECK-LABEL: ucvtflt_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.s, z0.h +; CHECK-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) { +; CHECK-LABEL: ucvtflt_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.d, z0.s +; CHECK-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll new file mode 100644 index 0000000000000..7e05793cabcc1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=CHECK-STREAMING +; +; FCVTZSN +; + +define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) { +; CHECK-LABEL: fcvtzsn_i8_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret +; +; 
CHECK-STREAMING-LABEL: fcvtzsn_i8_f16: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; CHECK-STREAMING-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) { +; CHECK-LABEL: fcvtzsn_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzsn_i16_f32: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; CHECK-STREAMING-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + ret <vscale x 8 x i16> %res +} + +define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) { +; CHECK-LABEL: fcvtzsn_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzsn_i32_f64: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; CHECK-STREAMING-NEXT: ret + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + ret <vscale x 4 x i32> %res +} + +; +; FCVTZUN +; + +define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) { +; CHECK-LABEL: fcvtzun_i8_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; 
CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzun_i8_f16: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; CHECK-STREAMING-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) { +; CHECK-LABEL: fcvtzun_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzun_i16_f32: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; CHECK-STREAMING-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + ret <vscale x 8 x i16> %res +} + +define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) { +; CHECK-LABEL: fcvtzun_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; CHECK-NEXT: ret +; +; CHECK-STREAMING-LABEL: fcvtzun_i32_f64: +; CHECK-STREAMING: // %bb.0: +; CHECK-STREAMING-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; CHECK-STREAMING-NEXT: ret + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + ret <vscale x 4 x i32> %res +} + >From ad13945ead2c7d26b468c62ed5502bb020a42ca7 Mon Sep 17 00:00:00 2001 From: Martin Wehking <[email protected]> Date: Tue, 19 May 2026 13:12:05 +0000 Subject: [PATCH 2/2] Apply suggestions Introduce a new flag for overload resolution and combine some front end intrinsics --- clang/include/clang/Basic/TargetBuiltins.h | 3 + 
clang/include/clang/Basic/arm_sve.td | 21 +--- clang/include/clang/Basic/arm_sve_sme_incl.td | 1 + clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 3 + .../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c | 48 +++---- ...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c | 117 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 26 ++-- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 8 +- .../AArch64/sve2p3-intrinsics-fp-converts.ll | 24 ++-- 10 files changed, 181 insertions(+), 78 deletions(-) diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index ae4bcdb9eeb64..9b4613c853206 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -401,6 +401,9 @@ namespace clang { bool isOverloadFirstandLast() const { return Flags & IsOverloadFirstandLast; } + bool isOverloadDefaultAndOp0() const { + return Flags & IsOverloadDefaultAndOp0; + } bool isPrefetch() const { return Flags & IsPrefetch; } bool isReverseCompare() const { return Flags & ReverseCompare; } bool isAppendSVALL() const { return Flags & IsAppendSVALL; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 4e67d46ae8ce9..a5c84f163d8c9 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1006,21 +1006,10 @@ let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { def SVCVTZN_S : SInst<"svcvtzn_{0}[_{1}_x2]", "y2.d", "hfd", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsReductionQV, VerifyRuntimeMode]>; def SVCVTZN_U : SInst<"svcvtzn_{0}[_{1}_x2]", "e2.d", "hfd", MergeNone, "aarch64_sve_fcvtzun_x2", [IsReductionQV, VerifyRuntimeMode]>; -def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", 
[IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; - -def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; - -def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; - -def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +foreach suffix = ["b", "t"] in { +def SVCVT # !toupper(suffix) # _S: SInst<"svcvt" # suffix # "_{d}[_{1}]", "dy", "hfd", MergeNone, "aarch64_sve_scvtf" # suffix, [IsOverloadDefaultAndOp0, VerifyRuntimeMode]>; +def SVCVT # !toupper(suffix) # _U: SInst<"svcvt" # suffix # "_{d}[_{1}]", "de", "hfd", MergeNone, "aarch64_sve_ucvtf" # suffix, [IsOverloadDefaultAndOp0, VerifyRuntimeMode]>; +} } //////////////////////////////////////////////////////////////////////////////// @@ -2557,4 +2546,4 @@ let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { def 
SVDOT_LANE_X2_SH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "s", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVDOT_LANE_X2_UH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Us", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; -} \ No newline at end of file +} diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index acc5159608b06..b1fbafa6e81e7 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -216,6 +216,7 @@ def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does no def IsOverloadWhileOrMultiVecCvt : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. def IsOverloadFirstandLast : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. +def IsOverloadDefaultAndOp0 : FlagType<0x4000000000000>; // Use {default type, typeof(operand0)} as overloaded types. def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. def IsByteIndexed : FlagType<0x01000000>; def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. 
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 8d5731c473bb1..6aa92a2361b56 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -3939,6 +3939,9 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, if (TypeFlags.isOverloadWhileRW()) return {getSVEPredType(TypeFlags), Ops[0]->getType()}; + if (TypeFlags.isOverloadDefaultAndOp0()) + return {DefaultType, Ops[0]->getType()}; + if (TypeFlags.isOverloadFirstandLast()) return {Ops[0]->getType(), Ops.back()->getType()}; diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c index 26e077d05c28b..c12a0fff786ef 100644 --- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c @@ -30,12 +30,12 @@ // CHECK-LABEL: @test_svcvtb_f16_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR { @@ -44,12 +44,12 @@ svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvtb_f32_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR { @@ -58,12 +58,12 @@ svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvtb_f64_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR { @@ -72,12 +72,12 @@ svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvtb_f16_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR { @@ -86,12 +86,12 @@ svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvtb_f32_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR { @@ -100,12 +100,12 @@ svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvtb_f64_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z19test_svcvtb_f64_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR { @@ -114,12 +114,12 @@ svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvt_f16_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR { @@ -128,12 +128,12 @@ svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvt_f32_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 
8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR { @@ -142,12 +142,12 @@ svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvt_f64_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR { @@ -156,12 +156,12 @@ svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvt_f16_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]]) // 
CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR { @@ -170,12 +170,12 @@ svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvt_f32_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR { @@ -184,12 +184,12 @@ svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR { // CHECK-LABEL: @test_svcvt_f64_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] // svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR { diff --git 
a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c index 34ead79e726ab..8f0a28b260d5d 100644 --- a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c +++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c @@ -14,6 +14,9 @@ void test(void) { int16_t int16_t_val; int32_t int32_t_val; svbool_t svbool_t_val; + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; svint8_t svint8_t_val; svint16_t svint16_t_val; svint16x2_t svint16x2_t_val; @@ -86,6 +89,42 @@ void test(void) { svaddsubp_u16(svuint16_t_val, svuint16_t_val); svaddsubp_u32(svuint32_t_val, svuint32_t_val); svaddsubp_u64(svuint64_t_val, svuint64_t_val); + svcvtb_f16(svint8_t_val); + svcvtb_f16(svuint8_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32(svint16_t_val); + svcvtb_f32(svuint16_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64(svint32_t_val); + svcvtb_f64(svuint32_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16(svint8_t_val); + svcvtt_f16(svuint8_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32(svint16_t_val); + svcvtt_f32(svuint16_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64(svint32_t_val); + svcvtt_f64(svuint32_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); + svcvtzn_s8(svfloat16x2_t_val); + svcvtzn_s8_f16_x2(svfloat16x2_t_val); + svcvtzn_s16(svfloat32x2_t_val); + svcvtzn_s16_f32_x2(svfloat32x2_t_val); + svcvtzn_s32(svfloat64x2_t_val); + svcvtzn_s32_f64_x2(svfloat64x2_t_val); + svcvtzn_u8(svfloat16x2_t_val); + 
svcvtzn_u8_f16_x2(svfloat16x2_t_val); + svcvtzn_u16(svfloat32x2_t_val); + svcvtzn_u16_f32_x2(svfloat32x2_t_val); + svcvtzn_u32(svfloat64x2_t_val); + svcvtzn_u32_f64_x2(svfloat64x2_t_val); svdot(svint16_t_val, svint8_t_val, int8_t_val); svdot(svint16_t_val, svint8_t_val, svint8_t_val); svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); @@ -171,6 +210,9 @@ void test_streaming(void) __arm_streaming{ int16_t int16_t_val; int32_t int32_t_val; svbool_t svbool_t_val; + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; svint8_t svint8_t_val; svint16_t svint16_t_val; svint16x2_t svint16x2_t_val; @@ -243,6 +285,42 @@ void test_streaming(void) __arm_streaming{ svaddsubp_u16(svuint16_t_val, svuint16_t_val); svaddsubp_u32(svuint32_t_val, svuint32_t_val); svaddsubp_u64(svuint64_t_val, svuint64_t_val); + svcvtb_f16(svint8_t_val); + svcvtb_f16(svuint8_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32(svint16_t_val); + svcvtb_f32(svuint16_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64(svint32_t_val); + svcvtb_f64(svuint32_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16(svint8_t_val); + svcvtt_f16(svuint8_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32(svint16_t_val); + svcvtt_f32(svuint16_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64(svint32_t_val); + svcvtt_f64(svuint32_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); + svcvtzn_s8(svfloat16x2_t_val); + svcvtzn_s8_f16_x2(svfloat16x2_t_val); + svcvtzn_s16(svfloat32x2_t_val); + svcvtzn_s16_f32_x2(svfloat32x2_t_val); + svcvtzn_s32(svfloat64x2_t_val); + svcvtzn_s32_f64_x2(svfloat64x2_t_val); + svcvtzn_u8(svfloat16x2_t_val); + svcvtzn_u8_f16_x2(svfloat16x2_t_val); + svcvtzn_u16(svfloat32x2_t_val); + svcvtzn_u16_f32_x2(svfloat32x2_t_val); + 
svcvtzn_u32(svfloat64x2_t_val); + svcvtzn_u32_f64_x2(svfloat64x2_t_val); svdot(svint16_t_val, svint8_t_val, int8_t_val); svdot(svint16_t_val, svint8_t_val, svint8_t_val); svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); @@ -328,6 +406,9 @@ void test_streaming_compatible(void) __arm_streaming_compatible{ int16_t int16_t_val; int32_t int32_t_val; svbool_t svbool_t_val; + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; svint8_t svint8_t_val; svint16_t svint16_t_val; svint16x2_t svint16x2_t_val; @@ -400,6 +481,42 @@ void test_streaming_compatible(void) __arm_streaming_compatible{ svaddsubp_u16(svuint16_t_val, svuint16_t_val); svaddsubp_u32(svuint32_t_val, svuint32_t_val); svaddsubp_u64(svuint64_t_val, svuint64_t_val); + svcvtb_f16(svint8_t_val); + svcvtb_f16(svuint8_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32(svint16_t_val); + svcvtb_f32(svuint16_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64(svint32_t_val); + svcvtb_f64(svuint32_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16(svint8_t_val); + svcvtt_f16(svuint8_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32(svint16_t_val); + svcvtt_f32(svuint16_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64(svint32_t_val); + svcvtt_f64(svuint32_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); + svcvtzn_s8(svfloat16x2_t_val); + svcvtzn_s8_f16_x2(svfloat16x2_t_val); + svcvtzn_s16(svfloat32x2_t_val); + svcvtzn_s16_f32_x2(svfloat32x2_t_val); + svcvtzn_s32(svfloat64x2_t_val); + svcvtzn_s32_f64_x2(svfloat64x2_t_val); + svcvtzn_u8(svfloat16x2_t_val); + svcvtzn_u8_f16_x2(svfloat16x2_t_val); + svcvtzn_u16(svfloat32x2_t_val); + svcvtzn_u16_f32_x2(svfloat32x2_t_val); + svcvtzn_u32(svfloat64x2_t_val); + svcvtzn_u32_f64_x2(svfloat64x2_t_val); svdot(svint16_t_val, 
svint8_t_val, int8_t_val); svdot(svint16_t_val, svint8_t_val, svint8_t_val); svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 0c9be7f5fa9bc..ec084626d5b2a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2618,24 +2618,14 @@ def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; // SVE2 - Multi-vector narrowing convert to floating point // -class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN> - : DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>; - -def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; -def int_aarch64_sve_scvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; -def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; - -def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; -def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; -def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; - -def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; -def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; -def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; - -def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; -def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; -def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_scvtfb + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; +def int_aarch64_sve_scvtft + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; +def int_aarch64_sve_ucvtfb + : 
DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; +def int_aarch64_sve_ucvtft + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; // // SVE2 - Floating-point integer binary logarithm diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c95e0fbb301e3..6a03b9abb3c19 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4862,10 +4862,10 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun_x2>; // SVE2 signed/unsigned integer convert to floating-point - defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, "int_aarch64_sve_scvtfb">; - defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, "int_aarch64_sve_scvtflt">; - defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01, "int_aarch64_sve_ucvtfb">; - defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, "int_aarch64_sve_ucvtflt">; + defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, int_aarch64_sve_scvtfb>; + defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, int_aarch64_sve_scvtft>; + defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01, int_aarch64_sve_ucvtfb>; + defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, int_aarch64_sve_ucvtft>; // SVE2 saturating shift right narrow by immediate and interleave defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 18b9008ee314c..947ef7f9c9f6d 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -11512,12 +11512,12 @@ class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC, let Inst{4-0} = Zd; } -multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, string op> { +multiclass sve2_int_to_fp_upcvt<string 
asm, bits<2> U, SDPatternOperator op> { def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>; def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>; def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>; - def : SVE_1_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # "_f16i8"), nxv16i8, !cast<Instruction>(NAME # _BtoH)>; - def : SVE_1_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # "_f32i16"), nxv8i16, !cast<Instruction>(NAME # _HtoS)>; - def : SVE_1_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # "_f64i32"), nxv4i32, !cast<Instruction>(NAME # _StoD)>; + def : SVE_1_Op_Pat<nxv8f16, op, nxv16i8, !cast<Instruction>(NAME # _BtoH)>; + def : SVE_1_Op_Pat<nxv4f32, op, nxv8i16, !cast<Instruction>(NAME # _HtoS)>; + def : SVE_1_Op_Pat<nxv2f64, op, nxv4i32, !cast<Instruction>(NAME # _StoD)>; } diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll index b842571e1ef8e..1a07e4371810d 100644 --- a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll @@ -11,7 +11,7 @@ define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: scvtf z0.h, z0.b ; CHECK-NEXT: ret - %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> %zn) + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } @@ -20,7 +20,7 @@ define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: scvtf z0.s, z0.h ; CHECK-NEXT: ret - %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> %zn) + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } @@ -29,7 +29,7 @@ define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: scvtf z0.d, 
z0.s ; CHECK-NEXT: ret - %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> %zn) + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn) ret <vscale x 2 x double> %res } @@ -38,7 +38,7 @@ define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: ucvtf z0.h, z0.b ; CHECK-NEXT: ret - %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> %zn) + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } @@ -47,7 +47,7 @@ define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: ucvtf z0.s, z0.h ; CHECK-NEXT: ret - %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> %zn) + %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } @@ -56,7 +56,7 @@ define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: ucvtf z0.d, z0.s ; CHECK-NEXT: ret - %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> %zn) + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn) ret <vscale x 2 x double> %res } @@ -69,7 +69,7 @@ define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: scvtflt z0.h, z0.b ; CHECK-NEXT: ret - %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> %zn) + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } @@ -78,7 +78,7 @@ define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: scvtflt z0.s, z0.h ; CHECK-NEXT: ret - %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 
x i16> %zn) + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } @@ -87,7 +87,7 @@ define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: scvtflt z0.d, z0.s ; CHECK-NEXT: ret - %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> %zn) + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn) ret <vscale x 2 x double> %res } @@ -96,7 +96,7 @@ define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: ucvtflt z0.h, z0.b ; CHECK-NEXT: ret - %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> %zn) + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } @@ -105,7 +105,7 @@ define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: ucvtflt z0.s, z0.h ; CHECK-NEXT: ret - %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> %zn) + %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } @@ -114,7 +114,7 @@ define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) { ; CHECK: // %bb.0: ; CHECK-NEXT: ucvtflt z0.d, z0.s ; CHECK-NEXT: ret - %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> %zn) + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn) ret <vscale x 2 x double> %res } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
