https://github.com/MartinWehking updated https://github.com/llvm/llvm-project/pull/186807
>From 759eecd18b5ba07c1d1c7fc151cf2c824baffb1b Mon Sep 17 00:00:00 2001 From: Martin Wehking <[email protected]> Date: Mon, 16 Mar 2026 11:01:10 +0000 Subject: [PATCH 1/2] [AArch64] Add 9.7 data processing intrinsics Add Clang/LLVM intrinsics for svcvt, scvtflt, ucvtf, ucvtflt and fcvtzsn, fcvtzun. The Clang intrinsics are guarded by the sve2.3 and sme2.3 feature flags. ACLE Patch: https://github.com/ARM-software/acle/pull/428 --- clang/include/clang/Basic/arm_sve.td | 27 ++ .../acle_sve2_fp_int_cvtn_x2.c | 105 ++++++++ .../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c | 189 +++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 33 +++ .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 15 +- .../AArch64/sve2p3-intrinsics-fp-converts.ll | 255 ++++++++++++++++++ .../sve2p3-intrinsics-fp-converts_x2.ll | 157 +++++++++++ 8 files changed, 785 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index be3cd8a76503b..852cc60c6e0b3 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -997,6 +997,33 @@ def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "a def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>; } + +let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { +def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_S16_F32 : 
SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; + +def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; + +def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTB_F16_U8 : 
SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +} + //////////////////////////////////////////////////////////////////////////////// // Permutations and selection diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c new file mode 100644 index 0000000000000..a4a7c58e1ced9 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c @@ -0,0 +1,105 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + 
+// CHECK-LABEL: @test_svcvt_s8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcvt_s8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svint8_t test_svcvt_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return svcvt_s8_f16_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_s16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svcvt_s16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svint16_t test_svcvt_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return svcvt_s16_f32_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_s32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], 
<vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +svint32_t test_svcvt_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return svcvt_s32_f64_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_u8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcvt_u8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svuint8_t test_svcvt_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return svcvt_u8_f16_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_u16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svcvt_u16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svuint16_t test_svcvt_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return svcvt_u16_f32_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_u32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +// CPP-CHECK-LABEL: 
@_Z21test_svcvt_u32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +svuint32_t test_svcvt_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return svcvt_u32_f64_x2(zn); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c new file mode 100644 index 0000000000000..6b7252e045e33 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c @@ -0,0 +1,189 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +// CHECK-LABEL: @test_svcvtb_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR { + return svcvtb_f16_s8(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR { + return svcvtb_f32_s16(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR { + return svcvtb_f64_s32(zn); +} + +// CHECK-LABEL: @test_svcvtb_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: 
@_Z18test_svcvtb_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR { + return svcvtb_f16_u8(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR { + return svcvtb_f32_u16(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR { + return svcvtb_f64_u32(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 
x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR { + return svcvtt_f16_s8(zn); +} + +// CHECK-LABEL: @test_svcvt_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR { + return svcvtt_f32_s16(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR { + return svcvtt_f64_s32(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR { + return svcvtt_f16_u8(zn); +} + +// CHECK-LABEL: 
@test_svcvt_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR { + return svcvtt_f32_u16(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR { + return svcvtt_f64_u32(zn); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 75929cbc222ad..d9f7314740953 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1051,6 +1051,7 @@ def llvm_nxv4i1_ty : LLVMType<nxv4i1>; def llvm_nxv8i1_ty : LLVMType<nxv8i1>; def llvm_nxv16i1_ty : LLVMType<nxv16i1>; def llvm_nxv16i8_ty : LLVMType<nxv16i8>; +def llvm_nxv8i16_ty : LLVMType<nxv8i16>; def llvm_nxv4i32_ty : LLVMType<nxv4i32>; def llvm_nxv2i64_ty : LLVMType<nxv2i64>; def llvm_nxv8f16_ty : LLVMType<nxv8f16>; @@ -2610,6 +2611,29 @@ def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_fmlslt_lane : 
SVE2_3VectorArgIndexed_Long_Intrinsic; +// +// SVE2 - Unpredicated signed/unsigned integer convert to floating-point +// + +class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN> + : DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>; + +def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_scvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + // // SVE2 - Floating-point integer binary logarithm // @@ -3526,6 +3550,10 @@ let TargetPrefix = "aarch64" in { [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem]>; + class SVE2_CVT_VG2_Single_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem]>; // // Multi-vector fused multiply-add/subtract // @@ -4053,6 +4081,11 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; +// SVE2.3/SME2.3 - Multi-vector floating-point convert, narrow and interleave to integer, rounding toward zero + +def 
int_aarch64_sve_fcvtzsn: SVE2_CVT_VG2_Single_Intrinsic; +def int_aarch64_sve_fcvtzun: SVE2_CVT_VG2_Single_Intrinsic; + // // FP8 Intrinsics // diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 926593022b537..72a6f3bd49abe 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4776,14 +4776,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">; // SVE2 fp convert, narrow and interleave to integer, rounding toward zero - defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>; - defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>; + defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, int_aarch64_sve_fcvtzsn>; + defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun>; // SVE2 signed/unsigned integer convert to floating-point - defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>; - defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>; - defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01>; - defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>; + defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, "int_aarch64_sve_scvtfb">; + defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, "int_aarch64_sve_scvtflt">; + defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01, "int_aarch64_sve_ucvtfb">; + defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, "int_aarch64_sve_ucvtflt">; // SVE2 saturating shift right narrow by immediate and interleave defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 8a3f52090ab4c..0958b3b665e32 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -11423,10 +11423,17 @@ class sve2_fp_to_int_downcvt<string asm, ZPRRegOp ZdRC, 
RegisterOperand ZSrcOp, let Inst{4-0} = Zd; } -multiclass sve2_fp_to_int_downcvt<string asm, bit U> { +multiclass sve2_fp_to_int_downcvt<string asm, bit U, SDPatternOperator op> { def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8, ZZ_h_mul_r, 0b01, U>; def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>; def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>; + + def : Pat<(nxv16i8 (op nxv8f16:$Zn1, nxv8f16:$Zn2)), + (!cast<Instruction>(NAME # _HtoB) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; + def : Pat<(nxv8i16 (op nxv4f32:$Zn1, nxv4f32:$Zn2)), + (!cast<Instruction>(NAME # _StoH) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; + def : Pat<(nxv4i32 (op nxv2f64:$Zn1, nxv2f64:$Zn2)), + (!cast<Instruction>(NAME # _DtoS) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; } //===----------------------------------------------------------------------===// @@ -11446,8 +11453,12 @@ class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC, let Inst{4-0} = Zd; } -multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> { +multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, string op> { def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>; def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>; def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>; + + def : SVE_1_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # "_f16i8"), nxv16i8, !cast<Instruction>(NAME # _BtoH)>; + def : SVE_1_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # "_f32i16"), nxv8i16, !cast<Instruction>(NAME # _HtoS)>; + def : SVE_1_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # "_f64i32"), nxv4i32, !cast<Instruction>(NAME # _StoD)>; } diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..46778fc14b81f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll @@ -0,0 +1,255 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR +; +; SVCVTB (SCVTFB / UCVTFB) +; + +define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) { +; SVE2P3-LABEL: scvtfb_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtf z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtfb_f16_i8: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtf z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtfb_f16_i8: +; STR: // %bb.0: +; STR-NEXT: scvtf z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: scvtfb_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtf z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtfb_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtf z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtfb_f32_i16: +; STR: // %bb.0: +; STR-NEXT: scvtf z0.s, z0.h +; STR-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: scvtfb_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtf z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtfb_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtf z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtfb_f64_i32: +; STR: // %bb.0: +; STR-NEXT: scvtf z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) { 
+; SVE2P3-LABEL: ucvtfb_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtf z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtfb_f16_i8: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtf z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtfb_f16_i8: +; STR: // %bb.0: +; STR-NEXT: ucvtf z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: ucvtfb_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtf z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtfb_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtf z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtfb_f32_i16: +; STR: // %bb.0: +; STR-NEXT: ucvtf z0.s, z0.h +; STR-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: ucvtfb_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtf z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtfb_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtf z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtfb_f64_i32: +; STR: // %bb.0: +; STR-NEXT: ucvtf z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +; +; SVCVTT (SCVTFLT / UCVTFLT) +; + +define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) { +; SVE2P3-LABEL: scvtflt_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtflt z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtflt_f16_i8: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtflt z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtflt_f16_i8: +; STR: // %bb.0: +; STR-NEXT: scvtflt z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> %zn) + ret 
<vscale x 8 x half> %res +} + +define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: scvtflt_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtflt z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtflt_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtflt z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtflt_f32_i16: +; STR: // %bb.0: +; STR-NEXT: scvtflt z0.s, z0.h +; STR-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: scvtflt_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtflt z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtflt_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtflt z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtflt_f64_i32: +; STR: // %bb.0: +; STR-NEXT: scvtflt z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) { +; SVE2P3-LABEL: ucvtflt_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtflt z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtflt_f16_i8: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtflt z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtflt_f16_i8: +; STR: // %bb.0: +; STR-NEXT: ucvtflt z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: ucvtflt_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtflt z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtflt_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtflt z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtflt_f32_i16: +; STR: // %bb.0: +; STR-NEXT: ucvtflt z0.s, z0.h +; STR-NEXT: ret + 
%res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: ucvtflt_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtflt z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtflt_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtflt z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtflt_f64_i32: +; STR: // %bb.0: +; STR-NEXT: ucvtflt z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +declare <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32>) diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll new file mode 100644 index 0000000000000..4c99a4c241318 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll @@ -0,0 +1,157 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR +; +; FCVTZSN +; + +define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) { +; SVE2P3-LABEL: fcvtzsn_i8_f16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzsn_i8_f16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzsn_i8_f16: +; STR: // %bb.0: +; STR-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; STR-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) { +; SVE2P3-LABEL: fcvtzsn_i16_f32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzsn_i16_f32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzsn_i16_f32: +; STR: // %bb.0: +; STR-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; STR-NEXT: ret + %res = call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + ret <vscale x 8 x i16> %res +} + +define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) { +; SVE2P3-LABEL: fcvtzsn_i32_f64: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzsn_i32_f64: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzsn_i32_f64: +; STR: // %bb.0: +; STR-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; STR-NEXT: ret + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + ret <vscale x 4 x i32> %res +} + +; +; FCVTZUN +; + +define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) { +; SVE2P3-LABEL: fcvtzun_i8_f16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzun_i8_f16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzun_i8_f16: +; STR: // %bb.0: +; STR-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; STR-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> 
%zn2) { +; SVE2P3-LABEL: fcvtzun_i16_f32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzun_i16_f32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzun_i16_f32: +; STR: // %bb.0: +; STR-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; STR-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + ret <vscale x 8 x i16> %res +} + +define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) { +; SVE2P3-LABEL: fcvtzun_i32_f64: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzun_i32_f64: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzun_i32_f64: +; STR: // %bb.0: +; STR-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; STR-NEXT: ret + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + ret <vscale x 4 x i32> %res +} + +declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double>, <vscale x 2 x double>) + 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double>, <vscale x 2 x double>) >From 609c8fbb67451d8872a45d0efda13b5392d8f9b5 Mon Sep 17 00:00:00 2001 From: Martin Wehking <[email protected]> Date: Wed, 18 Mar 2026 10:18:50 +0000 Subject: [PATCH 2/2] Fix overload and address comments --- clang/include/clang/Basic/arm_sve.td | 36 +-- .../acle_sve2_fp_int_cvtn_x2.c | 49 ++-- ...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c | 121 +++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 13 +- .../AArch64/sve2p3-intrinsics-fp-converts.ll | 237 ++++-------------- .../sve2p3-intrinsics-fp-converts_x2.ll | 19 +- 6 files changed, 233 insertions(+), 242 deletions(-) create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 852cc60c6e0b3..c55a2d03f2037 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -999,29 +999,29 @@ def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "a } let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { -def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVCVT_S16_F32 : SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsOverloadWhileOrMultiVecCvt, 
VerifyRuntimeMode]>; +def SVCVT_S16_F32 : SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F16_S8 : SInst<"svcvtt_f16_s8", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_S16 : SInst<"svcvtt_f32_s16", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_S32 : SInst<"svcvtt_f64_s32", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", 
[IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F16_U8 : SInst<"svcvtt_f16_u8", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_U16 : SInst<"svcvtt_f32_u16", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_U32 : SInst<"svcvtt_f64_u32", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F16_S8 : SInst<"svcvtb_f16_s8", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F32_S16 : SInst<"svcvtb_f32_s16", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F64_S32 : SInst<"svcvtb_f64_s32", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", 
"Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F16_U8 : SInst<"svcvtb_f16_u8", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F32_U16 : SInst<"svcvtb_f32_u16", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F64_U32 : SInst<"svcvtb_f64_u32", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c index a4a7c58e1ced9..e2cd71bd8b062 100644 --- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c @@ -1,9 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK - +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 
-DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s @@ -20,86 +24,93 @@ #define MODE_ATTR #endif +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + // CHECK-LABEL: @test_svcvt_s8_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svcvt_s8_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svint8_t test_svcvt_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR { - return svcvt_s8_f16_x2(zn); + return SVE_ACLE_FUNC(svcvt_s8,_f16_x2)(zn); } // CHECK-LABEL: @test_svcvt_s16_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s16_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] // svint16_t test_svcvt_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR { - return svcvt_s16_f32_x2(zn); + return SVE_ACLE_FUNC(svcvt_s16,_f32_x2)(zn); } // CHECK-LABEL: @test_svcvt_s32_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) // 
CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // svint32_t test_svcvt_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR { - return svcvt_s32_f64_x2(zn); + return SVE_ACLE_FUNC(svcvt_s32,_f64_x2)(zn); } // CHECK-LABEL: @test_svcvt_u8_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svcvt_u8_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svuint8_t test_svcvt_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR { - return svcvt_u8_f16_x2(zn); + return SVE_ACLE_FUNC(svcvt_u8,_f16_x2)(zn); } // CHECK-LABEL: @test_svcvt_u16_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u16_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] // svuint16_t test_svcvt_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR { - return svcvt_u16_f32_x2(zn); + return SVE_ACLE_FUNC(svcvt_u16,_f32_x2)(zn); } // CHECK-LABEL: @test_svcvt_u32_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // svuint32_t test_svcvt_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR { - return svcvt_u32_f64_x2(zn); + return SVE_ACLE_FUNC(svcvt_u32,_f64_x2)(zn); } diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c new file mode 100644 index 0000000000000..76290675e3b93 --- /dev/null +++ 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c @@ -0,0 +1,121 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify +// expected-no-diagnostics + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent" + +void test(void) { + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; + svint8_t svint8_t_val; + svint16_t svint16_t_val; + svint32_t svint32_t_val; + svuint8_t svuint8_t_val; + svuint16_t svuint16_t_val; + svuint32_t svuint32_t_val; + + svcvt_s8(svfloat16x2_t_val); + svcvt_s8_f16_x2(svfloat16x2_t_val); + svcvt_s16(svfloat32x2_t_val); + svcvt_s16_f32_x2(svfloat32x2_t_val); + svcvt_s32(svfloat64x2_t_val); + svcvt_s32_f64_x2(svfloat64x2_t_val); + svcvt_u8(svfloat16x2_t_val); + svcvt_u8_f16_x2(svfloat16x2_t_val); + svcvt_u16(svfloat32x2_t_val); + svcvt_u16_f32_x2(svfloat32x2_t_val); + svcvt_u32(svfloat64x2_t_val); + svcvt_u32_f64_x2(svfloat64x2_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); +} + +void test_streaming(void) __arm_streaming{ + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; + svint8_t svint8_t_val; + 
svint16_t svint16_t_val; + svint32_t svint32_t_val; + svuint8_t svuint8_t_val; + svuint16_t svuint16_t_val; + svuint32_t svuint32_t_val; + + svcvt_s8(svfloat16x2_t_val); + svcvt_s8_f16_x2(svfloat16x2_t_val); + svcvt_s16(svfloat32x2_t_val); + svcvt_s16_f32_x2(svfloat32x2_t_val); + svcvt_s32(svfloat64x2_t_val); + svcvt_s32_f64_x2(svfloat64x2_t_val); + svcvt_u8(svfloat16x2_t_val); + svcvt_u8_f16_x2(svfloat16x2_t_val); + svcvt_u16(svfloat32x2_t_val); + svcvt_u16_f32_x2(svfloat32x2_t_val); + svcvt_u32(svfloat64x2_t_val); + svcvt_u32_f64_x2(svfloat64x2_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svfloat16x2_t svfloat16x2_t_val; + svfloat32x2_t svfloat32x2_t_val; + svfloat64x2_t svfloat64x2_t_val; + svint8_t svint8_t_val; + svint16_t svint16_t_val; + svint32_t svint32_t_val; + svuint8_t svuint8_t_val; + svuint16_t svuint16_t_val; + svuint32_t svuint32_t_val; + + svcvt_s8(svfloat16x2_t_val); + svcvt_s8_f16_x2(svfloat16x2_t_val); + svcvt_s16(svfloat32x2_t_val); + svcvt_s16_f32_x2(svfloat32x2_t_val); + svcvt_s32(svfloat64x2_t_val); + svcvt_s32_f64_x2(svfloat64x2_t_val); + svcvt_u8(svfloat16x2_t_val); + svcvt_u8_f16_x2(svfloat16x2_t_val); + svcvt_u16(svfloat32x2_t_val); + svcvt_u16_f32_x2(svfloat32x2_t_val); + svcvt_u32(svfloat64x2_t_val); + svcvt_u32_f64_x2(svfloat64x2_t_val); + svcvtb_f16_s8(svint8_t_val); + svcvtb_f16_u8(svuint8_t_val); + svcvtb_f32_s16(svint16_t_val); + svcvtb_f32_u16(svuint16_t_val); + svcvtb_f64_s32(svint32_t_val); + svcvtb_f64_u32(svuint32_t_val); + svcvtt_f16_s8(svint8_t_val); + svcvtt_f16_u8(svuint8_t_val); + 
svcvtt_f32_s16(svint16_t_val); + svcvtt_f32_u16(svuint16_t_val); + svcvtt_f64_s32(svint32_t_val); + svcvtt_f64_u32(svuint32_t_val); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index d9f7314740953..29520c17a3950 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3550,7 +3550,7 @@ let TargetPrefix = "aarch64" in { [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem]>; - class SVE2_CVT_VG2_Single_Intrinsic + class SVE2_CVT_VG2_Narrowing_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; @@ -4038,6 +4038,12 @@ let TargetPrefix = "aarch64" in { LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + + // + // SVE2.3/SME2.3 - Multi-vector narrowing convert from floating point to integer + // + def int_aarch64_sve_fcvtzsn_x2: SVE2_CVT_VG2_Narrowing_Intrinsic; + def int_aarch64_sve_fcvtzun_x2: SVE2_CVT_VG2_Narrowing_Intrinsic; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 @@ -4081,11 +4087,6 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; -// SVE2.3/SME2.3 - Multi-vector narrowing convert to floating point - -def int_aarch64_sve_fcvtzsn: SVE2_CVT_VG2_Single_Intrinsic; -def int_aarch64_sve_fcvtzun: SVE2_CVT_VG2_Single_Intrinsic; - // // FP8 Intrinsics // diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll index 46778fc14b81f..b842571e1ef8e 100644 --- a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll @@ -1,121 +1,61 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu
-mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s ; ; SVCVTB (SCVTFB / UCVTFB) ; define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) { -; SVE2P3-LABEL: scvtfb_f16_i8: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: scvtf z0.h, z0.b -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: scvtfb_f16_i8: -; SME2P3: // %bb.0: -; SME2P3-NEXT: scvtf z0.h, z0.b -; SME2P3-NEXT: ret -; -; STR-LABEL: scvtfb_f16_i8: -; STR: // %bb.0: -; STR-NEXT: scvtf z0.h, z0.b -; STR-NEXT: ret +; CHECK-LABEL: scvtfb_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.h, z0.b +; CHECK-NEXT: ret %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) { -; SVE2P3-LABEL: scvtfb_f32_i16: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: scvtf z0.s, z0.h -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: scvtfb_f32_i16: -; SME2P3: // %bb.0: -; SME2P3-NEXT: scvtf z0.s, z0.h -; SME2P3-NEXT: ret -; -; STR-LABEL: scvtfb_f32_i16: -; STR: // %bb.0: -; STR-NEXT: scvtf z0.s, z0.h -; STR-NEXT: ret +; CHECK-LABEL: scvtfb_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.s, z0.h +; CHECK-NEXT: ret %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) { -; SVE2P3-LABEL: scvtfb_f64_i32: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: scvtf z0.d, z0.s -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: scvtfb_f64_i32: -; SME2P3: // %bb.0: -; 
SME2P3-NEXT: scvtf z0.d, z0.s -; SME2P3-NEXT: ret -; -; STR-LABEL: scvtfb_f64_i32: -; STR: // %bb.0: -; STR-NEXT: scvtf z0.d, z0.s -; STR-NEXT: ret +; CHECK-LABEL: scvtfb_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf z0.d, z0.s +; CHECK-NEXT: ret %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> %zn) ret <vscale x 2 x double> %res } define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) { -; SVE2P3-LABEL: ucvtfb_f16_i8: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: ucvtf z0.h, z0.b -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: ucvtfb_f16_i8: -; SME2P3: // %bb.0: -; SME2P3-NEXT: ucvtf z0.h, z0.b -; SME2P3-NEXT: ret -; -; STR-LABEL: ucvtfb_f16_i8: -; STR: // %bb.0: -; STR-NEXT: ucvtf z0.h, z0.b -; STR-NEXT: ret +; CHECK-LABEL: ucvtfb_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.h, z0.b +; CHECK-NEXT: ret %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) { -; SVE2P3-LABEL: ucvtfb_f32_i16: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: ucvtf z0.s, z0.h -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: ucvtfb_f32_i16: -; SME2P3: // %bb.0: -; SME2P3-NEXT: ucvtf z0.s, z0.h -; SME2P3-NEXT: ret -; -; STR-LABEL: ucvtfb_f32_i16: -; STR: // %bb.0: -; STR-NEXT: ucvtf z0.s, z0.h -; STR-NEXT: ret +; CHECK-LABEL: ucvtfb_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.s, z0.h +; CHECK-NEXT: ret %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) { -; SVE2P3-LABEL: ucvtfb_f64_i32: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: ucvtf z0.d, z0.s -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: ucvtfb_f64_i32: -; SME2P3: // %bb.0: -; SME2P3-NEXT: ucvtf z0.d, z0.s -; SME2P3-NEXT: ret -; -; STR-LABEL: ucvtfb_f64_i32: -; STR: // %bb.0: -; STR-NEXT: ucvtf z0.d, z0.s -; STR-NEXT: ret +; CHECK-LABEL: 
ucvtfb_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf z0.d, z0.s +; CHECK-NEXT: ret %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> %zn) ret <vscale x 2 x double> %res } @@ -125,131 +65,56 @@ define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) { ; define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) { -; SVE2P3-LABEL: scvtflt_f16_i8: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: scvtflt z0.h, z0.b -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: scvtflt_f16_i8: -; SME2P3: // %bb.0: -; SME2P3-NEXT: scvtflt z0.h, z0.b -; SME2P3-NEXT: ret -; -; STR-LABEL: scvtflt_f16_i8: -; STR: // %bb.0: -; STR-NEXT: scvtflt z0.h, z0.b -; STR-NEXT: ret +; CHECK-LABEL: scvtflt_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.h, z0.b +; CHECK-NEXT: ret %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) { -; SVE2P3-LABEL: scvtflt_f32_i16: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: scvtflt z0.s, z0.h -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: scvtflt_f32_i16: -; SME2P3: // %bb.0: -; SME2P3-NEXT: scvtflt z0.s, z0.h -; SME2P3-NEXT: ret -; -; STR-LABEL: scvtflt_f32_i16: -; STR: // %bb.0: -; STR-NEXT: scvtflt z0.s, z0.h -; STR-NEXT: ret +; CHECK-LABEL: scvtflt_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtflt z0.s, z0.h +; CHECK-NEXT: ret %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) { -; SVE2P3-LABEL: scvtflt_f64_i32: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: scvtflt z0.d, z0.s -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: scvtflt_f64_i32: -; SME2P3: // %bb.0: -; SME2P3-NEXT: scvtflt z0.d, z0.s -; SME2P3-NEXT: ret -; -; STR-LABEL: scvtflt_f64_i32: -; STR: // %bb.0: -; STR-NEXT: scvtflt z0.d, z0.s -; STR-NEXT: ret +; CHECK-LABEL: scvtflt_f64_i32: +; CHECK: // %bb.0: +; 
CHECK-NEXT: scvtflt z0.d, z0.s +; CHECK-NEXT: ret %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> %zn) ret <vscale x 2 x double> %res } define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) { -; SVE2P3-LABEL: ucvtflt_f16_i8: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: ucvtflt z0.h, z0.b -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: ucvtflt_f16_i8: -; SME2P3: // %bb.0: -; SME2P3-NEXT: ucvtflt z0.h, z0.b -; SME2P3-NEXT: ret -; -; STR-LABEL: ucvtflt_f16_i8: -; STR: // %bb.0: -; STR-NEXT: ucvtflt z0.h, z0.b -; STR-NEXT: ret +; CHECK-LABEL: ucvtflt_f16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.h, z0.b +; CHECK-NEXT: ret %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> %zn) ret <vscale x 8 x half> %res } define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) { -; SVE2P3-LABEL: ucvtflt_f32_i16: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: ucvtflt z0.s, z0.h -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: ucvtflt_f32_i16: -; SME2P3: // %bb.0: -; SME2P3-NEXT: ucvtflt z0.s, z0.h -; SME2P3-NEXT: ret -; -; STR-LABEL: ucvtflt_f32_i16: -; STR: // %bb.0: -; STR-NEXT: ucvtflt z0.s, z0.h -; STR-NEXT: ret +; CHECK-LABEL: ucvtflt_f32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.s, z0.h +; CHECK-NEXT: ret %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> %zn) ret <vscale x 4 x float> %res } define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) { -; SVE2P3-LABEL: ucvtflt_f64_i32: -; SVE2P3: // %bb.0: -; SVE2P3-NEXT: ucvtflt z0.d, z0.s -; SVE2P3-NEXT: ret -; -; SME2P3-LABEL: ucvtflt_f64_i32: -; SME2P3: // %bb.0: -; SME2P3-NEXT: ucvtflt z0.d, z0.s -; SME2P3-NEXT: ret -; -; STR-LABEL: ucvtflt_f64_i32: -; STR: // %bb.0: -; STR-NEXT: ucvtflt z0.d, z0.s -; STR-NEXT: ret +; CHECK-LABEL: ucvtflt_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtflt z0.d, z0.s +; CHECK-NEXT: ret %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> 
%zn) ret <vscale x 2 x double> %res } -declare <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8>) -declare <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16>) -declare <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32>) - -declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8>) -declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16>) -declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32>) - -declare <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8>) -declare <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16>) -declare <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32>) - -declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8>) -declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16>) -declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32>) diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll index 4c99a4c241318..121f8b87255f5 100644 --- a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll @@ -25,7 +25,7 @@ define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 ; STR: // %bb.0: ; STR-NEXT: fcvtzsn z0.b, { z0.h, z1.h } ; STR-NEXT: ret - %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) ret <vscale x 16 x i8> %res } @@ -48,7 +48,7 @@ define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale x ; STR: // %bb.0: ; STR-NEXT: fcvtzsn z0.h, { z0.s, z1.s } ; STR-NEXT: ret - %res = call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) ret <vscale x 8 x i16> %res } @@ -71,7 +71,7 @@ define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x ; STR: // %bb.0: ; STR-NEXT: fcvtzsn z0.s, { z0.d, z1.d } ; STR-NEXT: ret - %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) ret <vscale x 4 x i32> %res } @@ -98,7 +98,7 @@ define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 ; STR: // %bb.0: ; STR-NEXT: fcvtzun z0.b, { z0.h, z1.h } ; STR-NEXT: ret - %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) ret <vscale x 16 x i8> %res } @@ -121,7 +121,7 @@ define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale x ; STR: // %bb.0: ; STR-NEXT: fcvtzun z0.h, { z0.s, z1.s } ; STR-NEXT: ret - %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) ret <vscale x 8 x i16> %res } @@ -144,14 +144,7 @@ define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale x ; STR: // %bb.0: ; STR-NEXT: fcvtzun z0.s, { z0.d, z1.d } ; STR-NEXT: ret - %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) 
ret <vscale x 4 x i32> %res } -declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half>, <vscale x 8 x half>) -declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float>, <vscale x 4 x float>) -declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double>, <vscale x 2 x double>) - -declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half>, <vscale x 8 x half>) -declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float>, <vscale x 4 x float>) -declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double>, <vscale x 2 x double>) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
