[clang] [llvm] [Clang][LLVM][AArch64] Add support for FCVTXNT, FCVTLT, {B}FCVTNT int… (PR #170356)

via cfe-commits Tue, 02 Dec 2025 11:44:08 -0800

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: None (CarolineConcatto)

<details>
<summary>Changes</summary>

…rinsics

This patch adds support in Clang for these assembly instructions FCVTXNT, 
FCVTLT, {B}FCVTNT
By implementing these prototypes:

// Variant is available for _f64_f32
svfloat32_t     svcvtlt_f32[_f16]_z     (svbool_t pg, svfloat16_t op);

// Variants are available for:
// _f32_f64, _bf16_f32
svfloat16_t     svcvtnt_f16[_f32]_z     (svfloat16_t even, svbool_t pg, 
svfloat32_t op);

svfloat32_t     svcvtxnt_f32[_f64]_z (svfloat32_t even, svbool_t pg, 
svfloat64_t op);

according to the ACLE[1]

[1] https://github.com/ARM-software/acle/pull/412

---
Full diff: https://github.com/llvm/llvm-project/pull/170356.diff


6 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+12) 
- (added) clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c (+138) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+6-2) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+4-4) 
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+13-4) 
- (added) llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll (+53) 


``````````diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index d2b7b78b9970f..37438ff7ed8c3 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -968,6 +968,18 @@ def SVCVTXNT_F32_F64   : SInst<"svcvtxnt_f32[_f64]", 
"MMPd", "d", MergeOp1, "aar
 //  SVCVTXNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp
 }
 
+let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sve2p2|sme2p2" in {
+
+def SVCVTNT_Z_F16_F32  : SInst<"svcvtnt_f16[_f32]_z",  "hhPd", "f", MergeNone, 
"aarch64_sve_fcvtnt_z_f16f32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTNT_Z_F32_F64  : SInst<"svcvtnt_f32[_f64]_z",  "hhPd", "d", MergeNone, 
"aarch64_sve_fcvtnt_z_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTNT_Z_BF16_F32 : SInst<"svcvtnt_bf16[_f32]_z", "$$Pd", "f", MergeNone, 
"aarch64_sve_fcvtnt_z_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTXNT_Z_F32_F64 : SInst<"svcvtxnt_f32[_f64]_z", "MMPd", "d", MergeNone, 
"aarch64_sve_fcvtxnt_z_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTLT_Z_F32_F16  : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, 
"aarch64_sve_fcvtlt_f32f16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTLT_Z_F64_F32  : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, 
"aarch64_sve_fcvtlt_f64f32",  [IsOverloadNone, VerifyRuntimeMode]>;
+
+}
 
////////////////////////////////////////////////////////////////////////////////
 // Permutations and selection
 
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c 
b/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c
new file mode 100644
index 0000000000000..7a77fb9a86a7e
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c
@@ -0,0 +1,138 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck 
%s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme 
-target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme 
-target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck 
%s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p2 -target-feature +sve2p2 \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2p2 -target-feature +sve2p2 \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+//
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
+// CHECK-LABEL: @test_svcvtnt_f16_f32_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale 
x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: 
@_Z22test_svcvtnt_f16_f32_zu13__SVFloat16_tu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale 
x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svcvtnt_f16_f32_z(svfloat16_t inactive, svbool_t pg, 
svfloat32_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtnt_f16,_f32,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtnt_bf16_f32_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], 
<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: 
@_Z23test_svcvtnt_bf16_f32_zu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], 
<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svcvtnt_bf16_f32_z(svbfloat16_t inactive, svbool_t pg, 
svfloat32_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtnt_bf16,_f32,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtnt_f32_f64_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], 
<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: 
@_Z22test_svcvtnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], 
<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, 
svfloat64_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtnt_f32,_f64,_z,)(inactive, pg, op);
+}
+
+
+
+// CHECK-LABEL: @test_svcvtxnt_f32_f64_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], 
<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: 
@_Z23test_svcvtxnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], 
<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtxnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, 
svfloat64_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtlt_f32_f16_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 
4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f32_f16_zu10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 
4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtlt_f32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtlt_f32,_f16,_z,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtlt_f64_f32_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale 
x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f64_f32_zu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale 
x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svcvtlt_f64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtlt_f64,_f32,_z,)(pg, op);
+}
+
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 77fdb8295faa8..f748dbc160a24 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2211,6 +2211,7 @@ def int_aarch64_sve_fcvtzs_i64f32   : 
Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1
 
 def int_aarch64_sve_fcvt_bf16f32_v2   : Builtin_SVCVT<llvm_nxv8bf16_ty, 
llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
 def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, 
llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_z_bf16f32  : Builtin_SVCVT<llvm_nxv8bf16_ty, 
llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
 
 def int_aarch64_sve_fcvtzu_i32f16   : Builtin_SVCVT<llvm_nxv4i32_ty, 
llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
 def int_aarch64_sve_fcvtzu_i32f64   : Builtin_SVCVT<llvm_nxv4i32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
@@ -2228,10 +2229,13 @@ def int_aarch64_sve_fcvt_f64f32     : 
Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1
 def int_aarch64_sve_fcvtlt_f32f16   : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
 def int_aarch64_sve_fcvtlt_f64f32   : Builtin_SVCVT<llvm_nxv2f64_ty, 
llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
 def int_aarch64_sve_fcvtnt_f16f32   : Builtin_SVCVT<llvm_nxv8f16_ty, 
llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_z_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, 
llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
 def int_aarch64_sve_fcvtnt_f32f64   : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
 
-def int_aarch64_sve_fcvtx_f32f64    : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtxnt_f32f64  : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtx_f32f64     : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_f32f64   : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, 
llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
 
 def int_aarch64_sve_scvtf_f16i32    : Builtin_SVCVT<llvm_nxv8f16_ty, 
llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
 def int_aarch64_sve_scvtf_f16i64    : Builtin_SVCVT<llvm_nxv8f16_ty, 
llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e99b3f8ff07e0..bb42a6cc5679a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4552,8 +4552,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
   defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
 
   // SVE2p2 floating-point convert precision down (placing odd), zeroing 
predicate
-  defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
-  def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, 
ZPR64, /*destructive*/ true>;
+  defm FCVTNT_ZPzZ  : sve2_fp_convert_down_narrow_z<"fcvtnt", 
"int_aarch64_sve_fcvtnt_z">;
+  defm FCVTXNT_ZPzZ : sve_float_convert_top<"fcvtxnt", 
int_aarch64_sve_fcvtxnt_z_f32f64>;
   // Placing even
   defm FCVTX_ZPzZ  : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
 
@@ -4561,8 +4561,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
   defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", 
"int_aarch64_sve_fcvtlt">;
 
   // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing 
predicate
-  def BFCVTNT_ZPzZ      : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", 
ZPR16, ZPR32, /*destructive*/ true>;
-  defm BFCVT_ZPzZ_StoH  : sve_fp_z2op_p_zd_bfcvt<"bfcvt", 
int_aarch64_sve_fcvt_bf16f32_v2>;
+  defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", 
int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>;
+  defm BFCVT_ZPzZ_StoH   : sve_fp_z2op_p_zd_bfcvt<"bfcvt", 
int_aarch64_sve_fcvt_bf16f32_v2>;
 
   // Floating-point convert to integer, zeroing predicate
   defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs", 
"int_aarch64_sve_fcvtzs", AArch64fcvtzs_mt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index c63ae8660cad2..25b0bebed0fec 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2907,9 +2907,12 @@ multiclass sve2_fp_convert_up_long_z<string asm, string 
op> {
   defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # 
_f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
 }
 
-multiclass sve2_fp_convert_down_narrow_z<string asm> {
+multiclass sve2_fp_convert_down_narrow_z<string asm, string op> {
   def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm,  ZPR16, ZPR32, 
/*destructive*/ true>;
   def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm,  ZPR32, ZPR64, 
/*destructive*/ true>;
+
+  def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, 
nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+  def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, 
nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
 }
 
 
//===----------------------------------------------------------------------===//
@@ -9551,10 +9554,16 @@ multiclass sve_bfloat_convert<string asm, 
SDPatternOperator op, SDPatternOperato
   def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, 
!cast<Instruction>(NAME)>;
 }
 
-multiclass sve_bfloat_convert_top<string asm,  SDPatternOperator op> {
-  def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
+multiclass sve_bfloat_convert_top<string asm, SDPatternOperator ir_op, bit op 
= true, bit destructive = op> {
+  def NAME : sve2_fp_convert_precision<0b1010, op, asm, ZPR16, ZPR32, 
destructive>;
 
-  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, 
!cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<nxv8bf16, ir_op, nxv8bf16, nxv4i1, nxv4f32, 
!cast<Instruction>(NAME)>;
+}
+
+multiclass sve_float_convert_top<string asm, SDPatternOperator ir_op> {
+  def _StoD : sve2_fp_convert_precision<0b0010, 0b0, asm, ZPR32, ZPR64, 
/*destructive*/ true>;
+
+  def : SVE_3_Op_Pat<nxv4f32, ir_op, nxv4f32, nxv2i1, nxv2f64, 
!cast<Instruction>(NAME # _StoD)>;
 }
 
 
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll 
b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll
new file mode 100644
index 0000000000000..d55d72bcf7e2a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2 -force-streaming < %s | 
FileCheck %s
+
+
+;FCVTNT, BFCVTNT
+define <vscale x 8 x half> @fcvtnt_f16_f32_z(<vscale x 8 x half> %even, 
<vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtnt_f16_f32_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnt z0.h, p0/z, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 
8 x half> %even,
+                                                                   <vscale x 4 
x i1> %pg,
+                                                                    <vscale x 
4 x float> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x bfloat> @fcvtnt_bf16_f32_z(<vscale x 8 x bfloat> %even, 
<vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtnt_bf16_f32_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfcvtnt z0.h, p0/z, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale 
x 8 x bfloat> %even,
+                                                                       <vscale 
x 4 x i1> %pg,
+                                                                       <vscale 
x 4 x float> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 4 x float> @fcvtnt_f32_f64_z(<vscale x 4 x float> %even, 
<vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtnt_f32_f64_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnt z0.s, p0/z, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 
4 x float> %even,
+                                                                    <vscale x 
2 x i1> %pg,
+                                                                     <vscale x 
2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+;FCVTXNT
+
+
+define <vscale x 4 x float> @fcvtxnt_f32_f64_z(<vscale x 4 x float> %a, 
<vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtxnt_f32_f64_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtxnt z0.s, p0/z, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale 
x 4 x float> %a,
+                                                                      <vscale 
x 2 x i1> %pg,
+                                                                      <vscale 
x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/170356
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [Clang][LLVM][AArch64] Add support for FCVTXNT, FCVTLT, {B}FCVTNT int… (PR #170356)

Reply via email to