[clang] [llvm] [AArch64] Add 9.7 CVT data processing intrinsics (PR #186807)

Martin Wehking via cfe-commits Wed, 20 May 2026 07:00:12 -0700

https://github.com/MartinWehking updated 
https://github.com/llvm/llvm-project/pull/186807


>From 59676e350f96f414271588c300d40f099ca9ca11 Mon Sep 17 00:00:00 2001
From: Martin Wehking <[email protected]>
Date: Mon, 16 Mar 2026 11:01:10 +0000
Subject: [PATCH 1/2] [AArch64] Add 9.7 data processing intrinsics

Add Clang/LLVM intrinsics for scvtf, scvtflt, ucvtf, ucvtflt and fcvtzsn,
fcvtzun.
The Clang intrinsics are guarded by the sve2.3 and sme2.3 feature flags.

ACLE Patch:
https://github.com/ARM-software/acle/pull/428

Fix overload and address comments

Fix intrinsic name and simplify CHECK lines

Reintroduce overloaded short forms for intrinsics

Adapt the test cases accordingly.

Rename ACLE clang intrinsic

A clang intrinsic was renamed in the ACLE patch.
Change the name accordingly.

Use existing pattern template

Apply suggestions

Apply suggestions
---
 clang/include/clang/Basic/arm_sve.td          |  22 ++
 clang/include/clang/Basic/arm_sve_sme_incl.td |   1 +
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |   3 +-
 .../acle_sve2_fp_int_cvtn_x2.c                | 113 ++++++++++
 .../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c  | 197 ++++++++++++++++++
 clang/utils/TableGen/SveEmitter.cpp           |   4 +
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  30 +++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  12 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td    |   2 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  16 +-
 .../AArch64/sve2p3-intrinsics-fp-converts.ll  | 120 +++++++++++
 .../sve2p3-intrinsics-fp-converts_x2.ll       | 108 ++++++++++
 12 files changed, 615 insertions(+), 13 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
 create mode 100644 
llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index ac9f9af30fce7..4e67d46ae8ce9 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1001,6 +1001,28 @@ def SVCVTLT_Z_F32_F16  : SInst<"svcvtlt_f32[_f16]", 
"dPh", "f", MergeZeroExp, "a
 def SVCVTLT_Z_F64_F32  : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, 
"aarch64_sve_fcvtlt_f64f32",  [IsOverloadNone, VerifyRuntimeMode]>;
 
 }
+
+let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
+def SVCVTZN_S : SInst<"svcvtzn_{0}[_{1}_x2]", "y2.d", "hfd", MergeNone, 
"aarch64_sve_fcvtzsn_x2", [IsReductionQV, VerifyRuntimeMode]>;
+def SVCVTZN_U : SInst<"svcvtzn_{0}[_{1}_x2]", "e2.d", "hfd", MergeNone, 
"aarch64_sve_fcvtzun_x2", [IsReductionQV, VerifyRuntimeMode]>;
+
+def SVCVTT_F16_S8  : SInst<"svcvtt_f16[_s8]",  "Od", "c", MergeNone, 
"aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_S16  : SInst<"svcvtt_f32[_s16]",  "Md", "s", MergeNone, 
"aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_S32  : SInst<"svcvtt_f64[_s32]",  "Nd", "i", MergeNone, 
"aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTT_F16_U8  : SInst<"svcvtt_f16[_u8]",  "Od", "Uc", MergeNone, 
"aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_U16  : SInst<"svcvtt_f32[_u16]",  "Md", "Us", MergeNone, 
"aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_U32  : SInst<"svcvtt_f64[_u32]",  "Nd", "Ui", MergeNone, 
"aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTB_F16_S8  : SInst<"svcvtb_f16[_s8]",  "Od", "c", MergeNone, 
"aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_S16  : SInst<"svcvtb_f32[_s16]",  "Md", "s", MergeNone, 
"aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_S32  : SInst<"svcvtb_f64[_s32]",  "Nd", "i", MergeNone, 
"aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTB_F16_U8  : SInst<"svcvtb_f16[_u8]",  "Od", "Uc", MergeNone, 
"aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_U16  : SInst<"svcvtb_f32[_u16]",  "Md", "Us", MergeNone, 
"aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_U32  : SInst<"svcvtb_f64[_u32]",  "Nd", "Ui", MergeNone, 
"aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+}
+
 
////////////////////////////////////////////////////////////////////////////////
 // Permutations and selection
 
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td 
b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 7e60e87b12a4d..acc5159608b06 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -78,6 +78,7 @@ include "arm_immcheck_incl.td"
 // R: scalar of 1/2 width element type (splat to vector type)
 // r: scalar of 1/4 width element type (splat to vector type)
 // @: unsigned scalar of 1/4 width element type (splat to vector type)
+// y: 1/2 width signed elements, 2x element count
 // e: 1/2 width unsigned elements, 2x element count
 // b: 1/4 width unsigned elements, 4x element count
 // h: 1/2 width elements, 2x element count
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index f8990ced2a577..8d5731c473bb1 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -3942,8 +3942,7 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags 
&TypeFlags,
   if (TypeFlags.isOverloadFirstandLast())
     return {Ops[0]->getType(), Ops.back()->getType()};
 
-  if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
-      ResultType->isVectorTy())
+  if (TypeFlags.isReductionQV())
     return {ResultType, Ops[1]->getType()};
 
   assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
diff --git 
a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
new file mode 100644
index 0000000000000..ded4f3a02d2a0
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
@@ -0,0 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck 
%s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme 
-target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme 
-target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck 
%s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2p3\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+//
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
+// CHECK-LABEL: @test_svcvtzn_s8_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> 
[[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtzn_s8_f16_x213svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> 
[[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svcvtzn_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtzn_s8,_f16_x2)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtzn_s16_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> 
[[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtzn_s16_f32_x213svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> 
[[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svcvtzn_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtzn_s16,_f32_x2)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtzn_s32_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> 
[[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtzn_s32_f64_x213svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> 
[[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svcvtzn_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtzn_s32,_f64_x2)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtzn_u8_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> 
[[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtzn_u8_f16_x213svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> 
[[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svcvtzn_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtzn_u8,_f16_x2)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtzn_u16_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> 
[[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtzn_u16_f32_x213svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> 
[[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svcvtzn_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtzn_u16,_f32_x2)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtzn_u32_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> 
[[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtzn_u32_f64_x213svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> 
[[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svcvtzn_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtzn_u32,_f64_x2)(zn);
+}
diff --git 
a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
new file mode 100644
index 0000000000000..26e077d05c28b
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
@@ -0,0 +1,197 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck 
%s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme 
-target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme 
-target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck 
%s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2p3\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+//
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
+// CHECK-LABEL: @test_svcvtb_f16_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtb_f16,_s8)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f32_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtb_f32,_s16)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f64_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtb_f64,_s32)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f16_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtb_f16,_u8)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f32_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtb_f32,_u16)(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f64_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtb_f64,_u32)(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f16_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtt_f16,_s8)(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f32_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtt_f32,_s16)(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f64_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtt_f64,_s32)(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f16_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtt_f16,_u8)(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f32_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtt_f32,_u16)(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f64_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR {
+  return SVE_ACLE_FUNC(svcvtt_f64,_u32)(zn);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp 
b/clang/utils/TableGen/SveEmitter.cpp
index accb7b240288f..0f770a3202538 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -697,6 +697,10 @@ void SVEType::applyModifier(char Mod) {
     Kind = UInt;
     ElementBitwidth /= 2;
     break;
+  case 'y':
+    Kind = SInt;
+    ElementBitwidth /= 2;
+    break;
   case 'h':
     ElementBitwidth /= 2;
     break;
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 48c38fb2c2c9f..0c9be7f5fa9bc 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1048,6 +1048,7 @@ def llvm_nxv4i1_ty  : LLVMType<nxv4i1>;
 def llvm_nxv8i1_ty  : LLVMType<nxv8i1>;
 def llvm_nxv16i1_ty : LLVMType<nxv16i1>;
 def llvm_nxv16i8_ty : LLVMType<nxv16i8>;
+def llvm_nxv8i16_ty : LLVMType<nxv8i16>;
 def llvm_nxv4i32_ty : LLVMType<nxv4i32>;
 def llvm_nxv2i64_ty : LLVMType<nxv2i64>;
 def llvm_nxv8f16_ty : LLVMType<nxv8f16>;
@@ -2613,6 +2614,29 @@ def int_aarch64_sve_fmlslb_lane   : 
SVE2_3VectorArgIndexed_Long_Intrinsic;
 def int_aarch64_sve_fmlslt        : SVE2_3VectorArg_Long_Intrinsic;
 def int_aarch64_sve_fmlslt_lane   : SVE2_3VectorArgIndexed_Long_Intrinsic;
 
+//
+// SVE2 - Signed/unsigned integer convert to floating-point
+//
+
+class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN>
+    : DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>;
+
+def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
+def int_aarch64_sve_scvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
+def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
+def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
+def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
+def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
+def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
+def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
+def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
+
 //
 // SVE2 - Floating-point integer binary logarithm
 //
@@ -4018,6 +4042,12 @@ let TargetPrefix = "aarch64" in {
                              LLVMVectorOfBitcastsToInt<0>, 
LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, 
LLVMVectorOfBitcastsToInt<0>],
                             [IntrNoMem]>;
 
+
+   //
+   // SVE2.3/SME2.3 - Multi-vector floating-point narrowing convert to integer
+   //
+   def int_aarch64_sve_fcvtzsn_x2: AdvSIMD_2Arg_FloatCompare_Intrinsic;
+   def int_aarch64_sve_fcvtzun_x2: AdvSIMD_2Arg_FloatCompare_Intrinsic;
 }
 
 // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 9df77f8e93c64..c95e0fbb301e3 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4858,14 +4858,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
   defm UDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"udot", 0b1, 
int_aarch64_sve_udot_lane_x2>;
 
   // SVE2 fp convert, narrow and interleave to integer, rounding toward zero
-  defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>;
-  defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>;
+  defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, 
int_aarch64_sve_fcvtzsn_x2>;
+  defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, 
int_aarch64_sve_fcvtzun_x2>;
 
   // SVE2 signed/unsigned integer convert to floating-point
-  defm SCVTF_ZZ   : sve2_int_to_fp_upcvt<"scvtf",   0b00>;
-  defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>;
-  defm UCVTF_ZZ   : sve2_int_to_fp_upcvt<"ucvtf",   0b01>;
-  defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>;
+  defm SCVTF_ZZ   : sve2_int_to_fp_upcvt<"scvtf",   0b00, 
"int_aarch64_sve_scvtfb">;
+  defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, 
"int_aarch64_sve_scvtflt">;
+  defm UCVTF_ZZ   : sve2_int_to_fp_upcvt<"ucvtf",   0b01, 
"int_aarch64_sve_ucvtfb">;
+  defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, 
"int_aarch64_sve_ucvtflt">;
 
   // SVE2 saturating shift right narrow by immediate and interleave
   defm SQRSHRN_Z2ZI_HtoB  : sve_multi_vec_round_shift_narrow<"sqrshrn",  
0b101, int_aarch64_sve_sqrshrn_x2>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td 
b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 99836aeed7c0a..771c4c1fb2b6e 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -2558,7 +2558,7 @@ class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
 multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
                                ValueType in_vt, SDPatternOperator intrinsic> {
   def NAME :  sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
-  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
+  def : SVE_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
 }
 
 // SME2 multi-vec FP8 down convert two registers
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 106986a64ffba..18b9008ee314c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -724,7 +724,7 @@ class SVE_Sat_Shift_VG2_Pat<string name, SDPatternOperator 
intrinsic, ValueType
     : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
                   (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, 
in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
 
-class SVE2p1_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType 
out_vt, ValueType in_vt>
+class SVE_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType 
out_vt, ValueType in_vt>
     : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
                   (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, 
in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
 
@@ -10174,7 +10174,7 @@ class sve2p1_multi_vec_extract_narrow<string mnemonic, 
bits<2> opc, bits<3> tsz>
 
 multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, 
SDPatternOperator intrinsic> {
   def NAME : sve2p1_multi_vec_extract_narrow<mnemonic, opc, 0b010>;
-  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32>;
+  def : SVE_Cvt_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32>;
 }
 
 // SVE2 multi-vec shift narrow
@@ -11485,10 +11485,14 @@ class sve2_fp_to_int_downcvt<string asm, ZPRRegOp 
ZdRC, RegisterOperand ZSrcOp,
   let Inst{4-0}   = Zd;
 }
 
-multiclass sve2_fp_to_int_downcvt<string asm, bit U> {
+multiclass sve2_fp_to_int_downcvt<string asm, bit U, SDPatternOperator op> {
   def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8,  ZZ_h_mul_r, 0b01, U>;
   def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>;
   def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>;
+
+  def: SVE_Cvt_VG2_Pat<NAME # _HtoB, op, nxv16i8, nxv8f16>;
+  def: SVE_Cvt_VG2_Pat<NAME # _StoH, op, nxv8i16, nxv4f32>;
+  def: SVE_Cvt_VG2_Pat<NAME # _DtoS, op, nxv4i32, nxv2f64>;
 }
 
 
//===----------------------------------------------------------------------===//
@@ -11508,8 +11512,12 @@ class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, 
ZPRRegOp ZnRC,
   let Inst{4-0}   = Zd;
 }
 
-multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> {
+multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, string op> {
   def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8,  0b01, U>;
   def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>;
   def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>;
+
+  def : SVE_1_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # "_f16i8"), 
nxv16i8, !cast<Instruction>(NAME # _BtoH)>;
+  def : SVE_1_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # "_f32i16"), 
nxv8i16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_1_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # "_f64i32"), 
nxv4i32, !cast<Instruction>(NAME # _StoD)>;
 }
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll 
b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
new file mode 100644
index 0000000000000..b842571e1ef8e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s
+;
+; SVCVTB (SCVTFB / UCVTFB)
+;
+
+define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) {
+; CHECK-LABEL: scvtfb_f16_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.h, z0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 
x i8> %zn)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) {
+; CHECK-LABEL: scvtfb_f32_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.s, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 
x i16> %zn)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) {
+; CHECK-LABEL: scvtfb_f64_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 
4 x i32> %zn)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) {
+; CHECK-LABEL: ucvtfb_f16_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.h, z0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 
x i8> %zn)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) {
+; CHECK-LABEL: ucvtfb_f32_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.s, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 
x i16> %zn)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) {
+; CHECK-LABEL: ucvtfb_f64_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 
4 x i32> %zn)
+  ret <vscale x 2 x double> %res
+}
+
+;
+; SVCVTT (SCVTFLT / UCVTFLT)
+;
+
+define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) {
+; CHECK-LABEL: scvtflt_f16_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtflt z0.h, z0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 
x i8> %zn)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) {
+; CHECK-LABEL: scvtflt_f32_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtflt z0.s, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 
8 x i16> %zn)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) {
+; CHECK-LABEL: scvtflt_f64_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtflt z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 
4 x i32> %zn)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) {
+; CHECK-LABEL: ucvtflt_f16_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtflt z0.h, z0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 
x i8> %zn)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) {
+; CHECK-LABEL: ucvtflt_f32_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtflt z0.s, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 
8 x i16> %zn)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) {
+; CHECK-LABEL: ucvtflt_f64_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtflt z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 
4 x i32> %zn)
+  ret <vscale x 2 x double> %res
+}
+
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll 
b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
new file mode 100644
index 0000000000000..7e05793cabcc1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=CHECK-STREAMING
+;
+; FCVTZSN
+;
+
+define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 
8 x half> %zn2) {
+; CHECK-LABEL: fcvtzsn_i8_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fcvtzsn z0.b, { z0.h, z1.h }
+; CHECK-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: fcvtzsn_i8_f16:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    fcvtzsn z0.b, { z0.h, z1.h }
+; CHECK-STREAMING-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.i8f16(<vscale x 
8 x half> %zn1, <vscale x 8 x half> %zn2)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale 
x 4 x float> %zn2) {
+; CHECK-LABEL: fcvtzsn_i16_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fcvtzsn z0.h, { z0.s, z1.s }
+; CHECK-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: fcvtzsn_i16_f32:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    fcvtzsn z0.h, { z0.s, z1.s }
+; CHECK-STREAMING-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.i16f32(<vscale x 
4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale 
x 2 x double> %zn2) {
+; CHECK-LABEL: fcvtzsn_i32_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fcvtzsn z0.s, { z0.d, z1.d }
+; CHECK-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: fcvtzsn_i32_f64:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    fcvtzsn z0.s, { z0.d, z1.d }
+; CHECK-STREAMING-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.i32f64(<vscale x 
2 x double> %zn1, <vscale x 2 x double> %zn2)
+  ret <vscale x 4 x i32> %res
+}
+
+;
+; FCVTZUN
+;
+
+define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 
8 x half> %zn2) {
+; CHECK-LABEL: fcvtzun_i8_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fcvtzun z0.b, { z0.h, z1.h }
+; CHECK-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: fcvtzun_i8_f16:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    fcvtzun z0.b, { z0.h, z1.h }
+; CHECK-STREAMING-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.i8f16(<vscale x 
8 x half> %zn1, <vscale x 8 x half> %zn2)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale 
x 4 x float> %zn2) {
+; CHECK-LABEL: fcvtzun_i16_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fcvtzun z0.h, { z0.s, z1.s }
+; CHECK-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: fcvtzun_i16_f32:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    fcvtzun z0.h, { z0.s, z1.s }
+; CHECK-STREAMING-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.i16f32(<vscale x 
4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale 
x 2 x double> %zn2) {
+; CHECK-LABEL: fcvtzun_i32_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fcvtzun z0.s, { z0.d, z1.d }
+; CHECK-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: fcvtzun_i32_f64:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    fcvtzun z0.s, { z0.d, z1.d }
+; CHECK-STREAMING-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.i32f64(<vscale x 
2 x double> %zn1, <vscale x 2 x double> %zn2)
+  ret <vscale x 4 x i32> %res
+}
+

>From ad13945ead2c7d26b468c62ed5502bb020a42ca7 Mon Sep 17 00:00:00 2001
From: Martin Wehking <[email protected]>
Date: Tue, 19 May 2026 13:12:05 +0000
Subject: [PATCH 2/2] Apply suggestions

Introduce a new flag for overload resolution and combine some front end
intrinsics
---
 clang/include/clang/Basic/TargetBuiltins.h    |   3 +
 clang/include/clang/Basic/arm_sve.td          |  21 +---
 clang/include/clang/Basic/arm_sve_sme_incl.td |   1 +
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |   3 +
 .../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c  |  48 +++----
 ...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c | 117 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  26 ++--
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   8 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |   8 +-
 .../AArch64/sve2p3-intrinsics-fp-converts.ll  |  24 ++--
 10 files changed, 181 insertions(+), 78 deletions(-)

diff --git a/clang/include/clang/Basic/TargetBuiltins.h 
b/clang/include/clang/Basic/TargetBuiltins.h
index ae4bcdb9eeb64..9b4613c853206 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -401,6 +401,9 @@ namespace clang {
     bool isOverloadFirstandLast() const {
       return Flags & IsOverloadFirstandLast;
     }
+    bool isOverloadDefaultAndOp0() const {
+      return Flags & IsOverloadDefaultAndOp0;
+    }
     bool isPrefetch() const { return Flags & IsPrefetch; }
     bool isReverseCompare() const { return Flags & ReverseCompare; }
     bool isAppendSVALL() const { return Flags & IsAppendSVALL; }
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 4e67d46ae8ce9..a5c84f163d8c9 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1006,21 +1006,10 @@ let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = 
"sve2p3|sme2p3" in {
 def SVCVTZN_S : SInst<"svcvtzn_{0}[_{1}_x2]", "y2.d", "hfd", MergeNone, 
"aarch64_sve_fcvtzsn_x2", [IsReductionQV, VerifyRuntimeMode]>;
 def SVCVTZN_U : SInst<"svcvtzn_{0}[_{1}_x2]", "e2.d", "hfd", MergeNone, 
"aarch64_sve_fcvtzun_x2", [IsReductionQV, VerifyRuntimeMode]>;
 
-def SVCVTT_F16_S8  : SInst<"svcvtt_f16[_s8]",  "Od", "c", MergeNone, 
"aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F32_S16  : SInst<"svcvtt_f32[_s16]",  "Md", "s", MergeNone, 
"aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F64_S32  : SInst<"svcvtt_f64[_s32]",  "Nd", "i", MergeNone, 
"aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-
-def SVCVTT_F16_U8  : SInst<"svcvtt_f16[_u8]",  "Od", "Uc", MergeNone, 
"aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F32_U16  : SInst<"svcvtt_f32[_u16]",  "Md", "Us", MergeNone, 
"aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F64_U32  : SInst<"svcvtt_f64[_u32]",  "Nd", "Ui", MergeNone, 
"aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-
-def SVCVTB_F16_S8  : SInst<"svcvtb_f16[_s8]",  "Od", "c", MergeNone, 
"aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F32_S16  : SInst<"svcvtb_f32[_s16]",  "Md", "s", MergeNone, 
"aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F64_S32  : SInst<"svcvtb_f64[_s32]",  "Nd", "i", MergeNone, 
"aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-
-def SVCVTB_F16_U8  : SInst<"svcvtb_f16[_u8]",  "Od", "Uc", MergeNone, 
"aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F32_U16  : SInst<"svcvtb_f32[_u16]",  "Md", "Us", MergeNone, 
"aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F64_U32  : SInst<"svcvtb_f64[_u32]",  "Nd", "Ui", MergeNone, 
"aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+foreach suffix = ["b", "t"] in {
+def SVCVT # !toupper(suffix) # _S: SInst<"svcvt" # suffix # "_{d}[_{1}]", "dy", "hfd", MergeNone, "aarch64_sve_scvtf" # suffix, [IsOverloadDefaultAndOp0, VerifyRuntimeMode]>;
+def SVCVT # !toupper(suffix) # _U: SInst<"svcvt" # suffix # "_{d}[_{1}]", "de", "hfd", MergeNone, "aarch64_sve_ucvtf" # suffix, [IsOverloadDefaultAndOp0, VerifyRuntimeMode]>;
+}
 }
 
 
////////////////////////////////////////////////////////////////////////////////
@@ -2557,4 +2546,4 @@ let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = 
"sve2p3|sme2p3" in {
 
   def SVDOT_LANE_X2_SH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "s",  
MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
   def SVDOT_LANE_X2_UH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Us", 
MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
-}
\ No newline at end of file
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td 
b/clang/include/clang/Basic/arm_sve_sme_incl.td
index acc5159608b06..b1fbafa6e81e7 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -216,6 +216,7 @@ def IsOverloadNone                  : FlagType<0x00100000>; 
// Intrinsic does no
 def IsOverloadWhileOrMultiVecCvt    : FlagType<0x00200000>; // Use {default 
type, typeof(operand1)} as overloaded types.
 def IsOverloadWhileRW               : FlagType<0x00400000>; // Use 
{pred(default type), typeof(operand0)} as overloaded types.
 def IsOverloadFirstandLast          : FlagType<0x00800000>; // Use 
{typeof(operand0), typeof(last operand)} as overloaded types.
+def IsOverloadDefaultAndOp0         : FlagType<0x4000000000000>; // Use {default type, typeof(operand0)} as overloaded types.
 def OverloadKindMask                : FlagType<0x00E00000>; // When the masked 
values are all '0', the default type is used as overload type.
 def IsByteIndexed                   : FlagType<0x01000000>;
 def IsAppendSVALL                   : FlagType<0x02000000>; // Appends SV_ALL 
as the last operand.
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 8d5731c473bb1..6aa92a2361b56 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -3939,6 +3939,9 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags 
&TypeFlags,
   if (TypeFlags.isOverloadWhileRW())
     return {getSVEPredType(TypeFlags), Ops[0]->getType()};
 
+  if (TypeFlags.isOverloadDefaultAndOp0())
+    return {DefaultType, Ops[0]->getType()};
+
   if (TypeFlags.isOverloadFirstandLast())
     return {Ops[0]->getType(), Ops.back()->getType()};
 
diff --git 
a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
index 26e077d05c28b..c12a0fff786ef 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
@@ -30,12 +30,12 @@
 
 // CHECK-LABEL: @test_svcvtb_f16_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR {
@@ -44,12 +44,12 @@ svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvtb_f32_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR {
@@ -58,12 +58,12 @@ svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvtb_f64_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR {
@@ -72,12 +72,12 @@ svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvtb_f16_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR {
@@ -86,12 +86,12 @@ svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvtb_f32_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR {
@@ -100,12 +100,12 @@ svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvtb_f64_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR {
@@ -114,12 +114,12 @@ svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvt_f16_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR {
@@ -128,12 +128,12 @@ svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvt_f32_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR {
@@ -142,12 +142,12 @@ svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvt_f64_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR {
@@ -156,12 +156,12 @@ svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvt_f16_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR {
@@ -170,12 +170,12 @@ svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvt_f32_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR {
@@ -184,12 +184,12 @@ svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR {
 
 // CHECK-LABEL: @test_svcvt_f64_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> [[ZN:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR {
diff --git 
a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
index 34ead79e726ab..8f0a28b260d5d 100644
--- 
a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
+++ 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
@@ -14,6 +14,9 @@ void test(void) {
   int16_t int16_t_val;
   int32_t int32_t_val;
   svbool_t svbool_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svfloat32x2_t svfloat32x2_t_val;
+  svfloat64x2_t svfloat64x2_t_val;
   svint8_t svint8_t_val;
   svint16_t svint16_t_val;
   svint16x2_t svint16x2_t_val;
@@ -86,6 +89,42 @@ void test(void) {
   svaddsubp_u16(svuint16_t_val, svuint16_t_val);
   svaddsubp_u32(svuint32_t_val, svuint32_t_val);
   svaddsubp_u64(svuint64_t_val, svuint64_t_val);
+  svcvtb_f16(svint8_t_val);
+  svcvtb_f16(svuint8_t_val);
+  svcvtb_f16_s8(svint8_t_val);
+  svcvtb_f16_u8(svuint8_t_val);
+  svcvtb_f32(svint16_t_val);
+  svcvtb_f32(svuint16_t_val);
+  svcvtb_f32_s16(svint16_t_val);
+  svcvtb_f32_u16(svuint16_t_val);
+  svcvtb_f64(svint32_t_val);
+  svcvtb_f64(svuint32_t_val);
+  svcvtb_f64_s32(svint32_t_val);
+  svcvtb_f64_u32(svuint32_t_val);
+  svcvtt_f16(svint8_t_val);
+  svcvtt_f16(svuint8_t_val);
+  svcvtt_f16_s8(svint8_t_val);
+  svcvtt_f16_u8(svuint8_t_val);
+  svcvtt_f32(svint16_t_val);
+  svcvtt_f32(svuint16_t_val);
+  svcvtt_f32_s16(svint16_t_val);
+  svcvtt_f32_u16(svuint16_t_val);
+  svcvtt_f64(svint32_t_val);
+  svcvtt_f64(svuint32_t_val);
+  svcvtt_f64_s32(svint32_t_val);
+  svcvtt_f64_u32(svuint32_t_val);
+  svcvtzn_s8(svfloat16x2_t_val);
+  svcvtzn_s8_f16_x2(svfloat16x2_t_val);
+  svcvtzn_s16(svfloat32x2_t_val);
+  svcvtzn_s16_f32_x2(svfloat32x2_t_val);
+  svcvtzn_s32(svfloat64x2_t_val);
+  svcvtzn_s32_f64_x2(svfloat64x2_t_val);
+  svcvtzn_u8(svfloat16x2_t_val);
+  svcvtzn_u8_f16_x2(svfloat16x2_t_val);
+  svcvtzn_u16(svfloat32x2_t_val);
+  svcvtzn_u16_f32_x2(svfloat32x2_t_val);
+  svcvtzn_u32(svfloat64x2_t_val);
+  svcvtzn_u32_f64_x2(svfloat64x2_t_val);
   svdot(svint16_t_val, svint8_t_val, int8_t_val);
   svdot(svint16_t_val, svint8_t_val, svint8_t_val);
   svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
@@ -171,6 +210,9 @@ void test_streaming(void) __arm_streaming{
   int16_t int16_t_val;
   int32_t int32_t_val;
   svbool_t svbool_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svfloat32x2_t svfloat32x2_t_val;
+  svfloat64x2_t svfloat64x2_t_val;
   svint8_t svint8_t_val;
   svint16_t svint16_t_val;
   svint16x2_t svint16x2_t_val;
@@ -243,6 +285,42 @@ void test_streaming(void) __arm_streaming{
   svaddsubp_u16(svuint16_t_val, svuint16_t_val);
   svaddsubp_u32(svuint32_t_val, svuint32_t_val);
   svaddsubp_u64(svuint64_t_val, svuint64_t_val);
+  svcvtb_f16(svint8_t_val);
+  svcvtb_f16(svuint8_t_val);
+  svcvtb_f16_s8(svint8_t_val);
+  svcvtb_f16_u8(svuint8_t_val);
+  svcvtb_f32(svint16_t_val);
+  svcvtb_f32(svuint16_t_val);
+  svcvtb_f32_s16(svint16_t_val);
+  svcvtb_f32_u16(svuint16_t_val);
+  svcvtb_f64(svint32_t_val);
+  svcvtb_f64(svuint32_t_val);
+  svcvtb_f64_s32(svint32_t_val);
+  svcvtb_f64_u32(svuint32_t_val);
+  svcvtt_f16(svint8_t_val);
+  svcvtt_f16(svuint8_t_val);
+  svcvtt_f16_s8(svint8_t_val);
+  svcvtt_f16_u8(svuint8_t_val);
+  svcvtt_f32(svint16_t_val);
+  svcvtt_f32(svuint16_t_val);
+  svcvtt_f32_s16(svint16_t_val);
+  svcvtt_f32_u16(svuint16_t_val);
+  svcvtt_f64(svint32_t_val);
+  svcvtt_f64(svuint32_t_val);
+  svcvtt_f64_s32(svint32_t_val);
+  svcvtt_f64_u32(svuint32_t_val);
+  svcvtzn_s8(svfloat16x2_t_val);
+  svcvtzn_s8_f16_x2(svfloat16x2_t_val);
+  svcvtzn_s16(svfloat32x2_t_val);
+  svcvtzn_s16_f32_x2(svfloat32x2_t_val);
+  svcvtzn_s32(svfloat64x2_t_val);
+  svcvtzn_s32_f64_x2(svfloat64x2_t_val);
+  svcvtzn_u8(svfloat16x2_t_val);
+  svcvtzn_u8_f16_x2(svfloat16x2_t_val);
+  svcvtzn_u16(svfloat32x2_t_val);
+  svcvtzn_u16_f32_x2(svfloat32x2_t_val);
+  svcvtzn_u32(svfloat64x2_t_val);
+  svcvtzn_u32_f64_x2(svfloat64x2_t_val);
   svdot(svint16_t_val, svint8_t_val, int8_t_val);
   svdot(svint16_t_val, svint8_t_val, svint8_t_val);
   svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
@@ -328,6 +406,9 @@ void test_streaming_compatible(void) 
__arm_streaming_compatible{
   int16_t int16_t_val;
   int32_t int32_t_val;
   svbool_t svbool_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svfloat32x2_t svfloat32x2_t_val;
+  svfloat64x2_t svfloat64x2_t_val;
   svint8_t svint8_t_val;
   svint16_t svint16_t_val;
   svint16x2_t svint16x2_t_val;
@@ -400,6 +481,42 @@ void test_streaming_compatible(void) __arm_streaming_compatible{
   svaddsubp_u16(svuint16_t_val, svuint16_t_val);
   svaddsubp_u32(svuint32_t_val, svuint32_t_val);
   svaddsubp_u64(svuint64_t_val, svuint64_t_val);
+  svcvtb_f16(svint8_t_val);
+  svcvtb_f16(svuint8_t_val);
+  svcvtb_f16_s8(svint8_t_val);
+  svcvtb_f16_u8(svuint8_t_val);
+  svcvtb_f32(svint16_t_val);
+  svcvtb_f32(svuint16_t_val);
+  svcvtb_f32_s16(svint16_t_val);
+  svcvtb_f32_u16(svuint16_t_val);
+  svcvtb_f64(svint32_t_val);
+  svcvtb_f64(svuint32_t_val);
+  svcvtb_f64_s32(svint32_t_val);
+  svcvtb_f64_u32(svuint32_t_val);
+  svcvtt_f16(svint8_t_val);
+  svcvtt_f16(svuint8_t_val);
+  svcvtt_f16_s8(svint8_t_val);
+  svcvtt_f16_u8(svuint8_t_val);
+  svcvtt_f32(svint16_t_val);
+  svcvtt_f32(svuint16_t_val);
+  svcvtt_f32_s16(svint16_t_val);
+  svcvtt_f32_u16(svuint16_t_val);
+  svcvtt_f64(svint32_t_val);
+  svcvtt_f64(svuint32_t_val);
+  svcvtt_f64_s32(svint32_t_val);
+  svcvtt_f64_u32(svuint32_t_val);
+  svcvtzn_s8(svfloat16x2_t_val);
+  svcvtzn_s8_f16_x2(svfloat16x2_t_val);
+  svcvtzn_s16(svfloat32x2_t_val);
+  svcvtzn_s16_f32_x2(svfloat32x2_t_val);
+  svcvtzn_s32(svfloat64x2_t_val);
+  svcvtzn_s32_f64_x2(svfloat64x2_t_val);
+  svcvtzn_u8(svfloat16x2_t_val);
+  svcvtzn_u8_f16_x2(svfloat16x2_t_val);
+  svcvtzn_u16(svfloat32x2_t_val);
+  svcvtzn_u16_f32_x2(svfloat32x2_t_val);
+  svcvtzn_u32(svfloat64x2_t_val);
+  svcvtzn_u32_f64_x2(svfloat64x2_t_val);
   svdot(svint16_t_val, svint8_t_val, int8_t_val);
   svdot(svint16_t_val, svint8_t_val, svint8_t_val);
   svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 0c9be7f5fa9bc..ec084626d5b2a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2618,24 +2618,14 @@ def int_aarch64_sve_fmlslt_lane   : SVE2_3VectorArgIndexed_Long_Intrinsic;
 // SVE2 - Multi-vector narrowing convert to floating point
 //
 
-class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN>
-    : DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>;
-
-def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
-def int_aarch64_sve_scvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
-def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
-
-def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
-def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
-def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
-
-def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
-def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
-def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
-
-def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, 
llvm_nxv16i8_ty>;
-def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, 
llvm_nxv8i16_ty>;
-def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, 
llvm_nxv4i32_ty>;
+def int_aarch64_sve_scvtfb
+    : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+def int_aarch64_sve_scvtft
+    : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+def int_aarch64_sve_ucvtfb
+    : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+def int_aarch64_sve_ucvtft
+    : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
 
 //
 // SVE2 - Floating-point integer binary logarithm
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c95e0fbb301e3..6a03b9abb3c19 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4862,10 +4862,10 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
   defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, 
int_aarch64_sve_fcvtzun_x2>;
 
   // SVE2 signed/unsigned integer convert to floating-point
-  defm SCVTF_ZZ   : sve2_int_to_fp_upcvt<"scvtf",   0b00, 
"int_aarch64_sve_scvtfb">;
-  defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, 
"int_aarch64_sve_scvtflt">;
-  defm UCVTF_ZZ   : sve2_int_to_fp_upcvt<"ucvtf",   0b01, 
"int_aarch64_sve_ucvtfb">;
-  defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, 
"int_aarch64_sve_ucvtflt">;
+  defm SCVTF_ZZ   : sve2_int_to_fp_upcvt<"scvtf",   0b00, 
int_aarch64_sve_scvtfb>;
+  defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, 
int_aarch64_sve_scvtft>;
+  defm UCVTF_ZZ   : sve2_int_to_fp_upcvt<"ucvtf",   0b01, 
int_aarch64_sve_ucvtfb>;
+  defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, 
int_aarch64_sve_ucvtft>;
 
   // SVE2 saturating shift right narrow by immediate and interleave
   defm SQRSHRN_Z2ZI_HtoB  : sve_multi_vec_round_shift_narrow<"sqrshrn",  
0b101, int_aarch64_sve_sqrshrn_x2>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 18b9008ee314c..947ef7f9c9f6d 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -11512,12 +11512,12 @@ class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC,
   let Inst{4-0}   = Zd;
 }
 
-multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, string op> {
+multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, SDPatternOperator op> {
   def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8,  0b01, U>;
   def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>;
   def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>;
 
-  def : SVE_1_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # "_f16i8"), 
nxv16i8, !cast<Instruction>(NAME # _BtoH)>;
-  def : SVE_1_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # "_f32i16"), 
nxv8i16, !cast<Instruction>(NAME # _HtoS)>;
-  def : SVE_1_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # "_f64i32"), 
nxv4i32, !cast<Instruction>(NAME # _StoD)>;
+  def : SVE_1_Op_Pat<nxv8f16, op, nxv16i8, !cast<Instruction>(NAME # _BtoH)>;
+  def : SVE_1_Op_Pat<nxv4f32, op, nxv8i16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_1_Op_Pat<nxv2f64, op, nxv4i32, !cast<Instruction>(NAME # _StoD)>;
 }
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
index b842571e1ef8e..1a07e4371810d 100644
--- a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
@@ -11,7 +11,7 @@ define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    scvtf z0.h, z0.b
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 
x i8> %zn)
+  %res = call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn)
   ret <vscale x 8 x half> %res
 }
 
@@ -20,7 +20,7 @@ define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    scvtf z0.s, z0.h
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 
x i16> %zn)
+  %res = call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn)
   ret <vscale x 4 x float> %res
 }
 
@@ -29,7 +29,7 @@ define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    scvtf z0.d, z0.s
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 
4 x i32> %zn)
+  %res = call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn)
   ret <vscale x 2 x double> %res
 }
 
@@ -38,7 +38,7 @@ define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ucvtf z0.h, z0.b
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 
x i8> %zn)
+  %res = call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtfb.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn)
   ret <vscale x 8 x half> %res
 }
 
@@ -47,7 +47,7 @@ define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ucvtf z0.s, z0.h
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 
x i16> %zn)
+  %res = call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtfb.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn)
   ret <vscale x 4 x float> %res
 }
 
@@ -56,7 +56,7 @@ define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ucvtf z0.d, z0.s
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 
4 x i32> %zn)
+  %res = call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtfb.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn)
   ret <vscale x 2 x double> %res
 }
 
@@ -69,7 +69,7 @@ define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    scvtflt z0.h, z0.b
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 
x i8> %zn)
+  %res = call <vscale x 8 x half> 
@llvm.aarch64.sve.scvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn)
   ret <vscale x 8 x half> %res
 }
 
@@ -78,7 +78,7 @@ define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    scvtflt z0.s, z0.h
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 
8 x i16> %zn)
+  %res = call <vscale x 4 x float> 
@llvm.aarch64.sve.scvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn)
   ret <vscale x 4 x float> %res
 }
 
@@ -87,7 +87,7 @@ define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    scvtflt z0.d, z0.s
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 
4 x i32> %zn)
+  %res = call <vscale x 2 x double> 
@llvm.aarch64.sve.scvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn)
   ret <vscale x 2 x double> %res
 }
 
@@ -96,7 +96,7 @@ define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ucvtflt z0.h, z0.b
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 
x i8> %zn)
+  %res = call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtft.nxv8f16.nxv16i8(<vscale x 16 x i8> %zn)
   ret <vscale x 8 x half> %res
 }
 
@@ -105,7 +105,7 @@ define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ucvtflt z0.s, z0.h
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 
8 x i16> %zn)
+  %res = call <vscale x 4 x float> 
@llvm.aarch64.sve.ucvtft.nxv4f32.nxv8i16(<vscale x 8 x i16> %zn)
   ret <vscale x 4 x float> %res
 }
 
@@ -114,7 +114,7 @@ define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ucvtflt z0.d, z0.s
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 
4 x i32> %zn)
+  %res = call <vscale x 2 x double> 
@llvm.aarch64.sve.ucvtft.nxv2f64.nxv4i32(<vscale x 4 x i32> %zn)
   ret <vscale x 2 x double> %res
 }
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AArch64] Add 9.7 CVT data processing intrinsics (PR #186807)

Reply via email to