[clang] af27c81 - [AArch64] Remove aarch64_neon_vcvtfp2hf and aarch64_neon_vcvthf2fp (#203903)

via cfe-commits Tue, 16 Jun 2026 05:53:46 -0700

Author: David Green
Date: 2026-06-16T13:53:31+01:00
New Revision: af27c81cc4bbc8094b0ed82633bc8520898bbb51


URL: 
https://github.com/llvm/llvm-project/commit/af27c81cc4bbc8094b0ed82633bc8520898bbb51
DIFF: 
https://github.com/llvm/llvm-project/commit/af27c81cc4bbc8094b0ed82633bc8520898bbb51.diff

LOG: [AArch64] Remove aarch64_neon_vcvtfp2hf and aarch64_neon_vcvthf2fp 
(#203903)

This removes the aarch64_neon_vcvtfp2hf and aarch64_neon_vcvthf2fp
intrinsics, relying on the fp16 fptrunc and fpext instructions directly
instead. Arm is left using its own versions of the intrinsics, as the
types in the backend are more difficult to handle without fullfp16.

Added: 
    

Modified: 
    clang/include/clang/Basic/AArch64CodeGenUtils.h
    clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
    clang/lib/CodeGen/TargetBuiltins/ARM.cpp
    clang/test/CodeGen/AArch64/neon-misc-constrained.c
    clang/test/CodeGen/AArch64/neon-misc.c
    clang/test/CodeGen/arm_neon_intrinsics.c
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/IR/AutoUpgrade.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
    llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
    llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
    llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/AArch64CodeGenUtils.h 
b/clang/include/clang/Basic/AArch64CodeGenUtils.h
index f64a41df63cf8..3b9145920bd5f 100644
--- a/clang/include/clang/Basic/AArch64CodeGenUtils.h
+++ b/clang/include/clang/Basic/AArch64CodeGenUtils.h
@@ -158,10 +158,8 @@ const inline ARMVectorIntrinsicInfo 
AArch64SIMDIntrinsicMap [] = {
   NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
-  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
   NEONMAP0(vcvt_f16_s16),
   NEONMAP0(vcvt_f16_u16),
-  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
   NEONMAP0(vcvt_f32_v),
   NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),

diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 84b9bb1007763..82f2e3954bb47 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2692,6 +2692,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vcvt_bf16_f32:
   case NEON::BI__builtin_neon_vcvtq_low_bf16_f32:
   case NEON::BI__builtin_neon_vcvtq_high_bf16_f32:
+  case NEON::BI__builtin_neon_vcvt_f16_f32:
+  case NEON::BI__builtin_neon_vcvt_f32_f16:
   case clang::AArch64::BI_InterlockedAdd:
   case clang::AArch64::BI_InterlockedAdd_acq:
   case clang::AArch64::BI_InterlockedAdd_rel:

diff  --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 4c668dabd53dc..6d6f87a9439df 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -595,10 +595,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] 
= {
   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
-  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
   NEONMAP0(vcvt_f16_s16),
   NEONMAP0(vcvt_f16_u16),
-  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
   NEONMAP0(vcvt_f32_v),
   NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
   NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
@@ -2662,10 +2660,15 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned 
BuiltinID,
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                         "vsha1h");
 
-  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
+  case NEON::BI__builtin_neon_vcvth_bf16_f32:
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
                         "vcvtbfp2bf");
-  }
+  case NEON::BI__builtin_neon_vcvt_f16_f32:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf), Ops,
+                        "vcvtfp2hf");
+  case NEON::BI__builtin_neon_vcvt_f32_f16:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp), Ops,
+                        "vcvthf2fp");
 
   // The ARM _MoveToCoprocessor builtins put the input register value as
   // the first argument, but the LLVM intrinsic expects it as the third one.
@@ -6050,6 +6053,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
         Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
     return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
   }
+  case NEON::BI__builtin_neon_vcvt_f16_f32: {
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4F16 = FixedVectorType::get(Builder.getHalfTy(), 4);
+    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4F16);
+  }
+  case NEON::BI__builtin_neon_vcvt_f32_f16: {
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4F16 = FixedVectorType::get(Builder.getHalfTy(), 4);
+    return Builder.CreateFPExt(Builder.CreateBitCast(Ops[0], V4F16), V4F32);
+  }
 
   case clang::AArch64::BI_InterlockedAdd:
   case clang::AArch64::BI_InterlockedAdd_acq:

diff  --git a/clang/test/CodeGen/AArch64/neon-misc-constrained.c 
b/clang/test/CodeGen/AArch64/neon-misc-constrained.c
index 06ecfd91252a1..49208892e3035 100644
--- a/clang/test/CodeGen/AArch64/neon-misc-constrained.c
+++ b/clang/test/CodeGen/AArch64/neon-misc-constrained.c
@@ -103,3 +103,95 @@ float32x4_t test_vsqrtq_f32(float32x4_t a) {
 float64x2_t test_vsqrtq_f64(float64x2_t a) {
   return vsqrtq_f64(a);
 }
+
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vcvt_f16_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// UNCONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x 
i8>
+// UNCONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x 
float>
+// UNCONSTRAINED-NEXT:    [[TMP3:%.*]] = fptrunc <4 x float> [[TMP2]] to <4 x 
half>
+// UNCONSTRAINED-NEXT:    ret <4 x half> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vcvt_f16_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// CONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x 
float>
+// CONSTRAINED-NEXT:    [[TMP3:%.*]] = call <4 x half> 
@llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> [[TMP2]], 
metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT:    ret <4 x half> [[TMP3]]
+//
+float16x4_t test_vcvt_f16_f32(float32x4_t a) {
+  return vcvt_f16_f32(a);
+}
+
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vcvt_high_f16_f32(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x float> noundef 
[[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// UNCONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x 
i8>
+// UNCONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x 
float>
+// UNCONSTRAINED-NEXT:    [[TMP3:%.*]] = fptrunc <4 x float> [[TMP2]] to <4 x 
half>
+// UNCONSTRAINED-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], 
<4 x half> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 
6, i32 7>
+// UNCONSTRAINED-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vcvt_high_f16_f32(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x float> noundef 
[[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// CONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x 
float>
+// CONSTRAINED-NEXT:    [[TMP3:%.*]] = call <4 x half> 
@llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> [[TMP2]], 
metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 
x half> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+// CONSTRAINED-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+float16x8_t test_vcvt_high_f16_f32(float16x4_t a, float32x4_t b) {
+  return vcvt_high_f16_f32(a, b);
+}
+
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vcvt_f32_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x 
half>
+// UNCONSTRAINED-NEXT:    [[TMP3:%.*]] = fpext <4 x half> [[TMP2]] to <4 x 
float>
+// UNCONSTRAINED-NEXT:    ret <4 x float> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vcvt_f32_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// CONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CONSTRAINED-NEXT:    [[TMP3:%.*]] = call <4 x float> 
@llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> [[TMP2]], metadata 
!"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT:    ret <4 x float> [[TMP3]]
+//
+float32x4_t test_vcvt_f32_f16(float16x4_t a) {
+  return vcvt_f32_f16(a);
+}
+
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vcvt_high_f32_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], 
<8 x half> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// UNCONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to 
<4 x i16>
+// UNCONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x 
half>
+// UNCONSTRAINED-NEXT:    [[TMP3:%.*]] = fpext <4 x half> [[TMP2]] to <4 x 
float>
+// UNCONSTRAINED-NEXT:    ret <4 x float> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vcvt_high_f32_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT:  [[ENTRY:.*:]]
+// CONSTRAINED-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 
x half> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CONSTRAINED-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <4 
x i16>
+// CONSTRAINED-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CONSTRAINED-NEXT:    [[TMP3:%.*]] = call <4 x float> 
@llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> [[TMP2]], metadata 
!"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT:    ret <4 x float> [[TMP3]]
+//
+float32x4_t test_vcvt_high_f32_f16(float16x8_t a) {
+  return vcvt_high_f32_f16(a);
+}

diff  --git a/clang/test/CodeGen/AArch64/neon-misc.c 
b/clang/test/CodeGen/AArch64/neon-misc.c
index 964b0059662d2..720a1c22933d1 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -2606,11 +2606,8 @@ uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x 
float>
-// CHECK-NEXT:    [[VCVT_F16_F321_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]])
-// CHECK-NEXT:    [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> 
[[VCVT_F16_F321_I]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x 
i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <4 x float> [[TMP2]] to <4 x half>
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 float16x4_t test_vcvt_f16_f32(float32x4_t a) {
@@ -2622,11 +2619,8 @@ float16x4_t test_vcvt_f16_f32(float32x4_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[VCVT_F16_F32_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 
x float>
-// CHECK-NEXT:    [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I_I]])
-// CHECK-NEXT:    [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> 
[[VCVT_F16_F321_I_I]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 
x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <4 x float> [[TMP2]] to <4 x half>
 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x 
half> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7>
 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 //
@@ -2693,11 +2687,8 @@ float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, 
float64x2_t b) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x 
i16>
-// CHECK-NEXT:    [[VCVT_F32_F161_I:%.*]] = call <4 x float> 
@llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
-// CHECK-NEXT:    [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> 
[[VCVT_F32_F161_I]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x 
i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float32x4_t test_vcvt_f32_f16(float16x4_t a) {
@@ -2710,11 +2701,8 @@ float32x4_t test_vcvt_f32_f16(float16x4_t a) {
 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x 
half> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x 
i16>
-// CHECK-NEXT:    [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> 
@llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]])
-// CHECK-NEXT:    [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> 
[[VCVT_F32_F161_I_I]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I_I]] to <4 
x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float32x4_t test_vcvt_high_f32_f16(float16x8_t a) {

diff  --git a/clang/test/CodeGen/arm_neon_intrinsics.c 
b/clang/test/CodeGen/arm_neon_intrinsics.c
index eb9fe126ff2a0..a471ccf52e173 100644
--- a/clang/test/CodeGen/arm_neon_intrinsics.c
+++ b/clang/test/CodeGen/arm_neon_intrinsics.c
@@ -3330,12 +3330,10 @@ int64x1_t test_vcreate_s64(uint64_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x 
float>
-// CHECK-NEXT:    [[VCVT_F16_F321_I:%.*]] = call <4 x i16> 
@llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]])
-// CHECK-NEXT:    [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> 
[[VCVT_F16_F321_I]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x 
i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
-// CHECK-NEXT:    ret <4 x half> [[TMP3]]
+// CHECK-NEXT:    [[VCVTFP2HF_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x 
float>
+// CHECK-NEXT:    [[VCVTFP2HF1_I:%.*]] = call <4 x i16> 
@llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVTFP2HF_I]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[VCVTFP2HF1_I]] to <4 x 
half>
+// CHECK-NEXT:    ret <4 x half> [[TMP2]]
 //
 float16x4_t test_vcvt_f16_f32(float32x4_t a) {
   return vcvt_f16_f32(a);
@@ -3394,12 +3392,9 @@ float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x 
i16>
-// CHECK-NEXT:    [[VCVT_F32_F161_I:%.*]] = call <4 x float> 
@llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
-// CHECK-NEXT:    [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> 
[[VCVT_F32_F161_I]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x 
i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
-// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+// CHECK-NEXT:    [[VCVTHF2FP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT:    [[VCVTHF2FP1_I:%.*]] = call <4 x float> 
@llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVTHF2FP_I]])
+// CHECK-NEXT:    ret <4 x float> [[VCVTHF2FP1_I]]
 //
 float32x4_t test_vcvt_f32_f16(float16x4_t a) {
   return vcvt_f32_f16(a);

diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b1a2b348bc04b..2923595486712 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -465,12 +465,6 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
   def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic;
 
-  // Vector Conversions Between Half-Precision and Single-Precision.
-  def int_aarch64_neon_vcvtfp2hf
-    : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_aarch64_neon_vcvthf2fp
-    : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
-
   // Vector Conversions Between Floating-point and Fixed-point.
   def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
   def int_aarch64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;

diff  --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index f27713df7f146..814e985ebf7be 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1000,6 +1000,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool 
IsArm, Function *F,
         return true;
       }
 
+      // vcvtfp2hf -> fptrunc and vcvthf2fp -> fpext
+      if (Name == "vcvtfp2hf" || Name == "vcvthf2fp") {
+        NewFn = nullptr;
+        return true;
+      }
+
       return false; // No other 'aarch64.neon.*'.
     }
     if (Name.consume_front("sve.")) {
@@ -4670,6 +4676,19 @@ static Value *upgradeAArch64IntrinsicCall(StringRef 
Name, CallBase *CI,
                                    CI->getName());
   }
 
+  if (Name == "neon.vcvtfp2hf")
+    return Builder.CreateBitCast(
+        Builder.CreateFPTrunc(
+            CI->getOperand(0),
+            FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
+        FixedVectorType::get(Type::getInt16Ty(F->getContext()), 4));
+  if (Name == "neon.vcvthf2fp")
+    return Builder.CreateFPExt(
+        Builder.CreateBitCast(
+            CI->getOperand(0),
+            FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
+        FixedVectorType::get(Type::getFloatTy(F->getContext()), 4));
+
   llvm_unreachable("Unhandled Intrinsic!");
 }
 

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 3f3eec2a34f5a..96c77c2f75196 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5828,11 +5828,6 @@ defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", 
AArch64fcmltz>;
 defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, 
"fcvtas",int_aarch64_neon_fcvtas>;
 defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, 
"fcvtau",int_aarch64_neon_fcvtau>;
 defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
-def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
-          (FCVTLv4i16 V64:$Rn)>;
-def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 
V128:$Rn),
-                                                                (i64 4)))),
-          (FCVTLv8i16 V128:$Rn)>;
 def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
           (FCVTLv2i32 V64:$Rn)>;
 def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
@@ -5846,11 +5841,6 @@ defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, 
"fcvtmu",int_aarch64_neon_fcvtmu
 defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, 
"fcvtns",int_aarch64_neon_fcvtns>;
 defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, 
"fcvtnu",int_aarch64_neon_fcvtnu>;
 defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
-def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
-          (FCVTNv4i16 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd,
-                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 
V128:$Rn)))),
-          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 
V128:$Rn)>;
 def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
           (FCVTNv2i32 V128:$Rn)>;
 def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),

diff  --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index c45ec68f3cd07..383f503a2e87f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -7186,9 +7186,6 @@ struct MemorySanitizerVisitor : public 
InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::aarch64_neon_fcvtzu:
     // Floating-point convert to lower precision narrow, rounding to odd
     case Intrinsic::aarch64_neon_fcvtxn:
-    // Vector Conversions Between Half-Precision and Single-Precision
-    case Intrinsic::aarch64_neon_vcvthf2fp:
-    case Intrinsic::aarch64_neon_vcvtfp2hf:
       handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
       break;
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll 
b/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
index 310dc711fdc26..286390a8d80a0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
@@ -33,6 +33,8 @@ define <4 x float> @scvtq(<4 x i32> %a) nounwind readnone ssp 
{
   ret <4 x float> %vcvt.i
 }
 
+; Test that vcvtfp2hf and vcvthf2fp are AutoUpgraded to fp16 fptrunc + fpext.
+
 define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp {
 ; CHECK-LABEL: cvtf16:
 ; CHECK: fcvtl  v0.4s, v0.4h
@@ -50,8 +52,6 @@ define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind 
readnone ssp {
   ret <4 x float> %vcvt1.i
 }
 
-
-
 define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp {
 ; CHECK-LABEL: cvtf16f32:
 ; CHECK: fcvtn  v0.4h, v0.4s
@@ -68,6 +68,3 @@ define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x float> 
%high_big) {
   %res = shufflevector <4 x i16> %low, <4 x i16> %high, <8 x i32> <i32 0, i32 
1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %res
 }
-
-declare <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) nounwind readnone

diff  --git a/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll 
b/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
index c7e22b2f4301d..cef94f820d317 100644
--- a/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
+++ b/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
@@ -10,9 +10,8 @@ define <8 x half> @test1(<4 x float> noundef %a) {
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
-  %vcvt_f16_f321.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x 
float> %a)
-  %0 = bitcast <4 x i16> %vcvt_f16_f321.i to <4 x half>
-  %shuffle.i = shufflevector <4 x half> %0, <4 x half> zeroinitializer, <8 x 
i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %b = fptrunc <4 x float> %a to <4 x half>
+  %shuffle.i = shufflevector <4 x half> %b, <4 x half> zeroinitializer, <8 x 
i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x half> %shuffle.i
 }
 

diff  --git 
a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll 
b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll
index 802c3aa4784ee..cab8139c70042 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll
@@ -75,9 +75,9 @@ define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone 
ssp sanitize_memory {
 ; CHECK-SAME: <4 x i16> [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i16> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-; CHECK-NEXT:    [[VCVT1_I:%.*]] = tail call <4 x float> 
@llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[A]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[VCVT1_I:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x float> [[VCVT1_I]]
 ;
@@ -92,9 +92,9 @@ define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind 
readnone ssp sanitize_mem
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> 
splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[IN:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, 
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i16> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-; CHECK-NEXT:    [[VCVT1_I:%.*]] = tail call <4 x float> 
@llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[IN]]) #[[ATTR3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[IN]] to <4 x half>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
+; CHECK-NEXT:    [[VCVT1_I:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x float> [[VCVT1_I]]
 ;
@@ -110,9 +110,9 @@ define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind 
readnone ssp  sanitize_memo
 ; CHECK-SAME: <4 x float> [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
-; CHECK-NEXT:    [[VCVT1_I:%.*]] = tail call <4 x i16> 
@llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[A]]) #[[ATTR3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = fptrunc <4 x float> [[A]] to <4 x half>
+; CHECK-NEXT:    [[VCVT1_I:%.*]] = bitcast <4 x half> [[TMP4]] to <4 x i16>
 ; CHECK-NEXT:    store <4 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i16> [[VCVT1_I]]
 ;
@@ -126,9 +126,9 @@ define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x 
float> %high_big)  sanitiz
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr getelementptr (i8, ptr 
@__msan_param_tls, i64 8), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
-; CHECK-NEXT:    [[HIGH:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 
x float> [[HIGH_BIG]])
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+; CHECK-NEXT:    [[TMP5:%.*]] = fptrunc <4 x float> [[HIGH_BIG]] to <4 x half>
+; CHECK-NEXT:    [[HIGH:%.*]] = bitcast <4 x half> [[TMP5]] to <4 x i16>
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> 
[[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[LOW]], <4 x i16> 
[[HIGH]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] af27c81 - [AArch64] Remove aarch64_neon_vcvtfp2hf and aarch64_neon_vcvthf2fp (#203903)

Reply via email to