llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Valeriy Savchenko (SavchenkoValeriy) <details> <summary>Changes</summary> They already produce identical assembly. --- Patch is 86.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/191365.diff 9 Files Affected: - (modified) clang/include/clang/Basic/AArch64CodeGenUtils.h (+26-12) - (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+6-24) - (modified) clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c (+8-8) - (modified) clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c (+12-12) - (modified) clang/test/CodeGen/AArch64/neon-intrinsics.c (+166-166) - (modified) clang/test/CodeGen/AArch64/neon-misc.c (+19-19) - (modified) clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c (+6-6) - (modified) clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c (+6-6) - (modified) clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c (+22-22) ``````````diff diff --git a/clang/include/clang/Basic/AArch64CodeGenUtils.h b/clang/include/clang/Basic/AArch64CodeGenUtils.h index 9a97f0001cb12..c747f1bf9d825 100644 --- a/clang/include/clang/Basic/AArch64CodeGenUtils.h +++ b/clang/include/clang/Basic/AArch64CodeGenUtils.h @@ -173,6 +173,12 @@ const inline ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap [] = { NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_s16_f16, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvt_s32_v, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvt_s64_v, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvt_u16_f16, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvt_u32_v, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvt_u64_v, fptoui_sat, AddRetType | Add1ArgType), NEONMAP0(vcvtq_f16_s16), NEONMAP0(vcvtq_f16_u16), NEONMAP0(vcvtq_f32_v), @@ -186,6 +192,12 @@ const inline ARMVectorIntrinsicInfo 
AArch64SIMDIntrinsicMap [] = { NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_s16_f16, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtq_s32_v, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtq_s64_v, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtq_u16_f16, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtq_u32_v, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtq_u64_v, fptoui_sat, AddRetType | Add1ArgType), NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), NEONMAP1(vdot_u32, aarch64_neon_udot, 0), @@ -406,10 +418,10 @@ const inline ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), - NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), - NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), - NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), - NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s32_f64, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s64_f64, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u32_f64, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u64_f64, fptoui_sat, AddRetType | Add1ArgType), NEONMAP0(vcvth_bf16_f32), NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), @@ -439,10 +451,10 @@ const inline ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, 
AddRetType | Add1ArgType), NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), - NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), - NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), - NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), - NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_s32_f32, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvts_s64_f32, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u32_f32, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u64_f32, fptoui_sat, AddRetType | Add1ArgType), NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), @@ -621,10 +633,12 @@ const inline ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), - NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), - NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), - NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), - NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s16_f16, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s32_f16, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s64_f16, fptosi_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u16_f16, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u32_f16, fptoui_sat, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u64_f16, fptoui_sat, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | 
Add1ArgType), NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 8ec2f5b83085c..7e6b429c02af3 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -1384,6 +1384,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvtq_s16_f16: case NEON::BI__builtin_neon_vcvtq_u16_f16: { Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); + // AArch64 uses saturating FP-to-int intrinsics; ARM uses plain fptoui/fptosi. + if (Int) { + llvm::Type *Tys[2] = {Ty, Ops[0]->getType()}; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); + } return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } @@ -5428,12 +5433,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvtmh_u16_f16: case NEON::BI__builtin_neon_vcvtnh_u16_f16: case NEON::BI__builtin_neon_vcvtph_u16_f16: - case NEON::BI__builtin_neon_vcvth_u16_f16: case NEON::BI__builtin_neon_vcvtah_s16_f16: case NEON::BI__builtin_neon_vcvtmh_s16_f16: case NEON::BI__builtin_neon_vcvtnh_s16_f16: - case NEON::BI__builtin_neon_vcvtph_s16_f16: - case NEON::BI__builtin_neon_vcvth_s16_f16: { + case NEON::BI__builtin_neon_vcvtph_s16_f16: { llvm::Type *InTy = Int16Ty; llvm::Type* FTy = HalfTy; llvm::Type *Tys[2] = {InTy, FTy}; @@ -5447,8 +5450,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_fcvtnu; break; case NEON::BI__builtin_neon_vcvtph_u16_f16: Int = Intrinsic::aarch64_neon_fcvtpu; break; - case NEON::BI__builtin_neon_vcvth_u16_f16: - Int = Intrinsic::aarch64_neon_fcvtzu; break; case NEON::BI__builtin_neon_vcvtah_s16_f16: Int = Intrinsic::aarch64_neon_fcvtas; break; case NEON::BI__builtin_neon_vcvtmh_s16_f16: @@ -5457,8 +5458,6 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_fcvtns; break; case NEON::BI__builtin_neon_vcvtph_s16_f16: Int = Intrinsic::aarch64_neon_fcvtps; break; - case NEON::BI__builtin_neon_vcvth_s16_f16: - Int = Intrinsic::aarch64_neon_fcvtzs; break; } return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); } @@ -6410,23 +6409,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); } - case NEON::BI__builtin_neon_vcvt_s32_v: - case NEON::BI__builtin_neon_vcvt_u32_v: - case NEON::BI__builtin_neon_vcvt_s64_v: - case NEON::BI__builtin_neon_vcvt_u64_v: - case NEON::BI__builtin_neon_vcvt_s16_f16: - case NEON::BI__builtin_neon_vcvt_u16_f16: - case NEON::BI__builtin_neon_vcvtq_s32_v: - case NEON::BI__builtin_neon_vcvtq_u32_v: - case NEON::BI__builtin_neon_vcvtq_s64_v: - case NEON::BI__builtin_neon_vcvtq_u64_v: - case NEON::BI__builtin_neon_vcvtq_s16_f16: - case NEON::BI__builtin_neon_vcvtq_u16_f16: { - Int = - usgn ? 
Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs; - llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)}; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); - } case NEON::BI__builtin_neon_vcvta_s16_f16: case NEON::BI__builtin_neon_vcvta_u16_f16: case NEON::BI__builtin_neon_vcvta_s32_v: diff --git a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c index 929df94aa60ef..f14df46b89177 100644 --- a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c @@ -339,7 +339,7 @@ uint32_t test_vcvtpd_u32_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvts_s32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float [[A]]) +// CHECK-NEXT: [[VCVTS_S32_F32_I:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[A]]) // CHECK-NEXT: ret i32 [[VCVTS_S32_F32_I]] // int32_t test_vcvts_s32_f32(float32_t a) { @@ -349,7 +349,7 @@ int32_t test_vcvts_s32_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtd_s64_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double [[A]]) +// CHECK-NEXT: [[VCVTD_S64_F64_I:%.*]] = call i64 @llvm.fptosi.sat.i64.f64(double [[A]]) // CHECK-NEXT: ret i64 [[VCVTD_S64_F64_I]] // int64_t test_vcvtd_s64_f64(float64_t a) { @@ -359,7 +359,7 @@ int64_t test_vcvtd_s64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvts_s64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float [[A]]) +// CHECK-NEXT: [[VCVTS_S64_F32_I:%.*]] = call i64 @llvm.fptosi.sat.i64.f32(float [[A]]) // CHECK-NEXT: ret i64 [[VCVTS_S64_F32_I]] // int64_t test_vcvts_s64_f32(float32_t a) { @@ -369,7 
+369,7 @@ int64_t test_vcvts_s64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtd_s32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double [[A]]) +// CHECK-NEXT: [[VCVTD_S32_F64_I:%.*]] = call i32 @llvm.fptosi.sat.i32.f64(double [[A]]) // CHECK-NEXT: ret i32 [[VCVTD_S32_F64_I]] // int32_t test_vcvtd_s32_f64(float64_t a) { @@ -379,7 +379,7 @@ int32_t test_vcvtd_s32_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvts_u32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float [[A]]) +// CHECK-NEXT: [[VCVTS_U32_F32_I:%.*]] = call i32 @llvm.fptoui.sat.i32.f32(float [[A]]) // CHECK-NEXT: ret i32 [[VCVTS_U32_F32_I]] // uint32_t test_vcvts_u32_f32(float32_t a) { @@ -389,7 +389,7 @@ uint32_t test_vcvts_u32_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtd_u64_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double [[A]]) +// CHECK-NEXT: [[VCVTD_U64_F64_I:%.*]] = call i64 @llvm.fptoui.sat.i64.f64(double [[A]]) // CHECK-NEXT: ret i64 [[VCVTD_U64_F64_I]] // uint64_t test_vcvtd_u64_f64(float64_t a) { @@ -399,7 +399,7 @@ uint64_t test_vcvtd_u64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvts_u64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float [[A]]) +// CHECK-NEXT: [[VCVTS_U64_F32_I:%.*]] = call i64 @llvm.fptoui.sat.i64.f32(float [[A]]) // CHECK-NEXT: ret i64 [[VCVTS_U64_F32_I]] // uint64_t test_vcvts_u64_f32(float32_t a) { @@ -409,7 +409,7 @@ uint64_t test_vcvts_u64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtd_u32_f64 // CHECK-SAME: (double 
noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double [[A]]) +// CHECK-NEXT: [[VCVTD_U32_F64_I:%.*]] = call i32 @llvm.fptoui.sat.i32.f64(double [[A]]) // CHECK-NEXT: ret i32 [[VCVTD_U32_F64_I]] // uint32_t test_vcvtd_u32_f64(float64_t a) { diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c index ba32cfb7f3bae..d38fae31c44d1 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c @@ -1522,9 +1522,9 @@ float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) { // UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 // UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 // UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> -// UNCONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// UNCONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> [[VCVTZ_I]]) -// UNCONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]] +// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// UNCONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]]) +// UNCONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ_I]] // // CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcvt_s64_f64( // CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] { @@ -1532,9 +1532,9 @@ float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) { // CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 // CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 // CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x 
i8> -// CONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> [[VCVTZ_I]]) #[[ATTR3]] -// CONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]] +// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// CONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]]) #[[ATTR3]] +// CONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ_I]] // int64x1_t test_vcvt_s64_f64(float64x1_t a) { return vcvt_s64_f64(a); @@ -1546,9 +1546,9 @@ int64x1_t test_vcvt_s64_f64(float64x1_t a) { // UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 // UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 // UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> -// UNCONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// UNCONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> [[VCVTZ_I]]) -// UNCONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]] +// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// UNCONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]]) +// UNCONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ_I]] // // CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcvt_u64_f64( // CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] { @@ -1556,9 +1556,9 @@ int64x1_t test_vcvt_s64_f64(float64x1_t a) { // CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64 // CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0 // CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8> -// CONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to 
<1 x double> -// CONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> [[VCVTZ_I]]) #[[ATTR3]] -// CONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]] +// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// CONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]]) #[[ATTR3]] +// CONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ_I]] // uint64x1_t test_vcvt_u64_f64(float64x1_t a) { return vcvt_u64_f64(a); diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 80bb22cc43c78..8704a7827ad1d 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -10417,8 +10417,8 @@ uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: define dso_local <8 x i16> @test_vabdl_s8( // CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]]) -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> +// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]]) +// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I]] to <8 x i16> // CHECK-NEXT: ret <8 x i16> [[VMOVL_I_I]] // int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { @@ -10430,10 +10430,10 @@ int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> 
[VABD1_I_I]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> +// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I]] to <8 x i8> // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> // CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[VMOVL_I_I]] @@ -10447,10 +10447,10 @@ int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> +// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I]] to <8 x i8> // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> // CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> // CHECK-NEXT: ret <2 x i64> [[VMOVL_I_I]] @@ -10462,8 +1046... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/191365 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
