https://github.com/ericxu233 updated https://github.com/llvm/llvm-project/pull/162295
>From eec4aebc63665d0b2fd4e07d42bd62e268d1f69f Mon Sep 17 00:00:00 2001 From: ericxu233 <[email protected]> Date: Sun, 5 Oct 2025 01:41:40 -0400 Subject: [PATCH 1/4] [X86][Clang] Allow constexpr evaluation of F16C CVTPS2PH intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 6 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 81 ++++++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 65 +++++++++++++++++++ clang/test/CodeGen/X86/f16c-builtins.c | 57 +++++++++++++++++ 4 files changed, 207 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 4388c09423a21..b125f986101d3 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -743,11 +743,13 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<2, long long int>, _Vector<4, int>, _Constant char)">; } -let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "f16c", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int)">; } -let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "f16c", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 9991e365addb8..ace2b6116d8a6 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3475,6 +3475,83 @@ static bool interp__builtin_ia32_shuffle_generic( return true; } +static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Arguments are: vector of floats, rounding immediate + assert(Call->getNumArgs() == 2); + + APSInt Imm = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + assert(Src.getFieldDesc()->isPrimitiveArray()); + assert(Dst.getFieldDesc()->isPrimitiveArray()); + + const auto *SrcVTy = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned SrcNumElems = SrcVTy->getNumElements(); + const auto *DstVTy = Call->getType()->castAs<VectorType>(); + unsigned DstNumElems = DstVTy->getNumElements(); + + const llvm::fltSemantics &HalfSem = + S.getASTContext().getFloatTypeSemantics(S.getASTContext().HalfTy); + + // imm[2] == 1 means use MXCSR rounding mode. + // In that case, we can only evaluate if the conversion is exact. + int ImmVal = Imm.getZExtValue(); + bool UseMXCSR = (ImmVal & 4) != 0; + + llvm::RoundingMode RM; + if (!UseMXCSR) { + switch (ImmVal & 3) { + case 0: RM = llvm::RoundingMode::NearestTiesToEven; break; + case 1: RM = llvm::RoundingMode::TowardNegative; break; + case 2: RM = llvm::RoundingMode::TowardPositive; break; + case 3: RM = llvm::RoundingMode::TowardZero; break; + default: llvm_unreachable("Invalid immediate rounding mode"); + } + } else { + // For MXCSR, we must check for exactness. We can use any rounding mode + // for the trial conversion since the result is the same if it's exact. + RM = llvm::RoundingMode::NearestTiesToEven; + } + + QualType DstElemQT = Dst.getFieldDesc()->getElemQualType(); + PrimType DstElemT = *S.getContext().classify(DstElemQT); + bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType(); + + for (unsigned I = 0; I < SrcNumElems; ++I) { + Floating SrcVal = Src.elem<Floating>(I); + APFloat DstVal = SrcVal.getAPFloat(); + + bool LostInfo; + APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo); + + if (UseMXCSR && St != APFloat::opOK) { + S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_dynamic_rounding); + return false; + } + + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + // FIX: Extract the integer value before calling 'from'. + uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue(); + Dst.elem<T>(I) = T::from(RawBits); + }); + } + + // Zero out remaining elements if the destination has more elements + // (e.g., vcvtps2ph converting 4 floats to 8 shorts). + if (DstNumElems > SrcNumElems) { + for (unsigned I = SrcNumElems; I < DstNumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + Dst.elem<T>(I) = T::from(0); + }); + } + } + + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -4632,6 +4709,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vinsertf128_si256: case X86::BI__builtin_ia32_insert128i256: return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); + + case clang::X86::BI__builtin_ia32_vcvtps2ph: + case clang::X86::BI__builtin_ia32_vcvtps2ph256: + return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call); case X86::BI__builtin_ia32_vec_ext_v4hi: case X86::BI__builtin_ia32_vec_ext_v16qi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8fab6efafb983..859e88b03e4a3 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13175,6 +13175,71 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + + case clang::X86::BI__builtin_ia32_vcvtps2ph: + case clang::X86::BI__builtin_ia32_vcvtps2ph256: { + APValue SrcVec; + if (!EvaluateAsRValue(Info, E->getArg(0), SrcVec)) + return false; + + APSInt Imm; + if (!EvaluateInteger(E->getArg(1), Imm, Info)) + return false; + + assert(SrcVec.isVector()); + + const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>(); + unsigned SrcNumElems = SrcVTy->getNumElements(); + const auto *DstVTy = E->getType()->castAs<VectorType>(); + unsigned DstNumElems = DstVTy->getNumElements(); + QualType DstElemTy = DstVTy->getElementType(); + + const llvm::fltSemantics &HalfSem = Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy); + + int ImmVal = Imm.getZExtValue(); + bool UseMXCSR = (ImmVal & 4) != 0; + + llvm::RoundingMode RM; + if (!UseMXCSR) { + switch (ImmVal & 3) { + case 0: RM = llvm::RoundingMode::NearestTiesToEven; break; + case 1: RM = llvm::RoundingMode::TowardNegative; break; + case 2: RM = llvm::RoundingMode::TowardPositive; break; + case 3: RM = llvm::RoundingMode::TowardZero; break; + default: llvm_unreachable("Invalid immediate rounding mode"); + } + } else { + RM = llvm::RoundingMode::NearestTiesToEven; + } + + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(DstNumElems); + + for (unsigned I = 0; I < SrcNumElems; ++I) { + APFloat SrcVal = SrcVec.getVectorElt(I).getFloat(); + + bool LostInfo; + APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo); + + if (UseMXCSR && St != APFloat::opOK) { + Info.FFDiag(E, diag::note_constexpr_dynamic_rounding); + return false; + } + + APSInt DstInt(SrcVal.bitcastToAPInt(), + DstElemTy->isUnsignedIntegerOrEnumerationType()); + ResultElements.push_back(APValue(DstInt)); + } + + if (DstNumElems > SrcNumElems) { + APSInt Zero = Info.Ctx.MakeIntValue(0, DstElemTy); + for (unsigned I = SrcNumElems; I < DstNumElems; ++I) { + ResultElements.push_back(APValue(Zero)); + } + } + + return Success(ResultElements, E); + } } } diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c index c08ef76d56981..de35c16c75ab4 100755 --- a/clang/test/CodeGen/X86/f16c-builtins.c +++ b/clang/test/CodeGen/X86/f16c-builtins.c @@ -67,3 +67,60 @@ __m128i test_mm256_cvtps_ph(__m256 a) { // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0) return _mm256_cvtps_ph(a, 0); } + +// A value exactly halfway between 1.0 and the next representable FP16 number. +// In binary, its significand ends in ...000, followed by a tie-bit 1. +#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case + +// +// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded) +// +// Test values: -2.5f, 1.123f, POS_HALFWAY +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), + 0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0 +)); +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), + 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0 +)); +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), + 0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0 +)); +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), + 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0 +)); + +// +// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts) +// +// Test values: -2.5f, 1.123f, POS_HALFWAY +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), + 0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000 +)); +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), + 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000 +)); +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), + 0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000 +)); +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), + 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000 +)); + +// +// Tests for Exact Dynamic Rounding +// +// Test that dynamic rounding SUCCEEDS for exactly representable values. +// We use _MM_FROUND_CUR_DIRECTION (value 4) to specify dynamic rounding. +// Inputs: -2.5f, 0.125f, -16.0f are all exactly representable in FP16. +TEST_CONSTEXPR(match_v8hi( + __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION), + 0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000 +)); \ No newline at end of file >From c1b4300ded98f64df737adc103644c788bf4491d Mon Sep 17 00:00:00 2001 From: ericxu233 <[email protected]> Date: Tue, 7 Oct 2025 10:33:02 -0400 Subject: [PATCH 2/4] clang format --- clang/include/clang/Basic/BuiltinsX86.td | 4 ++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 +++++++++++++++--------- clang/lib/AST/ExprConstant.cpp | 26 +++++++++++++------- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b125f986101d3..5165a4d99d306 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -743,12 +743,12 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<2, long long int>, _Vector<4, int>, _Constant char)">; } -let Features = "f16c", +let Features = "f16c", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int)">; } -let Features = "f16c", +let Features = "f16c", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ace2b6116d8a6..0ff08e11018ad 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3503,11 +3503,20 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM; if (!UseMXCSR) { switch (ImmVal & 3) { - case 0: RM = llvm::RoundingMode::NearestTiesToEven; break; - case 1: RM = llvm::RoundingMode::TowardNegative; break; - case 2: RM = llvm::RoundingMode::TowardPositive; break; - case 3: RM = llvm::RoundingMode::TowardZero; break; - default: llvm_unreachable("Invalid immediate rounding mode"); + case 0: + RM = llvm::RoundingMode::NearestTiesToEven; + break; + case 1: + RM = llvm::RoundingMode::TowardNegative; + break; + case 2: + RM = llvm::RoundingMode::TowardPositive; + break; + case 3: + RM = llvm::RoundingMode::TowardZero; + break; + default: + llvm_unreachable("Invalid immediate rounding mode"); } } else { // For MXCSR, we must check for exactness. We can use any rounding mode @@ -3527,7 +3536,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo); if (UseMXCSR && St != APFloat::opOK) { - S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_dynamic_rounding); + S.FFDiag(S.Current->getSource(OpPC), + diag::note_constexpr_dynamic_rounding); return false; } @@ -3542,12 +3552,10 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, // (e.g., vcvtps2ph converting 4 floats to 8 shorts). if (DstNumElems > SrcNumElems) { for (unsigned I = SrcNumElems; I < DstNumElems; ++I) { - INT_TYPE_SWITCH_NO_BOOL(DstElemT, { - Dst.elem<T>(I) = T::from(0); - }); + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); }); } } - + Dst.initializeAllElements(); return true; } @@ -4709,7 +4717,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vinsertf128_si256: case X86::BI__builtin_ia32_insert128i256: return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); - + case clang::X86::BI__builtin_ia32_vcvtps2ph: case clang::X86::BI__builtin_ia32_vcvtps2ph256: return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 859e88b03e4a3..0c01d38fd5739 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13194,7 +13194,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned DstNumElems = DstVTy->getNumElements(); QualType DstElemTy = DstVTy->getElementType(); - const llvm::fltSemantics &HalfSem = Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy); + const llvm::fltSemantics &HalfSem = + Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy); int ImmVal = Imm.getZExtValue(); bool UseMXCSR = (ImmVal & 4) != 0; @@ -13202,11 +13203,20 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { llvm::RoundingMode RM; if (!UseMXCSR) { switch (ImmVal & 3) { - case 0: RM = llvm::RoundingMode::NearestTiesToEven; break; - case 1: RM = llvm::RoundingMode::TowardNegative; break; - case 2: RM = llvm::RoundingMode::TowardPositive; break; - case 3: RM = llvm::RoundingMode::TowardZero; break; - default: llvm_unreachable("Invalid immediate rounding mode"); + case 0: + RM = llvm::RoundingMode::NearestTiesToEven; + break; + case 1: + RM = llvm::RoundingMode::TowardNegative; + break; + case 2: + RM = llvm::RoundingMode::TowardPositive; + break; + case 3: + RM = llvm::RoundingMode::TowardZero; + break; + default: + llvm_unreachable("Invalid immediate rounding mode"); } } else { RM = llvm::RoundingMode::NearestTiesToEven; @@ -13217,7 +13227,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { for (unsigned I = 0; I < SrcNumElems; ++I) { APFloat SrcVal = SrcVec.getVectorElt(I).getFloat(); - + bool LostInfo; APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo); @@ -13225,7 +13235,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { Info.FFDiag(E, diag::note_constexpr_dynamic_rounding); return false; } - + APSInt DstInt(SrcVal.bitcastToAPInt(), DstElemTy->isUnsignedIntegerOrEnumerationType()); ResultElements.push_back(APValue(DstInt)); >From 1141c401db423efb879ee6933ade2ff0d47cf5e4 Mon Sep 17 00:00:00 2001 From: ericxu233 <[email protected]> Date: Thu, 23 Oct 2025 17:38:02 -0400 Subject: [PATCH 3/4] Finished addressing review comments --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++-- clang/lib/AST/ExprConstant.cpp | 2 - clang/test/CodeGen/X86/f16c-builtins.c | 54 ++++++++++++------------ 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ff08e11018ad..c33b395e28ec2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3526,9 +3526,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, QualType DstElemQT = Dst.getFieldDesc()->getElemQualType(); PrimType DstElemT = *S.getContext().classify(DstElemQT); - bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType(); - for (unsigned I = 0; I < SrcNumElems; ++I) { + for (unsigned I = 0; I != SrcNumElems; ++I) { Floating SrcVal = Src.elem<Floating>(I); APFloat DstVal = SrcVal.getAPFloat(); @@ -3542,7 +3541,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, } INT_TYPE_SWITCH_NO_BOOL(DstElemT, { - // FIX: Extract the integer value before calling 'from'. + // Convert the destination value's bit pattern to an unsigned integer, + // then reconstruct the element using the target type's 'from' method. uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue(); Dst.elem<T>(I) = T::from(RawBits); }); @@ -3551,7 +3551,7 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, // Zero out remaining elements if the destination has more elements // (e.g., vcvtps2ph converting 4 floats to 8 shorts). if (DstNumElems > SrcNumElems) { - for (unsigned I = SrcNumElems; I < DstNumElems; ++I) { + for (unsigned I = SrcNumElems; I != DstNumElems; ++I) { INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); }); } } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 0c01d38fd5739..7e15fbd1e7805 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13186,8 +13186,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!EvaluateInteger(E->getArg(1), Imm, Info)) return false; - assert(SrcVec.isVector()); - const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>(); unsigned SrcNumElems = SrcVTy->getNumElements(); const auto *DstVTy = E->getType()->castAs<VectorType>(); diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c index de35c16c75ab4..47ff06b270541 100755 --- a/clang/test/CodeGen/X86/f16c-builtins.c +++ b/clang/test/CodeGen/X86/f16c-builtins.c @@ -46,71 +46,65 @@ __m128 test_mm_cvtph_ps(__m128i a) { return _mm_cvtph_ps(a); } -__m256 test_mm256_cvtph_ps(__m128i a) { - // CHECK-LABEL: test_mm256_cvtph_ps - // CHECK: fpext <8 x half> %{{.*}} to <8 x float> - return _mm256_cvtph_ps(a); -} -TEST_CONSTEXPR(match_m256( - _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)), - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f -)); - __m128i test_mm_cvtps_ph(__m128 a) { // CHECK-LABEL: test_mm_cvtps_ph // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0) return _mm_cvtps_ph(a, 0); } -__m128i test_mm256_cvtps_ph(__m256 a) { - // CHECK-LABEL: test_mm256_cvtps_ph - // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0) - return _mm256_cvtps_ph(a, 0); -} - // A value exactly halfway between 1.0 and the next representable FP16 number. // In binary, its significand ends in ...000, followed by a tie-bit 1. #define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case // -// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded) +// _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded) // // Test values: -2.5f, 1.123f, POS_HALFWAY TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), + _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), 0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0 )); TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), + _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0 )); TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), + _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), 0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0 )); TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), + _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0 )); +__m256 test_mm256_cvtph_ps(__m128i a) { + // CHECK-LABEL: test_mm256_cvtph_ps + // CHECK: fpext <8 x half> %{{.*}} to <8 x float> + return _mm256_cvtph_ps(a); +} +TEST_CONSTEXPR(match_m256( + _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)), + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f +)); + // -// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts) +// _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts) // // Test values: -2.5f, 1.123f, POS_HALFWAY TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), + _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT), 0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000 )); TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), + _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF), 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000 )); TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), + _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF), 0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000 )); TEST_CONSTEXPR(match_v8hi( - __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), + _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO), 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000 )); @@ -123,4 +117,10 @@ TEST_CONSTEXPR(match_v8hi( TEST_CONSTEXPR(match_v8hi( __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION), 0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000 -)); \ No newline at end of file +)); + +__m128i test_mm256_cvtps_ph(__m256 a) { + // CHECK-LABEL: test_mm256_cvtps_ph + // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0) + return _mm256_cvtps_ph(a, 0); +} \ No newline at end of file >From 1308416e6804b510de6c97669be33223fbad943c Mon Sep 17 00:00:00 2001 From: ericxu233 <[email protected]> Date: Fri, 7 Nov 2025 16:20:46 -0500 Subject: [PATCH 4/4] Address review comments --- clang/test/CodeGen/X86/f16c-builtins.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c index 47ff06b270541..2ae4bc857b431 100755 --- a/clang/test/CodeGen/X86/f16c-builtins.c +++ b/clang/test/CodeGen/X86/f16c-builtins.c @@ -46,12 +46,6 @@ __m128 test_mm_cvtph_ps(__m128i a) { return _mm_cvtph_ps(a); } -__m128i test_mm_cvtps_ph(__m128 a) { - // CHECK-LABEL: test_mm_cvtps_ph - // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0) - return _mm_cvtps_ph(a, 0); -} - // A value exactly halfway between 1.0 and the next representable FP16 number. // In binary, its significand ends in ...000, followed by a tie-bit 1. #define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case @@ -108,6 +102,12 @@ TEST_CONSTEXPR(match_v8hi( 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000 )); +__m128i test_mm_cvtps_ph(__m128 a) { + // CHECK-LABEL: test_mm_cvtps_ph + // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0) + return _mm_cvtps_ph(a, 0); +} + // // Tests for Exact Dynamic Rounding // @@ -123,4 +123,4 @@ __m128i test_mm256_cvtps_ph(__m256 a) { // CHECK-LABEL: test_mm256_cvtps_ph // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0) return _mm256_cvtps_ph(a, 0); -} \ No newline at end of file +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
