https://github.com/RKSimon updated 
https://github.com/llvm/llvm-project/pull/162295

>From 88a8114585fc51439cd7561d9ae4957ba01fe212 Mon Sep 17 00:00:00 2001
From: ericxu233 <[email protected]>
Date: Sun, 5 Oct 2025 01:41:40 -0400
Subject: [PATCH 1/6] [X86][Clang] Allow constexpr evaluation of F16C CVTPS2PH
 intrinsics

---
 clang/include/clang/Basic/BuiltinsX86.td |  6 +-
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 80 ++++++++++++++++++++++++
 clang/lib/AST/ExprConstant.cpp           | 65 +++++++++++++++++++
 clang/test/CodeGen/X86/f16c-builtins.c   | 57 +++++++++++++++++
 4 files changed, 206 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index fcc3957f9b8ab..5dba82a9107ca 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -716,11 +716,13 @@ let Features = "avx2", Attributes = [NoThrow, 
RequiredVectorWidth<128>] in {
   def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, 
_Vector<2, long long int>, _Vector<4, int>, _Constant char)">;
 }
 
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
+let Features = "f16c", 
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant 
int)">;
 }
 
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
+let Features = "f16c", 
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, 
_Constant int)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 659b6985d3157..29c37346399ab 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3527,7 +3527,83 @@ static bool 
interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
   }
 
   pushInteger(S, RetMask, Call->getType());
+  return true;
+}
+
+static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
+                                           const CallExpr *Call) {
+  // Arguments are: vector of floats, rounding immediate
+  assert(Call->getNumArgs() == 2);
+
+  APSInt Imm = popToAPSInt(S, Call->getArg(1));
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  assert(Src.getFieldDesc()->isPrimitiveArray());
+  assert(Dst.getFieldDesc()->isPrimitiveArray());
+
+  const auto *SrcVTy = Call->getArg(0)->getType()->castAs<VectorType>();
+  unsigned SrcNumElems = SrcVTy->getNumElements();
+  const auto *DstVTy = Call->getType()->castAs<VectorType>();
+  unsigned DstNumElems = DstVTy->getNumElements();
+
+  const llvm::fltSemantics &HalfSem =
+      S.getASTContext().getFloatTypeSemantics(S.getASTContext().HalfTy);
+
+  // imm[2] == 1 means use MXCSR rounding mode.
+  // In that case, we can only evaluate if the conversion is exact.
+  int ImmVal = Imm.getZExtValue();
+  bool UseMXCSR = (ImmVal & 4) != 0;
+
+  llvm::RoundingMode RM;
+  if (!UseMXCSR) {
+    switch (ImmVal & 3) {
+    case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
+    case 1: RM = llvm::RoundingMode::TowardNegative; break;
+    case 2: RM = llvm::RoundingMode::TowardPositive; break;
+    case 3: RM = llvm::RoundingMode::TowardZero; break;
+    default: llvm_unreachable("Invalid immediate rounding mode");
+    }
+  } else {
+    // For MXCSR, we must check for exactness. We can use any rounding mode
+    // for the trial conversion since the result is the same if it's exact.
+    RM = llvm::RoundingMode::NearestTiesToEven;
+  }
 
+  QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
+  PrimType DstElemT = *S.getContext().classify(DstElemQT);
+  bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType();
+
+  for (unsigned I = 0; I < SrcNumElems; ++I) {
+    Floating SrcVal = Src.elem<Floating>(I);
+    APFloat DstVal = SrcVal.getAPFloat();
+
+    bool LostInfo;
+    APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo);
+
+    if (UseMXCSR && St != APFloat::opOK) {
+      S.FFDiag(S.Current->getSource(OpPC), 
diag::note_constexpr_dynamic_rounding);
+      return false;
+    }
+
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+      // FIX: Extract the integer value before calling 'from'.
+      uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
+      Dst.elem<T>(I) = T::from(RawBits);
+    });
+  }
+
+  // Zero out remaining elements if the destination has more elements
+  // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
+  if (DstNumElems > SrcNumElems) {
+    for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
+      INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+        Dst.elem<T>(I) = T::from(0);
+      });
+    }
+  }
+ 
+  Dst.initializeAllElements();
   return true;
 }
 
@@ -4897,6 +4973,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
   case X86::BI__builtin_ia32_vinsertf128_si256:
   case X86::BI__builtin_ia32_insert128i256:
     return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
+  
+  case clang::X86::BI__builtin_ia32_vcvtps2ph:
+  case clang::X86::BI__builtin_ia32_vcvtps2ph256:
+    return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
 
   case X86::BI__builtin_ia32_vec_ext_v4hi:
   case X86::BI__builtin_ia32_vec_ext_v16qi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 88d7c79d3b99e..d7d9bf9d22c93 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13870,6 +13870,71 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
       return false;
     return Success(R, E);
   }
+
+  case clang::X86::BI__builtin_ia32_vcvtps2ph:
+  case clang::X86::BI__builtin_ia32_vcvtps2ph256: {
+    APValue SrcVec;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SrcVec))
+      return false;
+
+    APSInt Imm;
+    if (!EvaluateInteger(E->getArg(1), Imm, Info))
+      return false;
+
+    assert(SrcVec.isVector());
+
+    const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>();
+    unsigned SrcNumElems = SrcVTy->getNumElements();
+    const auto *DstVTy = E->getType()->castAs<VectorType>();
+    unsigned DstNumElems = DstVTy->getNumElements();
+    QualType DstElemTy = DstVTy->getElementType();
+
+    const llvm::fltSemantics &HalfSem = 
Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy);
+
+    int ImmVal = Imm.getZExtValue();
+    bool UseMXCSR = (ImmVal & 4) != 0;
+
+    llvm::RoundingMode RM;
+    if (!UseMXCSR) {
+      switch (ImmVal & 3) {
+      case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
+      case 1: RM = llvm::RoundingMode::TowardNegative; break;
+      case 2: RM = llvm::RoundingMode::TowardPositive; break;
+      case 3: RM = llvm::RoundingMode::TowardZero; break;
+      default: llvm_unreachable("Invalid immediate rounding mode");
+      }
+    } else {
+      RM = llvm::RoundingMode::NearestTiesToEven;
+    }
+
+    SmallVector<APValue, 8> ResultElements;
+    ResultElements.reserve(DstNumElems);
+
+    for (unsigned I = 0; I < SrcNumElems; ++I) {
+      APFloat SrcVal = SrcVec.getVectorElt(I).getFloat();
+      
+      bool LostInfo;
+      APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo);
+
+      if (UseMXCSR && St != APFloat::opOK) {
+        Info.FFDiag(E, diag::note_constexpr_dynamic_rounding);
+        return false;
+      }
+      
+      APSInt DstInt(SrcVal.bitcastToAPInt(),
+                    DstElemTy->isUnsignedIntegerOrEnumerationType());
+      ResultElements.push_back(APValue(DstInt));
+    }
+
+    if (DstNumElems > SrcNumElems) {
+      APSInt Zero = Info.Ctx.MakeIntValue(0, DstElemTy);
+      for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
+        ResultElements.push_back(APValue(Zero));
+      }
+    }
+
+    return Success(ResultElements, E);
+  }
   }
 }
 
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c 
b/clang/test/CodeGen/X86/f16c-builtins.c
index c08ef76d56981..de35c16c75ab4 100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -67,3 +67,60 @@ __m128i test_mm256_cvtps_ph(__m256 a) {
   // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
   return _mm256_cvtps_ph(a, 0);
 }
+
+// A value exactly halfway between 1.0 and the next representable FP16 number.
+// In binary, its significand ends in ...000, followed by a tie-bit 1.
+#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
+
+//
+// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
+//
+// Test values: -2.5f, 1.123f, POS_HALFWAY
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_NEAREST_INT),
+  0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_NEG_INF),
+  0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_POS_INF),
+  0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_ZERO),
+  0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+
+//
+// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
+//
+// Test values: -2.5f, 1.123f, POS_HALFWAY
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+  0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+  0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+  0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+  0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
+));
+
+//
+// Tests for Exact Dynamic Rounding
+//
+// Test that dynamic rounding SUCCEEDS for exactly representable values.
+// We use _MM_FROUND_CUR_DIRECTION (value 4) to specify dynamic rounding.
+// Inputs: -2.5f, 0.125f, -16.0f are all exactly representable in FP16.
+TEST_CONSTEXPR(match_v8hi(
+  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, 
-2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
+  0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
+));
\ No newline at end of file

>From 0fcfb81f244b78a5d6cfea2401c9727ac9d540f8 Mon Sep 17 00:00:00 2001
From: ericxu233 <[email protected]>
Date: Tue, 7 Oct 2025 10:33:02 -0400
Subject: [PATCH 2/6] clang format

---
 clang/include/clang/Basic/BuiltinsX86.td |  4 ++--
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 +++++++++++++++---------
 clang/lib/AST/ExprConstant.cpp           | 26 +++++++++++++-------
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 5dba82a9107ca..04ed4c7d76c7f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -716,12 +716,12 @@ let Features = "avx2", Attributes = [NoThrow, 
RequiredVectorWidth<128>] in {
   def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, 
_Vector<2, long long int>, _Vector<4, int>, _Constant char)">;
 }
 
-let Features = "f16c", 
+let Features = "f16c",
     Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant 
int)">;
 }
 
-let Features = "f16c", 
+let Features = "f16c",
     Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, 
_Constant int)">;
 }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 29c37346399ab..985f4c2093239 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3558,11 +3558,20 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
   llvm::RoundingMode RM;
   if (!UseMXCSR) {
     switch (ImmVal & 3) {
-    case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
-    case 1: RM = llvm::RoundingMode::TowardNegative; break;
-    case 2: RM = llvm::RoundingMode::TowardPositive; break;
-    case 3: RM = llvm::RoundingMode::TowardZero; break;
-    default: llvm_unreachable("Invalid immediate rounding mode");
+    case 0:
+      RM = llvm::RoundingMode::NearestTiesToEven;
+      break;
+    case 1:
+      RM = llvm::RoundingMode::TowardNegative;
+      break;
+    case 2:
+      RM = llvm::RoundingMode::TowardPositive;
+      break;
+    case 3:
+      RM = llvm::RoundingMode::TowardZero;
+      break;
+    default:
+      llvm_unreachable("Invalid immediate rounding mode");
     }
   } else {
     // For MXCSR, we must check for exactness. We can use any rounding mode
@@ -3582,7 +3591,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
     APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo);
 
     if (UseMXCSR && St != APFloat::opOK) {
-      S.FFDiag(S.Current->getSource(OpPC), 
diag::note_constexpr_dynamic_rounding);
+      S.FFDiag(S.Current->getSource(OpPC),
+               diag::note_constexpr_dynamic_rounding);
       return false;
     }
 
@@ -3597,12 +3607,10 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
   // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
   if (DstNumElems > SrcNumElems) {
     for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
-      INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
-        Dst.elem<T>(I) = T::from(0);
-      });
+      INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
     }
   }
- 
+
   Dst.initializeAllElements();
   return true;
 }
@@ -4973,7 +4981,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
   case X86::BI__builtin_ia32_vinsertf128_si256:
   case X86::BI__builtin_ia32_insert128i256:
     return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
-  
+
   case clang::X86::BI__builtin_ia32_vcvtps2ph:
   case clang::X86::BI__builtin_ia32_vcvtps2ph256:
     return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d7d9bf9d22c93..ee5fc0fe36d04 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13889,7 +13889,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     unsigned DstNumElems = DstVTy->getNumElements();
     QualType DstElemTy = DstVTy->getElementType();
 
-    const llvm::fltSemantics &HalfSem = 
Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy);
+    const llvm::fltSemantics &HalfSem =
+        Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy);
 
     int ImmVal = Imm.getZExtValue();
     bool UseMXCSR = (ImmVal & 4) != 0;
@@ -13897,11 +13898,20 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
     llvm::RoundingMode RM;
     if (!UseMXCSR) {
       switch (ImmVal & 3) {
-      case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
-      case 1: RM = llvm::RoundingMode::TowardNegative; break;
-      case 2: RM = llvm::RoundingMode::TowardPositive; break;
-      case 3: RM = llvm::RoundingMode::TowardZero; break;
-      default: llvm_unreachable("Invalid immediate rounding mode");
+      case 0:
+        RM = llvm::RoundingMode::NearestTiesToEven;
+        break;
+      case 1:
+        RM = llvm::RoundingMode::TowardNegative;
+        break;
+      case 2:
+        RM = llvm::RoundingMode::TowardPositive;
+        break;
+      case 3:
+        RM = llvm::RoundingMode::TowardZero;
+        break;
+      default:
+        llvm_unreachable("Invalid immediate rounding mode");
       }
     } else {
       RM = llvm::RoundingMode::NearestTiesToEven;
@@ -13912,7 +13922,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
 
     for (unsigned I = 0; I < SrcNumElems; ++I) {
       APFloat SrcVal = SrcVec.getVectorElt(I).getFloat();
-      
+
       bool LostInfo;
       APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo);
 
@@ -13920,7 +13930,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
         Info.FFDiag(E, diag::note_constexpr_dynamic_rounding);
         return false;
       }
-      
+
       APSInt DstInt(SrcVal.bitcastToAPInt(),
                     DstElemTy->isUnsignedIntegerOrEnumerationType());
       ResultElements.push_back(APValue(DstInt));

>From ba6029302c2670ef81669ba9f746825a5ec2af5b Mon Sep 17 00:00:00 2001
From: ericxu233 <[email protected]>
Date: Thu, 23 Oct 2025 17:38:02 -0400
Subject: [PATCH 3/6] Finished addressing review comments

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  8 ++--
 clang/lib/AST/ExprConstant.cpp           |  2 -
 clang/test/CodeGen/X86/f16c-builtins.c   | 54 ++++++++++++------------
 3 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 985f4c2093239..d7956123c45d3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3581,9 +3581,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
 
   QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
   PrimType DstElemT = *S.getContext().classify(DstElemQT);
-  bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType();
 
-  for (unsigned I = 0; I < SrcNumElems; ++I) {
+  for (unsigned I = 0; I != SrcNumElems; ++I) {
     Floating SrcVal = Src.elem<Floating>(I);
     APFloat DstVal = SrcVal.getAPFloat();
 
@@ -3597,7 +3596,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
     }
 
     INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
-      // FIX: Extract the integer value before calling 'from'.
+      // Convert the destination value's bit pattern to an unsigned integer,
+      // then reconstruct the element using the target type's 'from' method.
       uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
       Dst.elem<T>(I) = T::from(RawBits);
     });
@@ -3606,7 +3606,7 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
   // Zero out remaining elements if the destination has more elements
   // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
   if (DstNumElems > SrcNumElems) {
-    for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
+    for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
       INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
     }
   }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ee5fc0fe36d04..2a18de91cb22c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13881,8 +13881,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     if (!EvaluateInteger(E->getArg(1), Imm, Info))
       return false;
 
-    assert(SrcVec.isVector());
-
     const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>();
     unsigned SrcNumElems = SrcVTy->getNumElements();
     const auto *DstVTy = E->getType()->castAs<VectorType>();
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c 
b/clang/test/CodeGen/X86/f16c-builtins.c
index de35c16c75ab4..47ff06b270541 100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -46,71 +46,65 @@ __m128 test_mm_cvtph_ps(__m128i a) {
   return _mm_cvtph_ps(a);
 }
 
-__m256 test_mm256_cvtph_ps(__m128i a) {
-  // CHECK-LABEL: test_mm256_cvtph_ps
-  // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
-  return _mm256_cvtph_ps(a);
-}
-TEST_CONSTEXPR(match_m256(
-    _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 
0x3800, 0xC000, 0x0000)), 
-    1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
-));
-
 __m128i test_mm_cvtps_ph(__m128 a) {
   // CHECK-LABEL: test_mm_cvtps_ph
   // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
   return _mm_cvtps_ph(a, 0);
 }
 
-__m128i test_mm256_cvtps_ph(__m256 a) {
-  // CHECK-LABEL: test_mm256_cvtps_ph
-  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
-  return _mm256_cvtps_ph(a, 0);
-}
-
 // A value exactly halfway between 1.0 and the next representable FP16 number.
 // In binary, its significand ends in ...000, followed by a tie-bit 1.
 #define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
 
 //
-// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
+// _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
 //
 // Test values: -2.5f, 1.123f, POS_HALFWAY
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_NEAREST_INT),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_NEAREST_INT),
   0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_NEG_INF),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_NEG_INF),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_POS_INF),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_POS_INF),
   0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_ZERO),
+  _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), 
_MM_FROUND_TO_ZERO),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
 ));
 
+__m256 test_mm256_cvtph_ps(__m128i a) {
+  // CHECK-LABEL: test_mm256_cvtph_ps
+  // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
+  return _mm256_cvtph_ps(a);
+}
+TEST_CONSTEXPR(match_m256(
+    _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 
0x3800, 0xC000, 0x0000)), 
+    1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
+));
+
 //
-// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
+// _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts)
 //
 // Test values: -2.5f, 1.123f, POS_HALFWAY
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 
1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
   0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 
1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 
1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
   0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
 ));
 TEST_CONSTEXPR(match_v8hi(
-  __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, 
-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+  _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 
1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
 ));
 
@@ -123,4 +117,10 @@ TEST_CONSTEXPR(match_v8hi(
 TEST_CONSTEXPR(match_v8hi(
   __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, 
-2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
   0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
-));
\ No newline at end of file
+));
+
+__m128i test_mm256_cvtps_ph(__m256 a) {
+  // CHECK-LABEL: test_mm256_cvtps_ph
+  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
+  return _mm256_cvtps_ph(a, 0);
+}
\ No newline at end of file

>From 4df1e697272309cd29246d68deaa4e5a1a71c5b8 Mon Sep 17 00:00:00 2001
From: ericxu233 <[email protected]>
Date: Fri, 7 Nov 2025 16:20:46 -0500
Subject: [PATCH 4/6] Address review comments

---
 clang/test/CodeGen/X86/f16c-builtins.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/X86/f16c-builtins.c 
b/clang/test/CodeGen/X86/f16c-builtins.c
index 47ff06b270541..2ae4bc857b431 100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -46,12 +46,6 @@ __m128 test_mm_cvtph_ps(__m128i a) {
   return _mm_cvtph_ps(a);
 }
 
-__m128i test_mm_cvtps_ph(__m128 a) {
-  // CHECK-LABEL: test_mm_cvtps_ph
-  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
-  return _mm_cvtps_ph(a, 0);
-}
-
 // A value exactly halfway between 1.0 and the next representable FP16 number.
 // In binary, its significand ends in ...000, followed by a tie-bit 1.
 #define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
@@ -108,6 +102,12 @@ TEST_CONSTEXPR(match_v8hi(
   0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
 ));
 
+__m128i test_mm_cvtps_ph(__m128 a) {
+  // CHECK-LABEL: test_mm_cvtps_ph
+  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
+  return _mm_cvtps_ph(a, 0);
+}
+
 //
 // Tests for Exact Dynamic Rounding
 //
@@ -123,4 +123,4 @@ __m128i test_mm256_cvtps_ph(__m256 a) {
   // CHECK-LABEL: test_mm256_cvtps_ph
   // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
   return _mm256_cvtps_ph(a, 0);
-}
\ No newline at end of file
+}

>From 21c8d5cd7f8767549116c84ddec79ac64f7d965f Mon Sep 17 00:00:00 2001
From: ericxu233 <[email protected]>
Date: Wed, 26 Nov 2025 23:33:40 -0500
Subject: [PATCH 5/6] Added strict fp mode checking

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 5 ++++-
 clang/lib/AST/ExprConstant.cpp           | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d7956123c45d3..33afe1164edca 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3554,6 +3554,9 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
   // In that case, we can only evaluate if the conversion is exact.
   int ImmVal = Imm.getZExtValue();
   bool UseMXCSR = (ImmVal & 4) != 0;
+  bool IsFPConstrained =
+      Call->getFPFeaturesInEffect(S.getASTContext().getLangOpts())
+          .isFPConstrained();
 
   llvm::RoundingMode RM;
   if (!UseMXCSR) {
@@ -3589,7 +3592,7 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState 
&S, CodePtr OpPC,
     bool LostInfo;
     APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo);
 
-    if (UseMXCSR && St != APFloat::opOK) {
+    if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) {
       S.FFDiag(S.Current->getSource(OpPC),
                diag::note_constexpr_dynamic_rounding);
       return false;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 2a18de91cb22c..846024693df31 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13892,6 +13892,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
 
     int ImmVal = Imm.getZExtValue();
     bool UseMXCSR = (ImmVal & 4) != 0;
+    bool IsFPConstrained =
+      E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()).isFPConstrained();
 
     llvm::RoundingMode RM;
     if (!UseMXCSR) {
@@ -13924,7 +13926,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
       bool LostInfo;
       APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo);
 
-      if (UseMXCSR && St != APFloat::opOK) {
+      if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) {
         Info.FFDiag(E, diag::note_constexpr_dynamic_rounding);
         return false;
       }

>From 53de5ae845a0c8e921c4d49a93ad6fb8e9706986 Mon Sep 17 00:00:00 2001
From: ericxu233 <[email protected]>
Date: Thu, 27 Nov 2025 00:46:16 -0500
Subject: [PATCH 6/6] git clang format

---
 clang/lib/AST/ExprConstant.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 846024693df31..3b7d50f5db6a5 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13893,7 +13893,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     int ImmVal = Imm.getZExtValue();
     bool UseMXCSR = (ImmVal & 4) != 0;
     bool IsFPConstrained =
-      E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()).isFPConstrained();
+        E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()).isFPConstrained();
 
     llvm::RoundingMode RM;
     if (!UseMXCSR) {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to