[clang] [X86][Clang] Support constexpr evaluation of cvtpd2ps intrinsics (PR #169980)

Hamza Hassanain via cfe-commits Sat, 29 Nov 2025 01:26:01 -0800

https://github.com/HamzaHassanain created 
https://github.com/llvm/llvm-project/pull/169980


This patch implements constant evaluation support for the following X86 
intrinsics:
- _mm_cvtpd_ps, _mm256_cvtpd_ps, _mm512_cvtpd_ps, _mm512_cvtpd_pslo
  (Packed Double to Float)
- _mm_cvtsd_ss (Scalar Double to Float merge)
- Masked variants of the above

It implements the strict "Exact and Finite" rule: conversions that are
inexact, infinite, or NaN are rejected in constexpr contexts.

Fixes #169370

>From 29e2794651c50ccf60a28c2e08639913a68cd71c Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <[email protected]>
Date: Wed, 26 Nov 2025 17:05:45 +0200
Subject: [PATCH 1/6] add tests that should pass:
 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp

---
 .../constexpr-x86-intrinsics-pd2ps.cpp        | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp

diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp 
b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
new file mode 100644
index 0000000000000..a082b23bfae03
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature 
+avx -target-feature +avx512f -target-feature +avx512vl -verify %s
+
+// HACK: Prevent immintrin.h from pulling in standard library headers
+// that don't exist in this test environment.
+#define __MM_MALLOC_H
+
+#include <immintrin.h>
+
+namespace ExactFinite {
+constexpr __m128d d2 = { -1.0, +2.0 };
+constexpr __m128 r128 = _mm_cvtpd_ps(d2);
+static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
+static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
+
+constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
+static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
+static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
+
+constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
+static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
+
+constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
+static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
+static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
+
+constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
+static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
+static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
+
+constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
+static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
+
+constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
+static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
+static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
+
+constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
+static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
+static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
+
+constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
+static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
+static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
+
+constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
+static_assert(r512lo_mask[0] == -1.0f, "");
+static_assert(r512lo_mask[1] == +2.0f, "");
+static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
+
+constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b_ss = { -1.0, 42.0 };
+constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
+static_assert(r_ss[0] == -1.0f, "");
+static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+
+constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
+static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
+constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
+static_assert(r_ss_mask_off[0] == 9.0f, "");
+constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
+static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+}
+
+namespace InexactOrSpecialReject {
+constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an 
integral constant expression}}
+static_assert(r_inexact[0] == 1.0f, "");           // both-note 
{{subexpression not valid in a constant expression}}
+
+constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral 
constant expression}}
+static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression 
not valid in a constant expression}}
+
+constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral 
constant expression}}
+static_assert(r_nan[0] != r_nan[0], "");  // both-note {{subexpression not 
valid in a constant expression}}
+
+constexpr __m128d dsub = { 1e-310, 0.0 };
+constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral 
constant expression}}
+static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in 
a constant expression}}
+
+constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // 
both-error {{not an integral constant expression}}
+static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not 
valid in a constant expression}}
+}
+
+namespace MaskedSpecialCasesAllowed {
+constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
+static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+
+constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
+static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+
+constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
+constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
+static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && 
ok256m[3] == 9.0f, "");
+
+constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
+static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && 
ok256z[3] == 0.0f, "");
+
+constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 
1.0000000000000002, 64.0, 128.0 };
+constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
+static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, 
"");
+
+constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
+static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, 
"");
+
+constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // 
both-error {{not an integral constant expression}}
+static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid 
in a constant expression}}
+}

>From 30c0dc75714191e31625bb074e6e62d54aeece7f Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <[email protected]>
Date: Wed, 26 Nov 2025 22:20:48 +0200
Subject: [PATCH 2/6] added __DEFAULT_FN_ATTRS_CONSTEXPR To Headers

---
 clang/lib/Headers/avx512fintrin.h  | 16 ++++++++--------
 clang/lib/Headers/avx512vlintrin.h |  8 ++++----
 clang/lib/Headers/avxintrin.h      |  4 ++--
 clang/lib/Headers/emmintrin.h      |  4 ++--
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h
index e1de56069870b..b9f1d1eecc09f 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_undefined_ps(void)
 {
   return (__m512)__builtin_ia32_undef512();
@@ -3489,7 +3489,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, 
__m512i __A) {
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_cvtpd_ps (__m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3498,7 +3498,7 @@ _mm512_cvtpd_ps (__m512d __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3507,7 +3507,7 @@ _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d 
__A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3516,7 +3516,7 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_cvtpd_pslo (__m512d __A)
 {
   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
@@ -3524,7 +3524,7 @@ _mm512_cvtpd_pslo (__m512d __A)
                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
 {
   return (__m512) __builtin_shufflevector (
@@ -8672,7 +8672,7 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 
__U, __m512i __A)
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
@@ -8681,7 +8681,7 @@ _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, 
__m128d __B)
                                              (__mmask8)__U, 
_MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
diff --git a/clang/lib/Headers/avx512vlintrin.h 
b/clang/lib/Headers/avx512vlintrin.h
index 99c057030a4cc..82a06edd28ba2 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1791,14 +1791,14 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf) __W,
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf)
@@ -1806,14 +1806,14 @@ _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 44ef88db5cbce..f3f444083edbf 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2190,7 +2190,7 @@ _mm256_cvtepi32_ps(__m256i __a) {
 /// \param __a
 ///    A 256-bit vector of [4 x double].
 /// \returns A 128-bit vector of [4 x float] containing the converted values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_cvtpd_ps(__m256d __a)
 {
   return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
@@ -3610,7 +3610,7 @@ _mm256_undefined_pd(void)
 /// This intrinsic has no corresponding instruction.
 ///
 /// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_undefined_ps(void)
 {
   return (__m256)__builtin_ia32_undef256();
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index dbe5ca0379cf5..1701effedc5ce 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1279,7 +1279,7 @@ static __inline__ int __DEFAULT_FN_ATTRS 
_mm_ucomineq_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double].
 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
 ///    converted values. The upper 64 bits are set to zero.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtpd_ps(__m128d 
__a) {
   return __builtin_ia32_cvtpd2ps((__v2df)__a);
 }
 
@@ -1384,7 +1384,7 @@ static __inline__ int __DEFAULT_FN_ATTRS 
_mm_cvtsd_si32(__m128d __a) {
 /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
 ///    converted value from the second parameter. The upper 96 bits are copied
 ///    from the upper 96 bits of the first parameter.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a,
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsd_ss(__m128 __a,
                                                          __m128d __b) {
   return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
 }

>From 9f1020ecf3a706df9537b38464b61748aa0278f0 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <[email protected]>
Date: Wed, 26 Nov 2025 22:24:54 +0200
Subject: [PATCH 3/6] added Constexpr to necessary builtins

---
 clang/include/clang/Basic/BuiltinsX86.td | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 4aa3d51931980..283a0a3e6ae0c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -24,12 +24,12 @@ def undef128 : X86Builtin<"_Vector<2, double>()"> {
   let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
 }
 
-def undef256 : X86Builtin<"_Vector<4, double>()"> {
-  let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
+def undef256 : X86Builtin<"_Vector<4, double>()"  > {
+  let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
 }
 
 def undef512 : X86Builtin<"_Vector<8, double>()"> {
-  let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
+  let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
 }
 
 // FLAGS
@@ -168,7 +168,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
   def movnti : X86Builtin<"void(int *, int)">;
 }
 
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, 
_Vector<16, char>)">;
   def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
   def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -468,7 +468,7 @@ let Features = "avx", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWid
   def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, 
_Vector<8, int>)">;
 }
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant char)">;
   def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, 
double>, _Constant char)">;
   def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant char)">;
@@ -1009,7 +1009,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128
   def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, 
double>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
   def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
   def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, 
_Vector<16, int>, unsigned short, _Constant int)">;
@@ -1457,7 +1457,7 @@ let Features = "avx512vl", Attributes = [NoThrow, 
RequiredVectorWidth<256>] in {
   def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, 
int>, unsigned char)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, 
_Vector<4, int>, unsigned char)">;
   def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, 
_Vector<4, float>, unsigned char)">;
   def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, 
_Vector<4, int>, unsigned char)">;
@@ -3301,7 +3301,7 @@ let Features = "avx512bw,avx512vl",
   def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
   def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, 
_Constant int)">;
   def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, 
_Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;

>From d28d6d8c7cc6e816f772a78dd0d177f0248d3178 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <[email protected]>
Date: Sat, 29 Nov 2025 11:08:42 +0200
Subject: [PATCH 4/6] added FULL tests for pd2ps constexpr

---
 .../constexpr-x86-intrinsics-pd2ps.cpp        | 559 ++++++++++++++----
 1 file changed, 459 insertions(+), 100 deletions(-)

diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp 
b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
index a082b23bfae03..4a1e9a9c5ae2c 100644
--- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -1,120 +1,479 @@
 // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature 
+avx -target-feature +avx512f -target-feature +avx512vl -verify %s
 
-// HACK: Prevent immintrin.h from pulling in standard library headers
-// that don't exist in this test environment.
-#define __MM_MALLOC_H
-
+#define __MM_MALLOC_H 
 #include <immintrin.h>
 
-namespace ExactFinite {
-constexpr __m128d d2 = { -1.0, +2.0 };
-constexpr __m128 r128 = _mm_cvtpd_ps(d2);
-static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
-static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
-
-constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
-static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
-static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
-
-constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
-static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
-
-constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
-static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
-static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
-
-constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
-static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
-static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
-
-constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
-static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
-
-constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
-static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
-static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
-
-constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
-static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
-static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
-
-constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
-static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
-static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
-
-constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
-                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
-static_assert(r512lo_mask[0] == -1.0f, "");
-static_assert(r512lo_mask[1] == +2.0f, "");
-static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
-
-constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128d b_ss = { -1.0, 42.0 };
-constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
-static_assert(r_ss[0] == -1.0f, "");
-static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+namespace Test_mm_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, 
"");
+}
+namespace Inexact {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 
2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 
2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 
2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 
2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
 
-constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
-static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
-constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
-static_assert(r_ss_mask_off[0] == 9.0f, "");
-constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
-static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+namespace Test_mm_mask_cvtsd_ss {
+namespace OK {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, 
"");
+}
+namespace MaskOff {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b);
+static_assert(r[0] == 9.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
+static_assert(r[0] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 
5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 
3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 
5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 
3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 
5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 
3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace MaskOnSubnormal {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_sub = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 
5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 
3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
 }
 
-namespace InexactOrSpecialReject {
-constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an 
integral constant expression}}
-static_assert(r_inexact[0] == 1.0f, "");           // both-note 
{{subexpression not valid in a constant expression}}
+namespace Test_mm_maskz_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b);
+static_assert(r[0] == 0.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact);
+static_assert(r[0] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 
2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 
2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+}
 
-constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral 
constant expression}}
-static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression 
not valid in a constant expression}}
+namespace Test_mm_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace Inexact {
+constexpr __m128d a = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128d a = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128d a = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128d a = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 
0.000000e+00})'}}
+}
+}
 
-constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral 
constant expression}}
-static_assert(r_nan[0] != r_nan[0], "");  // both-note {{subexpression not 
valid in a constant expression}}
+namespace Test_mm_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace Partial {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a 
NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
+}
+}
 
-constexpr __m128d dsub = { 1e-310, 0.0 };
-constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral 
constant expression}}
-static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in 
a constant expression}}
+namespace Test_mm_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, 
INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a 
NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, 
nan})'}}
+}
+}
 
-constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // 
both-error {{not an integral constant expression}}
-static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not 
valid in a constant expression}}
+namespace Test_mm256_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f, "");
+static_assert(r[2] == +2.0f && r[3] == +3.5f, "");
+}
+namespace Inexact {
+constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 
0.000000e+00, 0.000000e+00})'}}
+}
 }
 
-namespace MaskedSpecialCasesAllowed {
-constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
-static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+namespace Test_mm256_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, 
"");
+}
+namespace MaskOffInf {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking,
+// so even masked-off Inf/NaN values cause errors (architectural limitation).
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// [email protected]:* {{in call to 
'_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, 
INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking,
+// so even masked-off Inf/NaN values cause errors (architectural limitation).
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a NaN}}
+// [email protected]:* {{in call to 
'_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 
4.000000e+00, nan})'}}
+}
+}
 
-constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
-static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+namespace Test_mm256_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a);
+static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, 
"");
+}
+namespace MaskOffInf {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking,
+// so even masked-off Inf/NaN values cause errors (architectural limitation).
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// [email protected]:* {{in call to 
'_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 
2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking,
+// so even masked-off Inf/NaN values cause errors (architectural limitation).
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a NaN}}
+// [email protected]:* {{in call to 
'_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 
2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
 
-constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
-constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
-static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && 
ok256m[3] == 9.0f, "");
+namespace Test_mm512_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+}
+namespace Inexact {
+constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 
};
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{compile time floating point arithmetic 
suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 
0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 
0.000000e+00})'}}
+}
+}
 
-constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
-static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && 
ok256z[3] == 0.0f, "");
+namespace Test_mm512_mask_cvtpd_ps {
+namespace OK {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x05, a);
+static_assert(r[0] == -1.0f && r[2] == +4.0f, "");
+static_assert(r[1] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 
1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 
128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, 
__builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), 
+64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, 
+32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, 
INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, 
+64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, 
nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
 
-constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 
1.0000000000000002, 64.0, 128.0 };
-constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
-static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, 
"");
+namespace Test_mm512_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[1] == 0.0f && r[6] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 
1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 
128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, 
__builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), 
+64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, 
+32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 
2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 
1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, 
+64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 
2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 
1.280000e+02})'}}
+}
+}
 
-constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
-static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, 
"");
+namespace Test_mm512_cvtpd_pslo {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_cvtpd_pslo(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[8] == 0.0f && r[15] == 0.0f, "");
+}
+}
 
-constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // 
both-error {{not an integral constant expression}}
-static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid 
in a constant expression}}
+namespace Test_mm512_mask_cvtpd_pslo {
+namespace OK {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, 
+32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, 
+64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, 
"");
+}
+namespace MaskOnInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, 
+32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces an 
infinity}}
+// [email protected]:* {{in call to 
'_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 
2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 
1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, 
{-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 
6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+                                9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, 
+64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// [email protected]:* {{floating point arithmetic produces a NaN}}
+// [email protected]:* {{in call to 
'_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 
2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 
1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 
9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, 
{-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 
6.400000e+01, 1.280000e+02})'}}
+}
 }

>From 4a2f59bee574efec48ac87e74dae356dc72fb2ae Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <[email protected]>
Date: Sat, 29 Nov 2025 11:09:57 +0200
Subject: [PATCH 5/6] fully implemented features in ExprConstant visiting logic

---
 clang/lib/AST/ExprConstant.cpp | 143 +++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..065d5c2e33a9c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12165,7 +12165,37 @@ static bool evalShuffleGeneric(
   Out = APValue(ResultElements.data(), ResultElements.size());
   return true;
 }
+static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
+                                       APFloat OrigVal, APValue &Result) {
 
+  if (OrigVal.isInfinity()) {
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; 
+    return false;
+  }
+  if (OrigVal.isNaN()) {
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 1;
+    return false;
+  }
+
+  APFloat Val = OrigVal; 
+  bool LosesInfo = false;
+  APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
+                                         APFloat::rmNearestTiesToEven,
+                                         &LosesInfo);
+
+  if(LosesInfo || Val.isDenormal()) {
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
+    return false;
+  }
+
+  if(Status != APFloat::opOK) {
+    Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
+    return false;
+  }
+
+  Result = APValue(Val);
+  return true;
+}
 bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   if (!IsConstantEvaluatedBuiltinCall(E))
     return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -12878,6 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+
+    case X86::BI__builtin_ia32_cvtsd2ss: {
+      APValue VecA, VecB;
+      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+          !EvaluateAsRValue(Info, E->getArg(1), VecB))
+        return false;
+
+      SmallVector<APValue, 4> Elements;
+
+      APValue ResultVal;
+      if (!ConvertDoubleToFloatStrict(Info, E, 
VecB.getVectorElt(0).getFloat(), ResultVal))
+        return false;
+    
+      Elements.push_back(ResultVal);
+
+      unsigned NumEltsA = VecA.getVectorLength();
+      for (unsigned I = 1; I < NumEltsA; ++I) {
+          Elements.push_back(VecA.getVectorElt(I));
+      }
+
+      return Success(Elements, E);
+    }
+    case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+      APValue VecA, VecB, VecSrc, MaskValue;
+
+      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+          !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+          !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+          !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+        return false;
+
+      unsigned Mask = MaskValue.getInt().getZExtValue();
+      SmallVector<APValue, 4> Elements;
+
+
+      if (Mask & 1) {
+        APValue ResultVal;
+        if (!ConvertDoubleToFloatStrict(Info, E, 
VecB.getVectorElt(0).getFloat(), ResultVal))
+           return false;
+        Elements.push_back(ResultVal);
+      } else {
+        Elements.push_back(VecSrc.getVectorElt(0));
+      }
+
+      unsigned NumEltsA = VecA.getVectorLength();
+      for (unsigned I = 1; I < NumEltsA; ++I) {
+          Elements.push_back(VecA.getVectorElt(I));
+      }
+
+      return Success(Elements, E);
+    }
+    case X86::BI__builtin_ia32_cvtpd2ps:
+    case X86::BI__builtin_ia32_cvtpd2ps256: 
+    case X86::BI__builtin_ia32_cvtpd2ps_mask:     
+    case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
+
+
+      const auto BuiltinID = E->getBuiltinCallee();
+      bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || 
+                       BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+
+      APValue InputValue;
+      if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+        return false;
+
+      APValue MergeValue;
+      unsigned Mask = 0xFFFFFFFF;
+      bool NeedsMerge = false;
+      if (IsMasked) {
+          APValue MaskValue;
+          if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+              return false;
+          Mask = MaskValue.getInt().getZExtValue();
+          auto NumEltsResult = 
E->getType()->getAs<VectorType>()->getNumElements();
+          for (unsigned I = 0; I < NumEltsResult; ++I) {
+            if (!((Mask >> I) & 1)) {
+              NeedsMerge = true;
+              break;
+            }
+          }
+          if (NeedsMerge) {
+            if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+              return false;
+          }
+      }
+
+      unsigned NumEltsResult = 
E->getType()->getAs<VectorType>()->getNumElements();
+      unsigned NumEltsInput = InputValue.getVectorLength();
+      SmallVector<APValue, 8> Elements;
+      for (unsigned I = 0; I < NumEltsResult; ++I) {
+        if (IsMasked && !((Mask >> I) & 1)) {
+            if (!NeedsMerge) {
+              return false;
+            }
+            Elements.push_back(MergeValue.getVectorElt(I));
+            continue; 
+        }
+
+        if (I >= NumEltsInput) {
+           
Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+           continue;
+        }
+
+        APValue ResultVal;
+        if (!ConvertDoubleToFloatStrict(Info, E, 
InputValue.getVectorElt(I).getFloat(), ResultVal))
+           return false;
+        
+        Elements.push_back(ResultVal);
+      }
+      return Success(Elements, E);
+    }
+
+  
   case X86::BI__builtin_ia32_shufps:
   case X86::BI__builtin_ia32_shufps256:
   case X86::BI__builtin_ia32_shufps512: {

>From 0fb3292fe860e30de61d2df3a90912f27f04f143 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <[email protected]>
Date: Sat, 29 Nov 2025 11:21:17 +0200
Subject: [PATCH 6/6] Ran the git clang-format command

---
 clang/lib/AST/ExprConstant.cpp | 192 ++++++++++++++++-----------------
 1 file changed, 96 insertions(+), 96 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 065d5c2e33a9c..6f512dd538e7d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12169,7 +12169,7 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, 
const Expr *E,
                                        APFloat OrigVal, APValue &Result) {
 
   if (OrigVal.isInfinity()) {
-    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; 
+    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
     return false;
   }
   if (OrigVal.isNaN()) {
@@ -12177,18 +12177,17 @@ static bool ConvertDoubleToFloatStrict(EvalInfo 
&Info, const Expr *E,
     return false;
   }
 
-  APFloat Val = OrigVal; 
+  APFloat Val = OrigVal;
   bool LosesInfo = false;
-  APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
-                                         APFloat::rmNearestTiesToEven,
-                                         &LosesInfo);
+  APFloat::opStatus Status = Val.convert(
+      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
 
-  if(LosesInfo || Val.isDenormal()) {
+  if (LosesInfo || Val.isDenormal()) {
     Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
     return false;
   }
 
-  if(Status != APFloat::opOK) {
+  if (Status != APFloat::opOK) {
     Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
     return false;
   }
@@ -12909,118 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
-    case X86::BI__builtin_ia32_cvtsd2ss: {
-      APValue VecA, VecB;
-      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
-          !EvaluateAsRValue(Info, E->getArg(1), VecB))
-        return false;
+  case X86::BI__builtin_ia32_cvtsd2ss: {
+    APValue VecA, VecB;
+    if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+        !EvaluateAsRValue(Info, E->getArg(1), VecB))
+      return false;
 
-      SmallVector<APValue, 4> Elements;
+    SmallVector<APValue, 4> Elements;
 
-      APValue ResultVal;
-      if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
-        return false;
-    
-      Elements.push_back(ResultVal);
+    APValue ResultVal;
+    if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+                                    ResultVal))
+      return false;
 
-      unsigned NumEltsA = VecA.getVectorLength();
-      for (unsigned I = 1; I < NumEltsA; ++I) {
-          Elements.push_back(VecA.getVectorElt(I));
-      }
+    Elements.push_back(ResultVal);
 
-      return Success(Elements, E);
+    unsigned NumEltsA = VecA.getVectorLength();
+    for (unsigned I = 1; I < NumEltsA; ++I) {
+      Elements.push_back(VecA.getVectorElt(I));
     }
-    case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
-      APValue VecA, VecB, VecSrc, MaskValue;
-
-      if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
-          !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
-          !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
-          !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
-        return false;
 
-      unsigned Mask = MaskValue.getInt().getZExtValue();
-      SmallVector<APValue, 4> Elements;
+    return Success(Elements, E);
+  }
+  case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+    APValue VecA, VecB, VecSrc, MaskValue;
 
+    if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+        !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+        !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+        !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+      return false;
 
-      if (Mask & 1) {
-        APValue ResultVal;
-        if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
-           return false;
-        Elements.push_back(ResultVal);
-      } else {
-        Elements.push_back(VecSrc.getVectorElt(0));
-      }
+    unsigned Mask = MaskValue.getInt().getZExtValue();
+    SmallVector<APValue, 4> Elements;
 
-      unsigned NumEltsA = VecA.getVectorLength();
-      for (unsigned I = 1; I < NumEltsA; ++I) {
-          Elements.push_back(VecA.getVectorElt(I));
-      }
+    if (Mask & 1) {
+      APValue ResultVal;
+      if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+                                      ResultVal))
+        return false;
+      Elements.push_back(ResultVal);
+    } else {
+      Elements.push_back(VecSrc.getVectorElt(0));
+    }
 
-      return Success(Elements, E);
+    unsigned NumEltsA = VecA.getVectorLength();
+    for (unsigned I = 1; I < NumEltsA; ++I) {
+      Elements.push_back(VecA.getVectorElt(I));
     }
-    case X86::BI__builtin_ia32_cvtpd2ps:
-    case X86::BI__builtin_ia32_cvtpd2ps256: 
-    case X86::BI__builtin_ia32_cvtpd2ps_mask:     
-    case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
 
+    return Success(Elements, E);
+  }
+  case X86::BI__builtin_ia32_cvtpd2ps:
+  case X86::BI__builtin_ia32_cvtpd2ps256:
+  case X86::BI__builtin_ia32_cvtpd2ps_mask:
+  case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
 
-      const auto BuiltinID = E->getBuiltinCallee();
-      bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || 
-                       BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+    const auto BuiltinID = E->getBuiltinCallee();
+    bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+                     BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
 
-      APValue InputValue;
-      if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
-        return false;
-
-      APValue MergeValue;
-      unsigned Mask = 0xFFFFFFFF;
-      bool NeedsMerge = false;
-      if (IsMasked) {
-          APValue MaskValue;
-          if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
-              return false;
-          Mask = MaskValue.getInt().getZExtValue();
-          auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
-          for (unsigned I = 0; I < NumEltsResult; ++I) {
-            if (!((Mask >> I) & 1)) {
-              NeedsMerge = true;
-              break;
-            }
-          }
-          if (NeedsMerge) {
-            if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
-              return false;
-          }
-      }
+    APValue InputValue;
+    if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+      return false;
 
-      unsigned NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
-      unsigned NumEltsInput = InputValue.getVectorLength();
-      SmallVector<APValue, 8> Elements;
+    APValue MergeValue;
+    unsigned Mask = 0xFFFFFFFF;
+    bool NeedsMerge = false;
+    if (IsMasked) {
+      APValue MaskValue;
+      if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+        return false;
+      Mask = MaskValue.getInt().getZExtValue();
+      auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
       for (unsigned I = 0; I < NumEltsResult; ++I) {
-        if (IsMasked && !((Mask >> I) & 1)) {
-            if (!NeedsMerge) {
-              return false;
-            }
-            Elements.push_back(MergeValue.getVectorElt(I));
-            continue; 
+        if (!((Mask >> I) & 1)) {
+          NeedsMerge = true;
+          break;
         }
+      }
+      if (NeedsMerge) {
+        if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+          return false;
+      }
+    }
 
-        if (I >= NumEltsInput) {
-           Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
-           continue;
+    unsigned NumEltsResult =
+        E->getType()->getAs<VectorType>()->getNumElements();
+    unsigned NumEltsInput = InputValue.getVectorLength();
+    SmallVector<APValue, 8> Elements;
+    for (unsigned I = 0; I < NumEltsResult; ++I) {
+      if (IsMasked && !((Mask >> I) & 1)) {
+        if (!NeedsMerge) {
+          return false;
         }
+        Elements.push_back(MergeValue.getVectorElt(I));
+        continue;
+      }
 
-        APValue ResultVal;
-        if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
-           return false;
-        
-        Elements.push_back(ResultVal);
+      if (I >= NumEltsInput) {
+        Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+        continue;
       }
-      return Success(Elements, E);
+
+      APValue ResultVal;
+      if (!ConvertDoubleToFloatStrict(
+              Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+        return false;
+
+      Elements.push_back(ResultVal);
     }
+    return Success(Elements, E);
+  }
 
-  
   case X86::BI__builtin_ia32_shufps:
   case X86::BI__builtin_ia32_shufps256:
   case X86::BI__builtin_ia32_shufps512: {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][Clang] Support constexpr evaluation of cvtpd2ps intrinsics (PR #169980)

Reply via email to