Author: stomfaig
Date: 2025-11-22T14:37:50Z
New Revision: 2e424deeb6180d112323f4df955c8034eb56780c

URL: https://github.com/llvm/llvm-project/commit/2e424deeb6180d112323f4df955c8034eb56780c
DIFF: https://github.com/llvm/llvm-project/commit/2e424deeb6180d112323f4df955c8034eb56780c.diff

LOG:  [Clang][X86] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow VPERMILPD/S variable mask intrinsics to be used in constexpr (#168861)

Allowing VPERMILPD/S intrinsics to be used in constexpr

Closes #167878

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/lib/Headers/avx512fintrin.h
    clang/lib/Headers/avx512vlintrin.h
    clang/lib/Headers/avxintrin.h
    clang/test/CodeGen/X86/avx-builtins.c
    clang/test/CodeGen/X86/avx512f-builtins.c
    clang/test/CodeGen/X86/avx512vl-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 132c1e8ea7190..3c9fbd912ceaf 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -461,11 +461,14 @@ let Features = "avx", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWid
   def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant int)">;
 }
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, 
_Vector<2, long long int>)">;
   def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<4, int>)">;
   def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, 
_Vector<4, long long int>)">;
   def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, 
_Vector<8, int>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
   def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant char)">;
   def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, 
double>, _Constant char)">;
   def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant char)">;
@@ -2338,15 +2341,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
   def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
   def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant 
int)">;
 }
-let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
-  def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Constant int)">;
-  def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Constant int)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, long long int>)">;
-  def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, int>)">;
-}
 
 let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
   def rndscalesd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, 
double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int, 
_Constant int)">;
@@ -2439,6 +2433,14 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>
 let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, double>, _Constant int)">;
   def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, float>, _Constant int)">;
+  def vpermilpd512
+      : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
+  def vpermilps512
+      : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
+  def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, "
+                                   "_Vector<8, long long int>)">;
+  def vpermilvarps512
+      : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
 }
 
 let Features = "avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a403f293a5c34..27eb6c5c698f2 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4653,6 +4653,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           return std::make_pair(0, static_cast<int>(LaneBase + Sel));
         });
 
+  case X86::BI__builtin_ia32_vpermilvarpd:
+  case X86::BI__builtin_ia32_vpermilvarpd256:
+  case X86::BI__builtin_ia32_vpermilvarpd512:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned NumElemPerLane = 2;
+          unsigned Lane = DstIdx / NumElemPerLane;
+          unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
+          return std::make_pair(
+              0, static_cast<int>(Lane * NumElemPerLane + Offset));
+        });
+
+  case X86::BI__builtin_ia32_vpermilvarps:
+  case X86::BI__builtin_ia32_vpermilvarps256:
+  case X86::BI__builtin_ia32_vpermilvarps512:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned NumElemPerLane = 4;
+          unsigned Lane = DstIdx / NumElemPerLane;
+          unsigned Offset = ShuffleMask & 0b11;
+          return std::make_pair(
+              0, static_cast<int>(Lane * NumElemPerLane + Offset));
+        });
+
   case X86::BI__builtin_ia32_vpermilpd:
   case X86::BI__builtin_ia32_vpermilpd256:
   case X86::BI__builtin_ia32_vpermilpd512:

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index bc885f4c89028..ad1f49ce9b04e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13043,6 +13043,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     return Success(R, E);
   }
 
+  case X86::BI__builtin_ia32_vpermilvarpd:
+  case X86::BI__builtin_ia32_vpermilvarpd256:
+  case X86::BI__builtin_ia32_vpermilvarpd512: {
+    APValue R;
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+              unsigned NumElemPerLane = 2;
+              unsigned Lane = DstIdx / NumElemPerLane;
+              unsigned Offset = Mask & 0b10 ? 1 : 0;
+              return std::make_pair(
+                  0, static_cast<int>(Lane * NumElemPerLane + Offset));
+            }))
+      return false;
+    return Success(R, E);
+  }
+
   case X86::BI__builtin_ia32_vpermilpd:
   case X86::BI__builtin_ia32_vpermilpd256:
   case X86::BI__builtin_ia32_vpermilpd512: {
@@ -13062,6 +13079,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     return Success(R, E);
   }
 
+  case X86::BI__builtin_ia32_vpermilvarps:
+  case X86::BI__builtin_ia32_vpermilvarps256:
+  case X86::BI__builtin_ia32_vpermilvarps512: {
+    APValue R;
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+              unsigned NumElemPerLane = 4;
+              unsigned Lane = DstIdx / NumElemPerLane;
+              unsigned Offset = Mask & 0b11;
+              return std::make_pair(
+                  0, static_cast<int>(Lane * NumElemPerLane + Offset));
+            }))
+      return false;
+    return Success(R, E);
+  }
+
   case X86::BI__builtin_ia32_phminposuw128: {
     APValue Source;
     if (!Evaluate(Source, Info, E->getArg(0)))

diff  --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e4184795e47e9..e1de56069870b 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5879,45 +5879,39 @@ _mm_cvttss_u64 (__m128 __A)
                                        (__v16sf)_mm512_permute_ps((X), (C)), \
                                        (__v16sf)_mm512_setzero_ps()))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutevar_pd(__m512d __A, __m512i __C)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutevar_pd(__m512d __A, __m512i __C) {
   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 
{
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                          (__v8df)_mm512_permutevar_pd(__A, 
__C),
                                          (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                          (__v8df)_mm512_permutevar_pd(__A, 
__C),
                                          (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutevar_ps(__m512 __A, __m512i __C)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutevar_ps(__m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                         (__v16sf)_mm512_permutevar_ps(__A, 
__C),
                                         (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                         (__v16sf)_mm512_permutevar_ps(__A, 
__C),
                                         (__v16sf)_mm512_setzero_ps());

diff  --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 5a1b540e07e3a..99c057030a4cc 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5847,65 +5847,57 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
                                        (__v8sf)_mm256_permute_ps((X), (C)), \
                                        (__v8sf)_mm256_setzero_ps()))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                             (__v2df)_mm_permutevar_pd(__A, 
__C),
                                             (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                             (__v2df)_mm_permutevar_pd(__A, 
__C),
                                             (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 
{
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                          (__v4df)_mm256_permutevar_pd(__A, 
__C),
                                          (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                          (__v4df)_mm256_permutevar_pd(__A, 
__C),
                                          (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_permutevar_ps(__A, 
__C),
                                             (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_permutevar_ps(__A, 
__C),
                                             (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                           (__v8sf)_mm256_permutevar_ps(__A, 
__C),
                                           (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                           (__v8sf)_mm256_permutevar_ps(__A, 
__C),
                                           (__v8sf)_mm256_setzero_ps());

diff  --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 3e1618ed192c8..44ef88db5cbce 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -787,9 +787,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR 
_mm256_hsub_ps(__m256 __a,
 ///      1: Bits [127:64] of the source are copied to bits [127:64] of the
 ///         returned vector.
 /// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline __m128d __DEFAULT_FN_ATTRS128
-_mm_permutevar_pd(__m128d __a, __m128i __c)
-{
+static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutevar_pd(__m128d __a, __m128i __c) {
   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
 }
 
@@ -826,9 +825,8 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
 ///      1: Bits [255:192] of the source are copied to bits [255:192] of the
 ///    returned vector.
 /// \returns A 256-bit vector of [4 x double] containing the copied values.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_permutevar_pd(__m256d __a, __m256i __c)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_permutevar_pd(__m256d __a, __m256i __c) {
   return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
 }
 
@@ -881,9 +879,8 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
 ///      11: Bits [127:96] of the source are copied to bits [127:96] of the
 ///          returned vector.
 /// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
-_mm_permutevar_ps(__m128 __a, __m128i __c)
-{
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutevar_ps(__m128 __a, __m128i __c) {
   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
 }
 
@@ -972,9 +969,8 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
 ///      11: Bits [255:224] of the source are copied to bits [255:224] of the
 ///          returned vector.
 /// \returns A 256-bit vector of [8 x float] containing the copied values.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_permutevar_ps(__m256 __a, __m256i __c)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_permutevar_ps(__m256 __a, __m256i __c) {
   return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
 }
 

diff  --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index f8931e7e55410..00bcf9cc1da58 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1454,24 +1454,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
   // CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> 
%{{.*}}, <2 x i64> %{{.*}})
   return _mm_permutevar_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(
+  _mm_permutevar_pd(
+    ((__m128d){0.0, 1.0}),
+    ((__m128i){0b10, 0b00})
+  ),
+  1.0, 0.0
+));
 
 __m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
   // CHECK-LABEL: test_mm256_permutevar_pd
   // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x 
double> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_permutevar_pd(A, B);
 }
+TEST_CONSTEXPR(match_m256d(
+  _mm256_permutevar_pd(
+    ((__m256d){0.0, 1.0, 2.0, 3.0}),
+    ((__m256i){0b10, 0b00, 0b00, 0b10})
+  ),
+  1.0, 0.0, 2.0, 3.0
+));
 
 __m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
   // CHECK-LABEL: test_mm_permutevar_ps
   // CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> 
%{{.*}}, <4 x i32> %{{.*}})
   return _mm_permutevar_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(
+  _mm_permutevar_ps(
+    ((__m128){0.0, 1.0, 2.0, 3.0}),
+    ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
+  ),
+  3.0, 2.0, 1.0, 0.0
+));
 
 __m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
   // CHECK-LABEL: test_mm256_permutevar_ps
   // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> 
%{{.*}}, <8 x i32> %{{.*}})
   return _mm256_permutevar_ps(A, B);
 }
+TEST_CONSTEXPR(match_m256(
+  _mm256_permutevar_ps(
+    ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10})
+  ),
+  3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0
+));
 
 __m256 test_mm256_rcp_ps(__m256 A) {
   // CHECK-LABEL: test_mm256_rcp_ps

diff  --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 1f4bb51a2e34f..0d53dd6ef4c64 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5622,6 +5622,13 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i 
__C) {
   // CHECK: @llvm.x86.avx512.vpermilvar.pd.512
   return _mm512_permutevar_pd(__A, __C); 
 }
+TEST_CONSTEXPR(match_m512d(
+  _mm512_permutevar_pd(
+    ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
+  ),
+  0.0, 0.0, 3.0, 2.0, 4.0, 5.0, 7.0, 7.0
+));
 
 __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, 
__m512i __C) {
   // CHECK-LABEL: test_mm512_mask_permutevar_pd
@@ -5629,6 +5636,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, 
__mmask8 __U, __m512d __A, __
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_permutevar_pd(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512d(
+  _mm512_mask_permutevar_pd(
+    ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+    (__mmask8)0b01010101,
+    ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
+  ),
+  0.0, 9.0, 3.0, 11.0, 4.0, 13.0, 7.0, 15.0
+));
 
 __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) 
{
   // CHECK-LABEL: test_mm512_maskz_permutevar_pd
@@ -5636,12 +5652,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, 
__m512d __A, __m512i __C) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_permutevar_pd(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512d(
+  _mm512_maskz_permutevar_pd(
+    (__mmask8)0b01010101,
+    ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
+  ),
+  0.0, 0.0, 3.0, 0.0, 4.0, 0.0, 7.0, 0.0
+));
 
 __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
   // CHECK-LABEL: test_mm512_permutevar_ps
   // CHECK: @llvm.x86.avx512.vpermilvar.ps.512
   return _mm512_permutevar_ps(__A, __C); 
 }
+TEST_CONSTEXPR(match_m512(
+  _mm512_permutevar_ps(
+    ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
+    ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 
0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
+  ),
+  3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0, 10.0, 11.0, 8.0, 9.0, 12.0, 15.0, 
13.0, 14.0
+));
 
 __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, 
__m512i __C) {
   // CHECK-LABEL: test_mm512_mask_permutevar_ps
@@ -5649,6 +5680,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, 
__mmask16 __U, __m512 __A, __m5
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> 
%{{.*}}
   return _mm512_mask_permutevar_ps(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512(
+  _mm512_mask_permutevar_ps(
+    ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 
26.0, 27.0, 28.0, 29.0, 30.0, 31.0}),
+    (__mmask16)0b0101010101010101,
+    ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
+    ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 
0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
+  ),
+  3.0, 17.0, 1.0, 19.0, 5.0, 21.0, 7.0, 23.0, 10.0, 25.0, 8.0, 27.0, 12.0, 
29.0, 13.0, 31.0
+));
 
 __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
   // CHECK-LABEL: test_mm512_maskz_permutevar_ps
@@ -5656,6 +5696,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, 
__m512 __A, __m512i __C) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> 
%{{.*}}
   return _mm512_maskz_permutevar_ps(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512(
+  _mm512_maskz_permutevar_ps(
+    (__mmask16)0b0101010101010101,
+    ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
+    ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 
0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
+  ),
+  3.0, 0.0, 1.0, 0.0, 5.0, 0.0, 7.0, 0.0, 10.0, 0.0, 8.0, 0.0, 12.0, 0.0, 
13.0, 0.0
+));
 
 __m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
   // CHECK-LABEL: test_mm512_permutex2var_epi32

diff  --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 4b2ba3341af29..e6eb91e6e8ce4 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8131,6 +8131,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 
__U, __m128d __A, __m12
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_mask_permutevar_pd(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128d(
+  _mm_mask_permutevar_pd(
+    ((__m128d){3.0, 4.0}),
+    (__mmask8)0b01,
+    ((__m128d){0.0, 1.0}),
+    ((__m128i){0b10, 0b00})
+  ),
+  1.0, 4.0
+));
 
 __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
   // CHECK-LABEL: test_mm_maskz_permutevar_pd
@@ -8138,6 +8147,14 @@ __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, 
__m128d __A, __m128i __C) {
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_maskz_permutevar_pd(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128d(
+  _mm_maskz_permutevar_pd(
+    (__mmask8)0b01,
+    ((__m128d){0.0, 1.0}),
+    ((__m128i){0b10, 0b00})
+  ),
+  1.0, 0.0
+));
 
 __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, 
__m256i __C) {
   // CHECK-LABEL: test_mm256_mask_permutevar_pd
@@ -8145,6 +8162,15 @@ __m256d test_mm256_mask_permutevar_pd(__m256d __W, 
__mmask8 __U, __m256d __A, __
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_permutevar_pd(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256d(
+  _mm256_mask_permutevar_pd(
+    ((__m256d){4.0, 5.0, 6.0, 7.0}),
+    (__mmask8)0b0101,
+    ((__m256d){0.0, 1.0, 2.0, 3.0}),
+    ((__m256i){0b10, 0b00, 0b00, 0b10})
+  ),
+  1.0, 5.0, 2.0, 7.0
+));
 
 __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 
{
   // CHECK-LABEL: test_mm256_maskz_permutevar_pd
@@ -8152,6 +8178,14 @@ __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, 
__m256d __A, __m256i __C) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_permutevar_pd(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256d(
+  _mm256_maskz_permutevar_pd(
+    (__mmask8)0b0101,
+    ((__m256d){0.0, 1.0, 2.0, 3.0}),
+    ((__m256i){0b10, 0b00, 0b00, 0b10})
+  ),
+  1.0, 0.0, 2.0, 0.0
+));
 
 __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, 
__m128i __C) {
   // CHECK-LABEL: test_mm_mask_permutevar_ps
@@ -8159,6 +8193,15 @@ __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 
__U, __m128 __A, __m128i
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_mask_permutevar_ps(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128(
+  _mm_mask_permutevar_ps(
+    ((__m128){4.0, 5.0, 6.0, 7.0}),
+    (__mmask8)0b0101,
+    ((__m128){0.0, 1.0, 2.0, 3.0}),
+    ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
+  ),
+  3.0, 5.0, 1.0, 7.0
+));
 
 __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
   // CHECK-LABEL: test_mm_maskz_permutevar_ps
@@ -8166,6 +8209,14 @@ __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 
__A, __m128i __C) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_maskz_permutevar_ps(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128(
+  _mm_maskz_permutevar_ps(
+    (__mmask8)0b0101,
+    ((__m128){0.0, 1.0, 2.0, 3.0}),
+    ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
+  ),
+  3.0, 0.0, 1.0, 0.0
+));
 
 __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, 
__m256i __C) {
   // CHECK-LABEL: test_mm256_mask_permutevar_ps
@@ -8173,6 +8224,15 @@ __m256 test_mm256_mask_permutevar_ps(__m256 __W, 
__mmask8 __U, __m256 __A, __m25
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_permutevar_ps(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256(
+  _mm256_mask_permutevar_ps(
+    ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+    (__mmask8)0b01010101,
+    ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10})
+  ),
+  3.0, 9.0, 1.0, 11.0, 4.0, 13.0, 5.0, 15.0
+));
 
 __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
   // CHECK-LABEL: test_mm256_maskz_permutevar_ps
@@ -8180,6 +8240,14 @@ __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, 
__m256 __A, __m256i __C) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_permutevar_ps(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256(
+  _mm256_maskz_permutevar_ps(
+    (__mmask8)0b01010101,
+    ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10})
+  ),
+  3.0, 0.0, 1.0, 0.0, 4.0, 0.0, 5.0, 0.0
+));
 
 __mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_test_epi32_mask


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to