https://github.com/RKSimon updated 
https://github.com/llvm/llvm-project/pull/168044

>From a1023b432060362055296691bc5822a910b24f05 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Fri, 14 Nov 2025 10:49:57 +0000
Subject: [PATCH 1/6] adding cases for vpermilpd and vpermilps

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 ++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index cee3c1b8cf8f3..9ea66c6de7553 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4562,6 +4562,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
           return std::make_pair(0, static_cast<int>(LaneBase + Sel));
         });
+  
+  case X86::BI__builtin_ia32_vpermilpd:
+  case X86::BI__builtin_ia32_vpermilpd256:
+  case X86::BI__builtin_ia32_vpermilpd512:
+    return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned 
DstIdx, unsigned Control) {
+      unsigned NumElemPerLane = 2;
+      unsigned BitsPerElem = 1;
+      unsigned MaskBits = 8;
+      unsigned IndexMask = 0x1;
+      unsigned Lane = DstIdx / NumElemPerLane;
+      unsigned LaneOffset = Lane * NumElemPerLane;
+      unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
+      unsigned Index = (Control >> BitIndex) & IndexMask;
+      return std::make_pair(0, static_cast<int>(LaneOffset + Index));
+    });
+  
+  case X86::BI__builtin_ia32_vpermilps:
+  case X86::BI__builtin_ia32_vpermilps256:
+  case X86::BI__builtin_ia32_vpermilps512:
+    return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned 
DstIdx, unsigned Control) {
+      unsigned NumElemPerLane = 4;
+      unsigned BitsPerElem = 2;
+      unsigned MaskBits = 8;
+      unsigned IndexMask = 0x3;
+      unsigned Lane = DstIdx / NumElemPerLane;
+      unsigned LaneOffset = Lane * NumElemPerLane;
+      unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
+      unsigned Index = (Control >> BitIndex) & IndexMask;
+      return std::make_pair(0, static_cast<int>(LaneOffset + Index));
+    });
 
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:

>From 495094e19f8de9b2aca3d5b4a262fb03445190a6 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Fri, 14 Nov 2025 15:53:19 +0000
Subject: [PATCH 2/6] vpermilps has same impl as pshufd

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9ea66c6de7553..4a09bc41dd4e0 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4555,6 +4555,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
   case X86::BI__builtin_ia32_pshufd:
   case X86::BI__builtin_ia32_pshufd256:
   case X86::BI__builtin_ia32_pshufd512:
+  case X86::BI__builtin_ia32_vpermilps:
+  case X86::BI__builtin_ia32_vpermilps256:
+  case X86::BI__builtin_ia32_vpermilps512:
     return interp__builtin_ia32_shuffle_generic(
         S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
           unsigned LaneBase = (DstIdx / 4) * 4;
@@ -4577,21 +4580,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
       unsigned Index = (Control >> BitIndex) & IndexMask;
       return std::make_pair(0, static_cast<int>(LaneOffset + Index));
     });
-  
-  case X86::BI__builtin_ia32_vpermilps:
-  case X86::BI__builtin_ia32_vpermilps256:
-  case X86::BI__builtin_ia32_vpermilps512:
-    return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned 
DstIdx, unsigned Control) {
-      unsigned NumElemPerLane = 4;
-      unsigned BitsPerElem = 2;
-      unsigned MaskBits = 8;
-      unsigned IndexMask = 0x3;
-      unsigned Lane = DstIdx / NumElemPerLane;
-      unsigned LaneOffset = Lane * NumElemPerLane;
-      unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
-      unsigned Index = (Control >> BitIndex) & IndexMask;
-      return std::make_pair(0, static_cast<int>(LaneOffset + Index));
-    });
 
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:

>From c004b68db4dc43b57db6ec39b5609f778b13d629 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Fri, 14 Nov 2025 15:55:54 +0000
Subject: [PATCH 3/6] impl vpermilps and vpermilpd in ExprConstant

---
 clang/lib/AST/ExprConstant.cpp | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b7da89ab3dcf2..b30bdf3d24508 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12998,7 +12998,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
 
   case X86::BI__builtin_ia32_pshufd:
   case X86::BI__builtin_ia32_pshufd256:
-  case X86::BI__builtin_ia32_pshufd512: {
+  case X86::BI__builtin_ia32_pshufd512:
+  case X86::BI__builtin_ia32_vpermilps:
+  case X86::BI__builtin_ia32_vpermilps256:
+  case X86::BI__builtin_ia32_vpermilps512: {
     APValue R;
     if (!evalShuffleGeneric(
             Info, E, R,
@@ -13015,6 +13018,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     return Success(R, E);
   }
 
+  case X86::BI__builtin_ia32_vpermilpd:
+  case X86::BI__builtin_ia32_vpermilpd256:
+  case X86::BI__builtin_ia32_vpermilpd512: {
+    APValue R;
+    if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
+      unsigned NumElemPerLane = 2;
+      unsigned BitsPerElem = 1;
+      unsigned MaskBits = 8;
+      unsigned IndexMask = 0x1;
+      unsigned Lane = DstIdx / NumElemPerLane;
+      unsigned LaneOffset = Lane * NumElemPerLane;
+      unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
+      unsigned Index = (Control >> BitIndex) & IndexMask;
+      return std::make_pair(0, static_cast<int>(LaneOffset + Index));
+    }))
+      return false;
+    return Success(R, E);
+  }
+  
   case X86::BI__builtin_ia32_phminposuw128: {
     APValue Source;
     if (!Evaluate(Source, Info, E->getArg(0)))

>From f2c0a61b594c61669d0e205a7fec2d513e4e8122 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Fri, 14 Nov 2025 15:56:39 +0000
Subject: [PATCH 4/6] format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 25 ++++++++++++------------
 clang/lib/AST/ExprConstant.cpp           | 22 ++++++++++-----------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 4a09bc41dd4e0..f88174ca093d3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4565,21 +4565,22 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
           return std::make_pair(0, static_cast<int>(LaneBase + Sel));
         });
-  
+
   case X86::BI__builtin_ia32_vpermilpd:
   case X86::BI__builtin_ia32_vpermilpd256:
   case X86::BI__builtin_ia32_vpermilpd512:
-    return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned 
DstIdx, unsigned Control) {
-      unsigned NumElemPerLane = 2;
-      unsigned BitsPerElem = 1;
-      unsigned MaskBits = 8;
-      unsigned IndexMask = 0x1;
-      unsigned Lane = DstIdx / NumElemPerLane;
-      unsigned LaneOffset = Lane * NumElemPerLane;
-      unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
-      unsigned Index = (Control >> BitIndex) & IndexMask;
-      return std::make_pair(0, static_cast<int>(LaneOffset + Index));
-    });
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
+          unsigned NumElemPerLane = 2;
+          unsigned BitsPerElem = 1;
+          unsigned MaskBits = 8;
+          unsigned IndexMask = 0x1;
+          unsigned Lane = DstIdx / NumElemPerLane;
+          unsigned LaneOffset = Lane * NumElemPerLane;
+          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
+          unsigned Index = (Control >> BitIndex) & IndexMask;
+          return std::make_pair(0, static_cast<int>(LaneOffset + Index));
+        });
 
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b30bdf3d24508..25233d4ceff57 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13023,20 +13023,20 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
   case X86::BI__builtin_ia32_vpermilpd512: {
     APValue R;
     if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
-      unsigned NumElemPerLane = 2;
-      unsigned BitsPerElem = 1;
-      unsigned MaskBits = 8;
-      unsigned IndexMask = 0x1;
-      unsigned Lane = DstIdx / NumElemPerLane;
-      unsigned LaneOffset = Lane * NumElemPerLane;
-      unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
-      unsigned Index = (Control >> BitIndex) & IndexMask;
-      return std::make_pair(0, static_cast<int>(LaneOffset + Index));
-    }))
+          unsigned NumElemPerLane = 2;
+          unsigned BitsPerElem = 1;
+          unsigned MaskBits = 8;
+          unsigned IndexMask = 0x1;
+          unsigned Lane = DstIdx / NumElemPerLane;
+          unsigned LaneOffset = Lane * NumElemPerLane;
+          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
+          unsigned Index = (Control >> BitIndex) & IndexMask;
+          return std::make_pair(0, static_cast<int>(LaneOffset + Index));
+        }))
       return false;
     return Success(R, E);
   }
-  
+
   case X86::BI__builtin_ia32_phminposuw128: {
     APValue Source;
     if (!Evaluate(Source, Info, E->getArg(0)))

>From 87017fc335f207a368125329a01972cdcc6539f3 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Tue, 18 Nov 2025 20:52:21 +0000
Subject: [PATCH 5/6] make intrinsics Constexpr

---
 clang/include/clang/Basic/BuiltinsX86.td | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 69d18679fd6ec..0aa2249c1f8be 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -511,7 +511,7 @@ let Features = "avx", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in
   def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, 
_Vector<8, int>, _Constant int)">;
 }
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def vpermilpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant 
int)">;
   def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant 
int)">;
 }
@@ -527,6 +527,8 @@ let Features = "avx", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWid
   def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, 
_Vector<2, double>, _Constant int)">;
   def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, 
_Vector<4, float>, _Constant int)">;
   def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, 
_Vector<4, int>, _Constant int)">;
+  def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, 
_Constant int)">;
+  def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, 
_Constant int)">;
 
   foreach Op = ["hadd", "hsub"] in {
     def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, 
_Vector<4, double>)">;
@@ -535,8 +537,6 @@ let Features = "avx", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWid
 }
 
 let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
-  def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, 
_Constant int)">;
-  def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, 
_Constant int)">;
   def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
   def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
   def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
@@ -2365,10 +2365,12 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
   def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
   def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant 
int)">;
 }
-
-let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Constant int)">;
   def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
   def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, long long int>)">;
   def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, int>)">;
 }

>From 1a7fd492bbedd9a59b617ef92e9fe7355ced5b65 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Tue, 18 Nov 2025 20:52:32 +0000
Subject: [PATCH 6/6] adding tests

---
 clang/test/CodeGen/X86/avx-builtins.c      |  5 ++
 clang/test/CodeGen/X86/avx512f-builtins.c  | 32 +++++++++++++
 clang/test/CodeGen/X86/avx512vl-builtins.c | 53 ++++++++++++++++++++++
 3 files changed, 90 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx-builtins.c 
b/clang/test/CodeGen/X86/avx-builtins.c
index 737febbc7fef6..6bf5df8a30b95 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1398,18 +1398,21 @@ __m128d test_mm_permute_pd(__m128d A) {
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> 
<i32 1, i32 0>
   return _mm_permute_pd(A, 1);
 }
+TEST_CONSTEXPR(match_m128d(_mm_permute_pd(((__m128d){1.0, 2.0}), 1), 2.0, 
1.0));
 
 __m256d test_mm256_permute_pd(__m256d A) {
   // CHECK-LABEL: test_mm256_permute_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> 
<i32 1, i32 0, i32 3, i32 2>
   return _mm256_permute_pd(A, 5);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_permute_pd(((__m256d){1.0f, 2.0f, 3.0f, 
4.0f}), 5), 2.0f, 1.0f, 4.0f, 3.0f));
 
 __m128 test_mm_permute_ps(__m128 A) {
   // CHECK-LABEL: test_mm_permute_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> 
<i32 3, i32 2, i32 1, i32 0>
   return _mm_permute_ps(A, 0x1b);
 }
+TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 
0x1b), 4.0, 3.0, 2.0, 1.0));
 
 // Test case for PR12401
 __m128 test2_mm_permute_ps(__m128 a) {
@@ -1417,12 +1420,14 @@ __m128 test2_mm_permute_ps(__m128 a) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> 
<i32 2, i32 1, i32 2, i32 3>
   return _mm_permute_ps(a, 0xe6);
 }
+TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 
0xe6), 3.0, 2.0, 3.0, 4.0));
 
 __m256 test_mm256_permute_ps(__m256 A) {
   // CHECK-LABEL: test_mm256_permute_ps
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> 
<i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   return _mm256_permute_ps(A, 0x1b);
 }
+TEST_CONSTEXPR(match_m256(_mm256_permute_ps(((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 
6.0, 7.0, 8.0}), 0x1b), 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0));
 
 __m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_permute2f128_pd
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c 
b/clang/test/CodeGen/X86/avx512f-builtins.c
index 71e700af0069e..5d550530108ca 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5448,6 +5448,7 @@ __m512d test_mm512_permute_pd(__m512d __X) {
   // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> 
<i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   return _mm512_permute_pd(__X, 2);
 }
+TEST_CONSTEXPR(match_m512d(_mm512_permute_pd(((__m512d){0.0, 1.0, 2.0, 3.0, 
4.0, 5.0, 6.0, 7.0}), 2), 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0));
 
 __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) {
   // CHECK-LABEL: test_mm512_mask_permute_pd
@@ -5455,6 +5456,13 @@ __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 
__U, __m512d __X) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_permute_pd(__W, __U, __X, 2);
 }
+TEST_CONSTEXPR(match_m512d(_mm512_mask_permute_pd(
+  ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+  (__mmask8)0b01010100,
+  ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+  2),
+  0.0, 1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0
+));
 
 __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) {
   // CHECK-LABEL: test_mm512_maskz_permute_pd
@@ -5462,12 +5470,23 @@ __m512d test_mm512_maskz_permute_pd(__mmask8 __U, 
__m512d __X) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_permute_pd(__U, __X, 2);
 }
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_permute_pd(
+  (__mmask8)0b01010100,
+  ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+  2),
+  0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0
+));
 
 __m512 test_mm512_permute_ps(__m512 __X) {
   // CHECK-LABEL: test_mm512_permute_ps
   // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x 
i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, 
i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
   return _mm512_permute_ps(__X, 2);
 }
+TEST_CONSTEXPR(match_m512(_mm512_permute_ps(
+  ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
+  2),
+  2, 0, 0, 0, 6, 4, 4, 4, 10, 8, 8, 8, 14, 12, 12, 12
+));
 
 __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) {
   // CHECK-LABEL: test_mm512_mask_permute_ps
@@ -5475,6 +5494,13 @@ __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 
__U, __m512 __X) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> 
%{{.*}}
   return _mm512_mask_permute_ps(__W, __U, __X, 2);
 }
+TEST_CONSTEXPR(match_m512(_mm512_mask_permute_ps(
+  ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
+  (__mmask16)0b1010101010101010,
+  ((__m512){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}),
+  2),
+  0, 16, 2, 16, 4, 20, 6, 20, 8, 24, 10, 24, 12, 28, 14, 28
+));
 
 __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) {
   // CHECK-LABEL: test_mm512_maskz_permute_ps
@@ -5482,6 +5508,12 @@ __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 
__X) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> 
%{{.*}}
   return _mm512_maskz_permute_ps(__U, __X, 2);
 }
+TEST_CONSTEXPR(match_m512(_mm512_maskz_permute_ps(
+  (__mmask16)0b1010101010101010,
+  ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
+  2),
+  0, 0, 0, 0, 0, 4, 0, 4, 0, 8, 0, 8, 0, 12, 0, 12
+));
 
 __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
   // CHECK-LABEL: test_mm512_permutevar_pd
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c 
b/clang/test/CodeGen/X86/avx512vl-builtins.c
index a7eee79c97539..1312057d3f7a2 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7951,6 +7951,13 @@ __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 
__U, __m128d __X) {
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_mask_permute_pd(__W, __U, __X, 1); 
 }
+TEST_CONSTEXPR(match_m128d(_mm_mask_permute_pd(
+  ((__m128d){0.0, 1.0}),
+  (__mmask8)0b10,
+  ((__m128d){2.0, 3.0}),
+  1),
+  0.0, 2.0
+));
 
 __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
   // CHECK-LABEL: test_mm_maskz_permute_pd
@@ -7958,6 +7965,12 @@ __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d 
__X) {
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_maskz_permute_pd(__U, __X, 1); 
 }
+TEST_CONSTEXPR(match_m128d(_mm_maskz_permute_pd(
+  (__mmask8)0b10,
+  ((__m128d){1.0, 2.0}),
+  1),
+  0.0, 1.0
+));
 
 __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
   // CHECK-LABEL: test_mm256_mask_permute_pd
@@ -7965,6 +7978,13 @@ __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 
__U, __m256d __X) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_permute_pd(__W, __U, __X, 5); 
 }
+TEST_CONSTEXPR(match_m256d(_mm256_mask_permute_pd(
+  ((__m256d){0.0, 1.0, 2.0, 3.0}),
+  (__mmask8)0b1010,
+  ((__m256d){4.0, 5.0, 6.0, 7.0}),
+  5),
+  0.0, 4.0, 2.0, 6.0
+));
 
 __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
   // CHECK-LABEL: test_mm256_maskz_permute_pd
@@ -7972,6 +7992,12 @@ __m256d test_mm256_maskz_permute_pd(__mmask8 __U, 
__m256d __X) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_permute_pd(__U, __X, 5); 
 }
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_permute_pd(
+  (__mmask8)0b1010,
+  ((__m256d){4.0, 5.0, 6.0, 7.0}),
+  5),
+  0.0, 4.0, 0.0, 6.0
+));
 
 __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
   // CHECK-LABEL: test_mm_mask_permute_ps
@@ -7979,6 +8005,13 @@ __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, 
__m128 __X) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_mask_permute_ps(__W, __U, __X, 0x1b); 
 }
+TEST_CONSTEXPR(match_m128(_mm_mask_permute_ps(
+  ((__m128){0.0, 1.0, 2.0, 3.0}),
+  (__mmask8)0b1010,
+  ((__m128){4.0, 5.0, 6.0, 7.0}),
+  0x1b),
+  0, 6.0, 2.0, 4.0
+));
 
 __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
   // CHECK-LABEL: test_mm_maskz_permute_ps
@@ -7986,6 +8019,13 @@ __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 
__X) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_maskz_permute_ps(__U, __X, 0x1b); 
 }
+TEST_CONSTEXPR(match_m128(_mm_maskz_permute_ps(
+  (__mmask8)0b1010,
+  ((__m128){4.0, 5.0, 6.0, 7.0}),
+  0x1b),
+  0.0, 6.0, 0.0, 4.0
+));
+
 
 __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
   // CHECK-LABEL: test_mm256_mask_permute_ps
@@ -7993,6 +8033,13 @@ __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 
__U, __m256 __X) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_permute_ps(__W, __U, __X, 0x1b); 
 }
+TEST_CONSTEXPR(match_m256(_mm256_mask_permute_ps(
+  ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+  (__mmask8)0b10101010,
+  ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+  0x1b),
+  0.0, 10.0, 2.0, 8.0, 4.0, 14.0, 6.0, 12.0
+));
 
 __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
   // CHECK-LABEL: test_mm256_maskz_permute_ps
@@ -8000,6 +8047,12 @@ __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 
__X) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_permute_ps(__U, __X, 0x1b); 
 }
+TEST_CONSTEXPR(match_m256(_mm256_maskz_permute_ps(
+  (__mmask8)0b10101010,
+  ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+  0x1b),
+  0.0, 10.0, 0.0, 8.0, 0.0, 14.0, 0.0, 12.0
+));
 
 __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, 
__m128i __C) {
   // CHECK-LABEL: test_mm_mask_permutevar_pd

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to