[clang] [X86] Allow AVX2 per-element shift intrinsics to be used in constexpr (PR #154780)

Simon Pilgrim via cfe-commits Thu, 21 Aug 2025 17:32:05 -0700

https://github.com/RKSimon created 
https://github.com/llvm/llvm-project/pull/154780


This handles constant folding for the AVX2 per-element shift intrinsics, which 
handle out of bounds shift amounts (logical result = 0, arithmetic result = 
signbit splat)

AVX512 intrinsics will follow in follow up patches

First stage of #154287

>From 5fb843a4e9e636de92ff01f4b94007599dfea713 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-...@redking.me.uk>
Date: Thu, 21 Aug 2025 16:02:07 +0100
Subject: [PATCH] [X86] Allow AVX2 per-element shift intrinsics to be used in
 constexpr

This handles constant folding for the AVX2 per-element shift intrinsics, which 
handle out of bounds shift amounts (logical result = 0, arithmetic result = 
signbit splat)

AVX512 intrinsics will follow in follow up patches

First stage of #154287
---
 clang/include/clang/Basic/BuiltinsX86.td | 56 ++++++------------------
 clang/lib/AST/ExprConstant.cpp           | 51 +++++++++++++++++++--
 clang/lib/Headers/avx2intrin.h           | 20 ++++-----
 clang/test/CodeGen/X86/avx2-builtins.c   | 22 +++++-----
 4 files changed, 82 insertions(+), 67 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index cc1da937455a2..527acd9ef086e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -627,11 +627,23 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] i
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, 
_Vector<8, int>)">;
   def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, 
_Vector<8, int>)">;
-}
 
-let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, 
unsigned short>, _Vector<16, unsigned short>)">;
   def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, 
_Vector<16, short>)">;
+
+  def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
+  def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
+  def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
+  def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<4, long long int>)">;
+  def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
+  def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>)">;
+  def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>)">;
+  def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>)">;
+  def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, _Vector<2, long long int>)">;
+  def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, _Vector<2, long long int>)">;
 }
 
 let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
@@ -654,46 +666,6 @@ let Features = "avx2", Attributes = [NoThrow, 
RequiredVectorWidth<128>] in {
   def maskstoreq : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, 
long long int>, _Vector<2, long long int>)">;
 }
 
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
-  def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
-  def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
-  def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
-  def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, 
int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
-  def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] 
in {
-  def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long 
int>, _Vector<2, long long int>)">;
-}
-
 let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
   def gatherd_pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, double 
const *, _Vector<4, int>, _Vector<2, double>, _Constant char)">;
 }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a03e64fcffde2..9b934753bcc3c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11669,13 +11669,24 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
   case clang::X86::BI__builtin_ia32_pmulhuw512:
   case clang::X86::BI__builtin_ia32_pmulhw128:
   case clang::X86::BI__builtin_ia32_pmulhw256:
-  case clang::X86::BI__builtin_ia32_pmulhw512: {
+  case clang::X86::BI__builtin_ia32_pmulhw512:
+  case clang::X86::BI__builtin_ia32_psllv2di:
+  case clang::X86::BI__builtin_ia32_psllv4di:
+  case clang::X86::BI__builtin_ia32_psllv4si:
+  case clang::X86::BI__builtin_ia32_psllv8si:
+  case clang::X86::BI__builtin_ia32_psrav4si:
+  case clang::X86::BI__builtin_ia32_psrav8si:
+  case clang::X86::BI__builtin_ia32_psrlv2di:
+  case clang::X86::BI__builtin_ia32_psrlv4di:
+  case clang::X86::BI__builtin_ia32_psrlv4si:
+  case clang::X86::BI__builtin_ia32_psrlv8si:{
     APValue SourceLHS, SourceRHS;
     if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
         !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
       return false;
 
     QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
     unsigned SourceLen = SourceLHS.getVectorLength();
     SmallVector<APValue, 4> ResultElements;
     ResultElements.reserve(SourceLen);
@@ -11687,12 +11698,12 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
       case Builtin::BI__builtin_elementwise_add_sat:
         ResultElements.push_back(APValue(
             APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS),
-                   DestEltTy->isUnsignedIntegerOrEnumerationType())));
+                   DestUnsigned)));
         break;
       case Builtin::BI__builtin_elementwise_sub_sat:
         ResultElements.push_back(APValue(
             APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
-                   DestEltTy->isUnsignedIntegerOrEnumerationType())));
+                   DestUnsigned)));
         break;
       case clang::X86::BI__builtin_ia32_pmulhuw128:
       case clang::X86::BI__builtin_ia32_pmulhuw256:
@@ -11706,6 +11717,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
         ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, 
RHS),
                                                 /*isUnsigned=*/false)));
         break;
+      case clang::X86::BI__builtin_ia32_psllv2di:
+      case clang::X86::BI__builtin_ia32_psllv4di:
+      case clang::X86::BI__builtin_ia32_psllv4si:
+      case clang::X86::BI__builtin_ia32_psllv8si:
+        if (RHS.uge(RHS.getBitWidth())) {
+          ResultElements.push_back(
+              APValue(APSInt(APInt::getZero(RHS.getBitWidth()), 
DestUnsigned)));
+          break;
+        }
+        ResultElements.push_back(
+            APValue(APSInt(LHS.shl(RHS.getZExtValue()), DestUnsigned)));
+        break;
+      case clang::X86::BI__builtin_ia32_psrav4si:
+      case clang::X86::BI__builtin_ia32_psrav8si:
+        if (RHS.uge(RHS.getBitWidth())) {
+          ResultElements.push_back(
+              APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned)));
+          break;
+        }
+        ResultElements.push_back(
+            APValue(APSInt(LHS.ashr(RHS.getZExtValue()), DestUnsigned)));
+        break;
+      case clang::X86::BI__builtin_ia32_psrlv2di:
+      case clang::X86::BI__builtin_ia32_psrlv4di:
+      case clang::X86::BI__builtin_ia32_psrlv4si:
+      case clang::X86::BI__builtin_ia32_psrlv8si:
+        if (RHS.uge(RHS.getBitWidth())) {
+          ResultElements.push_back(
+              APValue(APSInt(APInt::getZero(RHS.getBitWidth()), 
DestUnsigned)));
+          break;
+        }
+        ResultElements.push_back(
+            APValue(APSInt(LHS.lshr(RHS.getZExtValue()), DestUnsigned)));
+        break;
       }
     }
 
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index c7e1c4446e85d..ce5b2b7544d8c 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3721,7 +3721,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i 
__Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
@@ -3743,7 +3743,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_sllv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
@@ -3765,7 +3765,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
@@ -3787,7 +3787,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_sllv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
@@ -3810,7 +3810,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srav_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
@@ -3833,7 +3833,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srav_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
@@ -3855,7 +3855,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
@@ -3877,7 +3877,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srlv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
@@ -3899,7 +3899,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
@@ -3921,7 +3921,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srlv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c 
b/clang/test/CodeGen/X86/avx2-builtins.c
index 5b252fa315ef8..29cb3e8860be9 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -327,7 +327,6 @@ __m256i test_mm256_cvtepi8_epi16(__m128i a) {
   // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
   return _mm256_cvtepi8_epi16(a);
 }
-
 TEST_CONSTEXPR(match_v16hi(_mm256_cvtepi8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, 
-2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4, 5, -6, 
7, -8, 9, -10, 11, -12));
 
 __m256i test_mm256_cvtepi8_epi32(__m128i a) {
@@ -336,7 +335,6 @@ __m256i test_mm256_cvtepi8_epi32(__m128i a) {
   // CHECK: sext <8 x i8> %{{.*}} to <8 x i32>
   return _mm256_cvtepi8_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepi8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, 
-2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0, 1, -2, 3, -4));
 
 __m256i test_mm256_cvtepi8_epi64(__m128i a) {
@@ -345,7 +343,6 @@ __m256i test_mm256_cvtepi8_epi64(__m128i a) {
   // CHECK: sext <4 x i8> %{{.*}} to <4 x i64>
   return _mm256_cvtepi8_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepi8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, 
-2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), -3, 2, -1, 0));
 
 __m256i test_mm256_cvtepi16_epi32(__m128i a) {
@@ -353,7 +350,6 @@ __m256i test_mm256_cvtepi16_epi32(__m128i a) {
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
   return _mm256_cvtepi16_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepi16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 
1, -2, 3, -4)), -300, 2, -1, 0, 1, -2, 3, -4));
 
 __m256i test_mm256_cvtepi16_epi64(__m128i a) {
@@ -362,7 +358,6 @@ __m256i test_mm256_cvtepi16_epi64(__m128i a) {
   // CHECK: sext <4 x i16> %{{.*}} to <4 x i64>
   return _mm256_cvtepi16_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepi16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 
1, -2, 3, -4)), -300, 2, -1, 0));
 
 __m256i test_mm256_cvtepi32_epi64(__m128i a) {
@@ -370,7 +365,6 @@ __m256i test_mm256_cvtepi32_epi64(__m128i a) {
   // CHECK: sext <4 x i32> %{{.*}} to <4 x i64>
   return _mm256_cvtepi32_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepi32_epi64(_mm_setr_epi32(-70000, 2, -1, 
0)), -70000, 2, -1, 0));
 
 __m256i test_mm256_cvtepu8_epi16(__m128i a) {
@@ -378,7 +372,6 @@ __m256i test_mm256_cvtepu8_epi16(__m128i a) {
   // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
   return _mm256_cvtepu8_epi16(a);
 }
-
 TEST_CONSTEXPR(match_v16hi(_mm256_cvtepu8_epi16(_mm_setr_epi8(-3, 2, -1, 0, 1, 
-2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252, 5, 
250, 7, 248, 9, 246, 11, 244));
 
 __m256i test_mm256_cvtepu8_epi32(__m128i a) {
@@ -387,7 +380,6 @@ __m256i test_mm256_cvtepu8_epi32(__m128i a) {
   // CHECK: zext <8 x i8> %{{.*}} to <8 x i32>
   return _mm256_cvtepu8_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepu8_epi32(_mm_setr_epi8(-3, 2, -1, 0, 1, 
-2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0, 1, 254, 3, 252));
 
 __m256i test_mm256_cvtepu8_epi64(__m128i a) {
@@ -396,7 +388,6 @@ __m256i test_mm256_cvtepu8_epi64(__m128i a) {
   // CHECK: zext <4 x i8> %{{.*}} to <4 x i64>
   return _mm256_cvtepu8_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepu8_epi64(_mm_setr_epi8(-3, 2, -1, 0, 1, 
-2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 253, 2, 255, 0));
 
 __m256i test_mm256_cvtepu16_epi32(__m128i a) {
@@ -404,7 +395,6 @@ __m256i test_mm256_cvtepu16_epi32(__m128i a) {
   // CHECK: zext <8 x i16> {{.*}} to <8 x i32>
   return _mm256_cvtepu16_epi32(a);
 }
-
 TEST_CONSTEXPR(match_v8si(_mm256_cvtepu16_epi32(_mm_setr_epi16(-300, 2, -1, 0, 
1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
 
 __m256i test_mm256_cvtepu16_epi64(__m128i a) {
@@ -413,7 +403,6 @@ __m256i test_mm256_cvtepu16_epi64(__m128i a) {
   // CHECK: zext <4 x i16> %{{.*}} to <4 x i64>
   return _mm256_cvtepu16_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 
1, -2, 3, -4)), 65236, 2, 65535, 0));
 
 __m256i test_mm256_cvtepu32_epi64(__m128i a) {
@@ -421,7 +410,6 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) {
   // CHECK: zext <4 x i32> %{{.*}} to <4 x i64>
   return _mm256_cvtepu32_epi64(a);
 }
-
 TEST_CONSTEXPR(match_v4di(_mm256_cvtepu32_epi64(_mm_setr_epi32(-70000, 2, -1, 
0)), 4294897296, 2, 4294967295, 0));
 
 __m128i test0_mm256_extracti128_si256_0(__m256i a) {
@@ -1120,24 +1108,28 @@ __m128i test_mm_sllv_epi32(__m128i a, __m128i b) {
   // CHECK: call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %{{.*}}, <4 x i32> 
%{{.*}})
   return _mm_sllv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v4si(_mm_sllv_epi32((__m128i)(__v4si){1, -2, 3, -4}, 
(__m128i)(__v4si){1, 2, 3, -4}), 2, -8, 24, 0));
 
 __m256i test_mm256_sllv_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_sllv_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %{{.*}}, <8 x 
i32> %{{.*}})
   return _mm256_sllv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_sllv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, 
-6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 2, -8, 24, -64, 0, 
0, 0, 0));
 
 __m128i test_mm_sllv_epi64(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_sllv_epi64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %{{.*}}, <2 
x i64> %{{.*}})
   return _mm_sllv_epi64(a, b);
 }
+TEST_CONSTEXPR(match_m128i(_mm_sllv_epi64((__m128i)(__v2di){1, -3}, 
(__m128i)(__v2di){8, 63}), 256, 0x8000000000000000ULL));
 
 __m256i test_mm256_sllv_epi64(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_sllv_epi64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %{{.*}}, 
<4 x i64> %{{.*}})
   return _mm256_sllv_epi64(a, b);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_sllv_epi64((__m256i)(__v4di){1, -2, 3, -4}, 
(__m256i)(__v4di){1, 2, 3, -4}), 2, -8, 24, 0));
 
 __m256i test_mm256_sra_epi16(__m256i a, __m128i b) {
   // CHECK-LABEL: test_mm256_sra_epi16
@@ -1180,12 +1172,14 @@ __m128i test_mm_srav_epi32(__m128i a, __m128i b) {
   // CHECK: call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %{{.*}}, <4 x i32> 
%{{.*}})
   return _mm_srav_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v4si(_mm_srav_epi32((__m128i)(__v4si){1, -2, 3, -4}, 
(__m128i)(__v4si){1, 2, 3, -4}), 0, -1, 0, -1));
 
 __m256i test_mm256_srav_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_srav_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %{{.*}}, <8 x 
i32> %{{.*}})
   return _mm256_srav_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_srav_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, 
-6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, -1, 0, -1, 0, 
-1, 0, -1));
 
 __m256i test_mm256_srl_epi16(__m256i a, __m128i b) {
   // CHECK-LABEL: test_mm256_srl_epi16
@@ -1252,24 +1246,28 @@ __m128i test_mm_srlv_epi32(__m128i a, __m128i b) {
   // CHECK: call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %{{.*}}, <4 x i32> 
%{{.*}})
   return _mm_srlv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v4si(_mm_srlv_epi32((__m128i)(__v4si){1, -2, 3, -4}, 
(__m128i)(__v4si){1, 2, 3, -4}), 0, 1073741823, 0, 0));
 
 __m256i test_mm256_srlv_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_srlv_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %{{.*}}, <8 x 
i32> %{{.*}})
   return _mm256_srlv_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_srlv_epi32((__m256i)(__v8si){1, -2, 3, -4, 5, 
-6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, 1073741823, 0, 
268435455, 0, 1, 0, 7));
 
 __m128i test_mm_srlv_epi64(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_srlv_epi64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %{{.*}}, <2 
x i64> %{{.*}})
   return _mm_srlv_epi64(a, b);
 }
+TEST_CONSTEXPR(match_m128i(_mm_srlv_epi64((__m128i)(__v2di){1, -3}, 
(__m128i)(__v2di){8, 63}), 0, 1));
 
 __m256i test_mm256_srlv_epi64(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_srlv_epi64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %{{.*}}, 
<4 x i64> %{{.*}})
   return _mm256_srlv_epi64(a, b);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_srlv_epi64((__m256i)(__v4di){1, -2, 3, -4}, 
(__m256i)(__v4di){1, 2, 3, -4}), 0, 0x3FFFFFFFFFFFFFFFULL, 0, 0));
 
 __m256i test_mm256_stream_load_si256(__m256i const *a) {
   // CHECK-LABEL: test_mm256_stream_load_si256

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86] Allow AVX2 per-element shift intrinsics to be used in constexpr (PR #154780)

Reply via email to