Author: Ahmed Nour
Date: 2025-12-03T20:19:54Z
New Revision: 8f6e95ef45d20709f338b0753a362c172a51eff7

URL: 
https://github.com/llvm/llvm-project/commit/8f6e95ef45d20709f338b0753a362c172a51eff7
DIFF: 
https://github.com/llvm/llvm-project/commit/8f6e95ef45d20709f338b0753a362c172a51eff7.diff

LOG: [Clang][X86] Add constexpr support for permute4x64_pd and 
permute4x64_epi64 (#170442)

This PR adds constexpr support for the AVX2 cross-lane permute
intrinsics _mm256_permute4x64_pd and _mm256_permute4x64_epi64.

Resolves https://github.com/llvm/llvm-project/issues/169304

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/test/CodeGen/X86/avx2-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 560f94ff2427e..a4b7215d6334d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -585,13 +585,14 @@ let Features = "avx2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] i
   def psadbw256
       : X86Builtin<
             "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
-  def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant 
int)">;
   def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Vector<4, long long int>, _Constant int)">;
-  def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long 
int>, _Constant int)">;
 }
 
-
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+  def permdf256
+      : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+  def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
+                             "int>, _Constant int)">;
   def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
   def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned 
char>, _Vector<32, unsigned char>)">;
   def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned 
short>, _Vector<16, unsigned short>)">;

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 382273e768919..4a789fe3a6af4 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5116,6 +5116,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           return std::make_pair(0, static_cast<int>(LaneOffset + Index));
         });
 
+  case X86::BI__builtin_ia32_permdf256:
+  case X86::BI__builtin_ia32_permdi256:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
+          // permute4x64 operates on 4 64-bit elements
+          // For element i (0-3), extract bits [2*i+1:2*i] from Control
+          unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
+          return std::make_pair(0, static_cast<int>(Index));
+        });
+
   case X86::BI__builtin_ia32_vpmultishiftqb128:
   case X86::BI__builtin_ia32_vpmultishiftqb256:
   case X86::BI__builtin_ia32_vpmultishiftqb512:

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index c1fb95c084d73..11c5e1c6e90f4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13269,6 +13269,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     return Success(R, E);
   }
 
+  case X86::BI__builtin_ia32_permdf256:
+  case X86::BI__builtin_ia32_permdi256: {
+    APValue R;
+    if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
+          // permute4x64 operates on 4 64-bit elements
+          // For element i (0-3), extract bits [2*i+1:2*i] from Control
+          unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
+          return std::make_pair(0, static_cast<int>(Index));
+        }))
+      return false;
+    return Success(R, E);
+  }
+
   case X86::BI__builtin_ia32_vpermilvarps:
   case X86::BI__builtin_ia32_vpermilvarps256:
   case X86::BI__builtin_ia32_vpermilvarps512: {

diff  --git a/clang/test/CodeGen/X86/avx2-builtins.c 
b/clang/test/CodeGen/X86/avx2-builtins.c
index d6facfea8962e..c9474e94476fc 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 
3, i32 0, i32 2, i32 0>
   return _mm256_permute4x64_epi64(a, 35);
 }
+// Control value 0x00: [0,0,0,0] -> broadcast element 0
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 
30LL, 20LL, 10LL}), 0x00), 40LL, 40LL, 40LL, 40LL));
+// Control value 0x1B: [3,2,1,0] -> reverse order [D,C,B,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 
30LL, 20LL, 10LL}), 0x1B), 10LL, 20LL, 30LL, 40LL));
+// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 
30LL, 20LL, 10LL}), 0x39), 30LL, 20LL, 10LL, 40LL));
+// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 
30LL, 20LL, 10LL}), 0x12), 20LL, 40LL, 30LL, 40LL));
+// Control value 0xE4: [0,1,2,3] -> identity [A,B,C,D]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 
30LL, 20LL, 10LL}), 0xE4), 40LL, 30LL, 20LL, 10LL));
+// Test with negative values
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){-40LL, 
-30LL, -20LL, -10LL}), 0x1B), -10LL, -20LL, -30LL, -40LL));
 
 __m256d test_mm256_permute4x64_pd(__m256d a) {
   // CHECK-LABEL: test_mm256_permute4x64_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> 
<i32 1, i32 2, i32 1, i32 0>
   return _mm256_permute4x64_pd(a, 25);
 }
+// Control value 0x00: [0,0,0,0] -> broadcast element 0
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 
1.0}), 0x00), 4.0, 4.0, 4.0, 4.0));
+// Control value 0x1B: [3,2,1,0] -> reverse order [D,C,B,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 
1.0}), 0x1B), 1.0, 2.0, 3.0, 4.0));
+// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 
1.0}), 0x39), 3.0, 2.0, 1.0, 4.0));
+// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 
1.0}), 0x12), 2.0, 4.0, 3.0, 4.0));
+// Control value 0xE4: [0,1,2,3] -> identity [A,B,C,D]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 
1.0}), 0xE4), 4.0, 3.0, 2.0, 1.0));
 
 __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_permutevar8x32_epi32


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to