[clang] [Clang][x86]: allow PCLMULQDQ intrinsics to be used in constexpr (PR #169214)

Ahmed Nour via cfe-commits Sun, 23 Nov 2025 06:24:46 -0800

https://github.com/ahmednoursphinx updated 
https://github.com/llvm/llvm-project/pull/169214


>From 60fac68bef81335aa12e2faa7e364bb647a51872 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Sun, 23 Nov 2025 14:21:51 +0200
Subject: [PATCH 1/6] [Clang][x86]:  allow PCLMULQDQ intrinsics to be used in
 constexpr

---
 clang/include/clang/Basic/BuiltinsX86.td     |  6 +-
 clang/lib/AST/ByteCode/InterpBuiltin.cpp     | 72 ++++++++++++++++++++
 clang/lib/AST/ExprConstant.cpp               | 63 +++++++++++++++++
 clang/test/CodeGen/X86/pclmul-builtins.c     | 18 ++++-
 clang/test/CodeGen/X86/vpclmulqdq-builtins.c | 13 ++++
 5 files changed, 168 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index f6069fdc5707a..1eee50a441e31 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -444,15 +444,15 @@ let Features = "avx512f,gfni", Attributes = [NoThrow, 
Const, RequiredVectorWidth
   def vgf2p8mulb_v64qi : X86Builtin<"_Vector<64, char>(_Vector<64, char>, 
_Vector<64, char>)">;
 }
 
-let Features = "pclmul", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
+let Features = "pclmul", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def pclmulqdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long 
long int>, _Vector<2, long long int>, _Constant char)">;
 }
 
-let Features = "vpclmulqdq", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
+let Features = "vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def pclmulqdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long 
long int>, _Vector<4, long long int>, _Constant char)">;
 }
 
-let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long 
long int>, _Vector<8, long long int>, _Constant char)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 83e40f64fd979..ef740c04c83da 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2745,6 +2745,73 @@ static bool interp__builtin_ia32_addsub(InterpState &S, 
CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
+                                            const CallExpr *Call) {
+  // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
+  // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
+  // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
+  assert(Call->getArg(0)->getType()->isVectorType() &&
+         Call->getArg(1)->getType()->isVectorType());
+  
+  // Extract imm8 argument
+  APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
+  unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
+  bool SelectUpperA = (Imm8Val & 0x01) != 0;
+  bool SelectUpperB = (Imm8Val & 0x10) != 0;
+
+  const Pointer &RHS = S.Stk.pop<Pointer>();
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+  PrimType ElemT = *S.getContext().classify(VT->getElementType());
+  unsigned NumElems = VT->getNumElements();
+  const auto *DestVT = Call->getType()->castAs<VectorType>();
+  PrimType DestElemT = *S.getContext().classify(DestVT->getElementType());
+  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+  // Process each 128-bit lane (2 elements at a time)
+  for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
+    APSInt A0, A1, B0, B1;
+    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+      A0 = LHS.elem<T>(Lane + 0).toAPSInt();
+      A1 = LHS.elem<T>(Lane + 1).toAPSInt();
+      B0 = RHS.elem<T>(Lane + 0).toAPSInt();
+      B1 = RHS.elem<T>(Lane + 1).toAPSInt();
+    });
+
+    // Select the appropriate 64-bit values based on imm8
+    APSInt A = SelectUpperA ? A1 : A0;
+    APSInt B = SelectUpperB ? B1 : B0;
+
+    // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
+    // This multiplies two 64-bit values to produce a 128-bit result
+    APInt AVal = A.getValue().zextOrTrunc(64);
+    APInt BVal = B.getValue().zextOrTrunc(64);
+    APInt Result(128, 0);
+
+    // For each bit in A, if set, XOR B shifted left by that bit position
+    for (unsigned i = 0; i < 64; ++i) {
+      if (AVal[i]) {
+        APInt ShiftedB = BVal.zext(128) << i;
+        Result ^= ShiftedB;
+      }
+    }
+
+    // Split the 128-bit result into two 64-bit halves
+    APSInt ResultLow(Result.extractBits(64, 0), DestUnsigned);
+    APSInt ResultHigh(Result.extractBits(64, 64), DestUnsigned);
+
+    INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
+      Dst.elem<T>(Lane + 0) = static_cast<T>(ResultLow);
+      Dst.elem<T>(Lane + 1) = static_cast<T>(ResultHigh);
+    });
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp__builtin_elementwise_triop_fp(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<APFloat(const APFloat &, const APFloat &,
@@ -4366,6 +4433,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           return llvm::APIntOps::muluExtended(LoLHS, LoRHS);
         });
 
+  case clang::X86::BI__builtin_ia32_pclmulqdq128:
+  case clang::X86::BI__builtin_ia32_pclmulqdq256:
+  case clang::X86::BI__builtin_ia32_pclmulqdq512:
+    return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);
+
   case Builtin::BI__builtin_elementwise_fma:
     return interp__builtin_elementwise_triop_fp(
         S, OpPC, Call,
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..ea4a7c320a3f2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13483,6 +13483,69 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     }
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case clang::X86::BI__builtin_ia32_pclmulqdq128:
+  case clang::X86::BI__builtin_ia32_pclmulqdq256:
+  case clang::X86::BI__builtin_ia32_pclmulqdq512: {
+    // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
+    // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
+    // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
+    APValue SourceLHS, SourceRHS;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+      return false;
+
+    APSInt Imm8;
+    if (!EvaluateInteger(E->getArg(2), Imm8, Info))
+      return false;
+
+    // Extract bits 0 and 4 from imm8
+    unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
+    bool SelectUpperA = (Imm8Val & 0x01) != 0;
+    bool SelectUpperB = (Imm8Val & 0x10) != 0;
+
+    unsigned NumElems = SourceLHS.getVectorLength();
+    SmallVector<APValue, 8> ResultElements;
+    ResultElements.reserve(NumElems);
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+
+    // Process each 128-bit lane
+    for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
+      // Get the two 64-bit halves of the first operand
+      APSInt A0 = SourceLHS.getVectorElt(Lane + 0).getInt();
+      APSInt A1 = SourceLHS.getVectorElt(Lane + 1).getInt();
+      // Get the two 64-bit halves of the second operand
+      APSInt B0 = SourceRHS.getVectorElt(Lane + 0).getInt();
+      APSInt B1 = SourceRHS.getVectorElt(Lane + 1).getInt();
+
+      // Select the appropriate 64-bit values based on imm8
+      APSInt A = SelectUpperA ? A1 : A0;
+      APSInt B = SelectUpperB ? B1 : B0;
+
+      // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
+      // This multiplies two 64-bit values to produce a 128-bit result
+      APInt AVal = A.getValue().zextOrTrunc(64);
+      APInt BVal = B.getValue().zextOrTrunc(64);
+      APInt Result(128, 0);
+
+      // For each bit in A, if set, XOR B shifted left by that bit position
+      for (unsigned i = 0; i < 64; ++i) {
+        if (AVal[i]) {
+          APInt ShiftedB = BVal.zext(128) << i;
+          Result ^= ShiftedB;
+        }
+      }
+
+      // Split the 128-bit result into two 64-bit halves
+      APSInt ResultLow(Result.extractBits(64, 0), DestUnsigned);
+      APSInt ResultHigh(Result.extractBits(64, 64), DestUnsigned);
+
+      ResultElements.push_back(APValue(ResultLow));
+      ResultElements.push_back(APValue(ResultHigh));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
   case Builtin::BI__builtin_elementwise_fshl:
   case Builtin::BI__builtin_elementwise_fshr: {
     APValue SourceHi, SourceLo, SourceShift;
diff --git a/clang/test/CodeGen/X86/pclmul-builtins.c 
b/clang/test/CodeGen/X86/pclmul-builtins.c
index 44300f645a9d0..b1e3cc5719d97 100644
--- a/clang/test/CodeGen/X86/pclmul-builtins.c
+++ b/clang/test/CodeGen/X86/pclmul-builtins.c
@@ -1,9 +1,25 @@
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin 
-target-feature +pclmul -emit-llvm -o - | FileCheck %s
-
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin 
-target-feature +pclmul -emit-llvm -o - -std=c++11 | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin 
-target-feature +pclmul -emit-llvm -o - -std=c++11 
-fexperimental-new-constant-interpreter | FileCheck %s
 
 #include <wmmintrin.h>
+#include "builtin_test_helpers.h"
 
 __m128i test_mm_clmulepi64_si128(__m128i a, __m128i b) {
   // CHECK: @llvm.x86.pclmulqdq
   return _mm_clmulepi64_si128(a, b, 0);
 }
+
+// Test constexpr evaluation for _mm_clmulepi64_si128
+// imm8=0x00: lower 64 bits of both operands
+// Test case: 0x1 * 0x3 = 0x3 (carry-less multiplication)
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, 
(__m128i){0x3ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL));
+
+// imm8=0x01: upper 64 bits of first operand, lower 64 bits of second
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, 
(__m128i){0x3ULL, 0x0ULL}, 0x01), 0x3ULL, 0x0ULL));
+
+// imm8=0x10: lower 64 bits of first operand, upper 64 bits of second
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, 
(__m128i){0x0ULL, 0x3ULL}, 0x10), 0x3ULL, 0x0ULL));
+
+// imm8=0x11: upper 64 bits of both operands
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, 
(__m128i){0x0ULL, 0x3ULL}, 0x11), 0x3ULL, 0x0ULL));
diff --git a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c 
b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
index aa2b8bca91268..e408e0556e380 100644
--- a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
+++ b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
@@ -1,17 +1,30 @@
 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - | 
FileCheck %s --check-prefix AVX
 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature 
+avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - 
-std=c++11 | FileCheck %s --check-prefix AVX
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature 
+avx512f -emit-llvm -o - -std=c++11 | FileCheck %s --check-prefixes AVX,AVX512
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - 
-std=c++11 -fexperimental-new-constant-interpreter | FileCheck %s 
--check-prefix AVX
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature 
+avx512f -emit-llvm -o - -std=c++11 -fexperimental-new-constant-interpreter | 
FileCheck %s --check-prefixes AVX,AVX512
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
   // AVX: @llvm.x86.pclmulqdq.256
   return _mm256_clmulepi64_epi128(A, B, 0);
 }
 
+// Test constexpr evaluation for _mm256_clmulepi64_epi128
+// Each 128-bit lane is processed independently
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128((__m256i){0x1ULL, 0x0ULL, 
0x2ULL, 0x0ULL}, (__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}, 0x00), 0x3ULL, 
0x0ULL, 0xaULL, 0x0ULL));
+
 #ifdef __AVX512F__
 __m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
   // AVX512: @llvm.x86.pclmulqdq.512
   return _mm512_clmulepi64_epi128(A, B, 0);
 }
+
+// Test constexpr evaluation for _mm512_clmulepi64_epi128
+// Each 128-bit lane is processed independently
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128((__m512i){0x1ULL, 0x0ULL, 
0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}, (__m512i){0x3ULL, 0x0ULL, 
0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL, 0xaULL, 
0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
 #endif
 

>From 12b605b944716fbe9fa9ad8075fdf2672098d7af Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Sun, 23 Nov 2025 14:28:48 +0200
Subject: [PATCH 2/6] chore: Format files

---
 clang/include/clang/Basic/BuiltinsX86.td | 9 ++++++---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
 clang/lib/AST/ExprConstant.cpp           | 4 ++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 1eee50a441e31..ecc05974adecb 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -444,15 +444,18 @@ let Features = "avx512f,gfni", Attributes = [NoThrow, 
Const, RequiredVectorWidth
   def vgf2p8mulb_v64qi : X86Builtin<"_Vector<64, char>(_Vector<64, char>, 
_Vector<64, char>)">;
 }
 
-let Features = "pclmul", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
+let Features = "pclmul",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def pclmulqdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long 
long int>, _Vector<2, long long int>, _Constant char)">;
 }
 
-let Features = "vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+let Features = "vpclmulqdq",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def pclmulqdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long 
long int>, _Vector<4, long long int>, _Constant char)">;
 }
 
-let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
+let Features = "avx512f,vpclmulqdq",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long 
long int>, _Vector<8, long long int>, _Constant char)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ef740c04c83da..83a61f496a3ec 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2746,13 +2746,13 @@ static bool interp__builtin_ia32_addsub(InterpState &S, 
CodePtr OpPC,
 }
 
 static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
-                                            const CallExpr *Call) {
+                                           const CallExpr *Call) {
   // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
   // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
   // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
   assert(Call->getArg(0)->getType()->isVectorType() &&
          Call->getArg(1)->getType()->isVectorType());
-  
+
   // Extract imm8 argument
   APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
   unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ea4a7c320a3f2..fbd3701c784d3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13522,8 +13522,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
       APSInt A = SelectUpperA ? A1 : A0;
       APSInt B = SelectUpperB ? B1 : B0;
 
-      // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
-      // This multiplies two 64-bit values to produce a 128-bit result
+      // Perform carry-less multiplication (polynomial multiplication in
+      // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
       APInt AVal = A.getValue().zextOrTrunc(64);
       APInt BVal = B.getValue().zextOrTrunc(64);
       APInt Result(128, 0);

>From 2f80098af72c5758151f3c60104b4e3ead925266 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Sun, 23 Nov 2025 15:30:51 +0200
Subject: [PATCH 3/6] refactor: PR Feedback

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 5 ++---
 clang/lib/AST/ExprConstant.cpp           | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 83a61f496a3ec..6a71ff9c01586 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2755,9 +2755,8 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState 
&S, CodePtr OpPC,
 
   // Extract imm8 argument
   APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
-  unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
-  bool SelectUpperA = (Imm8Val & 0x01) != 0;
-  bool SelectUpperB = (Imm8Val & 0x10) != 0;
+  bool SelectUpperA = (Imm8.getZExtValue() & 0x01) != 0;
+  bool SelectUpperB = (Imm8.getZExtValue() & 0x10) != 0;
 
   const Pointer &RHS = S.Stk.pop<Pointer>();
   const Pointer &LHS = S.Stk.pop<Pointer>();
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index fbd3701c784d3..ed309bdd3e377 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13499,9 +13499,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
       return false;
 
     // Extract bits 0 and 4 from imm8
-    unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
-    bool SelectUpperA = (Imm8Val & 0x01) != 0;
-    bool SelectUpperB = (Imm8Val & 0x10) != 0;
+    bool SelectUpperA = (Imm8.getZExtValue() & 0x01) != 0;
+    bool SelectUpperB = (Imm8.getZExtValue() & 0x10) != 0;
 
     unsigned NumElems = SourceLHS.getVectorLength();
     SmallVector<APValue, 8> ResultElements;

>From e3de2515f40786672c2a260a2ea290903f499a0f Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Sun, 23 Nov 2025 15:46:02 +0200
Subject: [PATCH 4/6] refactor: simplify conversion from APSInt to APInt

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
 clang/lib/AST/ExprConstant.cpp           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6a71ff9c01586..be0b560814ebd 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2785,8 +2785,8 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState 
&S, CodePtr OpPC,
 
     // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
     // This multiplies two 64-bit values to produce a 128-bit result
-    APInt AVal = A.getValue().zextOrTrunc(64);
-    APInt BVal = B.getValue().zextOrTrunc(64);
+    APInt AVal = A.extOrTrunc(64);
+    APInt BVal = B.extOrTrunc(64);
     APInt Result(128, 0);
 
     // For each bit in A, if set, XOR B shifted left by that bit position
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ed309bdd3e377..22f042d515ac5 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13523,8 +13523,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
 
       // Perform carry-less multiplication (polynomial multiplication in
       // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
-      APInt AVal = A.getValue().zextOrTrunc(64);
-      APInt BVal = B.getValue().zextOrTrunc(64);
+      APInt AVal = A.extOrTrunc(64);
+      APInt BVal = B.extOrTrunc(64);
       APInt Result(128, 0);
 
       // For each bit in A, if set, XOR B shifted left by that bit position

>From 5afa1b171ccd84eaa4935e429160696086b158ae Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Sun, 23 Nov 2025 15:47:31 +0200
Subject: [PATCH 5/6] refactor: Use static casting

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
 clang/lib/AST/ExprConstant.cpp           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index be0b560814ebd..c692e32cdefc8 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2785,8 +2785,8 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState 
&S, CodePtr OpPC,
 
     // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
     // This multiplies two 64-bit values to produce a 128-bit result
-    APInt AVal = A.extOrTrunc(64);
-    APInt BVal = B.extOrTrunc(64);
+    APInt AVal = static_cast<const APInt &>(A).zextOrTrunc(64);
+    APInt BVal = static_cast<const APInt &>(B).zextOrTrunc(64);
     APInt Result(128, 0);
 
     // For each bit in A, if set, XOR B shifted left by that bit position
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 22f042d515ac5..03ee822b57143 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13523,8 +13523,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
 
       // Perform carry-less multiplication (polynomial multiplication in
       // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
-      APInt AVal = A.extOrTrunc(64);
-      APInt BVal = B.extOrTrunc(64);
+      APInt AVal = static_cast<const APInt &>(A).zextOrTrunc(64);
+      APInt BVal = static_cast<const APInt &>(B).zextOrTrunc(64);
       APInt Result(128, 0);
 
       // For each bit in A, if set, XOR B shifted left by that bit position

>From f5c5f23d16a9e4412e55ba8766e9d02f4184d5fa Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Sun, 23 Nov 2025 16:24:02 +0200
Subject: [PATCH 6/6] refactor: update tests

---
 clang/test/CodeGen/X86/pclmul-builtins.c     | 8 ++++----
 clang/test/CodeGen/X86/vpclmulqdq-builtins.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/X86/pclmul-builtins.c 
b/clang/test/CodeGen/X86/pclmul-builtins.c
index b1e3cc5719d97..5af4014b0f663 100644
--- a/clang/test/CodeGen/X86/pclmul-builtins.c
+++ b/clang/test/CodeGen/X86/pclmul-builtins.c
@@ -13,13 +13,13 @@ __m128i test_mm_clmulepi64_si128(__m128i a, __m128i b) {
 // Test constexpr evaluation for _mm_clmulepi64_si128
 // imm8=0x00: lower 64 bits of both operands
 // Test case: 0x1 * 0x3 = 0x3 (carry-less multiplication)
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, 
(__m128i){0x3ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x1ULL, 0x0ULL}), 
((__m128i){0x3ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL));
 
 // imm8=0x01: upper 64 bits of first operand, lower 64 bits of second
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, 
(__m128i){0x3ULL, 0x0ULL}, 0x01), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, 0x1ULL}), 
((__m128i){0x3ULL, 0x0ULL}), 0x01), 0x3ULL, 0x0ULL));
 
 // imm8=0x10: lower 64 bits of first operand, upper 64 bits of second
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, 
(__m128i){0x0ULL, 0x3ULL}, 0x10), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x1ULL, 0x0ULL}), 
((__m128i){0x0ULL, 0x3ULL}), 0x10), 0x3ULL, 0x0ULL));
 
 // imm8=0x11: upper 64 bits of both operands
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, 
(__m128i){0x0ULL, 0x3ULL}, 0x11), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, 0x1ULL}), 
((__m128i){0x0ULL, 0x3ULL}), 0x11), 0x3ULL, 0x0ULL));
diff --git a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c 
b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
index e408e0556e380..24b5594518009 100644
--- a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
+++ b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
@@ -15,7 +15,7 @@ __m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
 
 // Test constexpr evaluation for _mm256_clmulepi64_epi128
 // Each 128-bit lane is processed independently
-TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128((__m256i){0x1ULL, 0x0ULL, 
0x2ULL, 0x0ULL}, (__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}, 0x00), 0x3ULL, 
0x0ULL, 0xaULL, 0x0ULL));
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x1ULL, 0x0ULL, 
0x2ULL, 0x0ULL}), ((__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}), 0x00), 0x3ULL, 
0x0ULL, 0xaULL, 0x0ULL));
 
 #ifdef __AVX512F__
 __m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
@@ -25,6 +25,6 @@ __m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
 
 // Test constexpr evaluation for _mm512_clmulepi64_epi128
 // Each 128-bit lane is processed independently
-TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128((__m512i){0x1ULL, 0x0ULL, 
0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}, (__m512i){0x3ULL, 0x0ULL, 
0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL, 0xaULL, 
0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x1ULL, 0x0ULL, 
0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}), ((__m512i){0x3ULL, 0x0ULL, 
0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL, 
0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
 #endif
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][x86]: allow PCLMULQDQ intrinsics to be used in constexpr (PR #169214)

Reply via email to