[clang] [Clang][x86]: allow PCLMULQDQ intrinsics to be used in constexpr (PR #169214)

Ahmed Nour via cfe-commits Fri, 28 Nov 2025 20:12:05 -0800

================
@@ -2745,6 +2745,72 @@ static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
+                                           const CallExpr *Call) {
+  // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
+  // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
+  // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
+  assert(Call->getArg(0)->getType()->isVectorType() &&
+         Call->getArg(1)->getType()->isVectorType());
+
+  // Extract imm8 argument
+  APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
+  bool SelectUpperA = (Imm8 & 0x01) != 0;
+  bool SelectUpperB = (Imm8 & 0x10) != 0;
+
+  const Pointer &RHS = S.Stk.pop<Pointer>();
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+  PrimType ElemT = *S.getContext().classify(VT->getElementType());
+  unsigned NumElems = VT->getNumElements();
+  const auto *DestVT = Call->getType()->castAs<VectorType>();
+  PrimType DestElemT = *S.getContext().classify(DestVT->getElementType());
+  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+  // Process each 128-bit lane (2 elements at a time)
+  for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
+    APSInt A0, A1, B0, B1;
+    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+      A0 = LHS.elem<T>(Lane + 0).toAPSInt();
+      A1 = LHS.elem<T>(Lane + 1).toAPSInt();
+      B0 = RHS.elem<T>(Lane + 0).toAPSInt();
+      B1 = RHS.elem<T>(Lane + 1).toAPSInt();
+    });
+
+    // Select the appropriate 64-bit values based on imm8
+    APInt A = SelectUpperA ? A1 : A0;
+    APInt B = SelectUpperB ? B1 : B0;
+
+    // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
+    // This multiplies two 64-bit values to produce a 128-bit result
+    APInt AVal = A.zextOrTrunc(64);
+    APInt BVal = B.zextOrTrunc(64);
+    APInt Result(128, 0);
+
+    // For each bit in A, if set, XOR B shifted left by that bit position
+    for (unsigned i = 0; i < 64; ++i) {
+      if (AVal[i]) {
+        APInt ShiftedB = BVal.zext(128) << i;
+        Result ^= ShiftedB;
+      }
+    }
----------------
ahmednoursphinx wrote:


Good idea. I updated it and extended it to use equal bit widths.

https://github.com/llvm/llvm-project/pull/169214
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][x86]: allow PCLMULQDQ intrinsics to be used in constexpr (PR #169214)

Reply via email to