Author: Jan Schultke
Date: 2026-05-25T16:30:59+01:00
New Revision: e9132e96ee84c15c16e4f482701756192c7b1c9a

URL: 
https://github.com/llvm/llvm-project/commit/e9132e96ee84c15c16e4f482701756192c7b1c9a
DIFF: 
https://github.com/llvm/llvm-project/commit/e9132e96ee84c15c16e4f482701756192c7b1c9a.diff

LOG: [clang] Implement `__builtin_elementwise_clmul` (#196633)

Follow-up to:
- https://github.com/llvm/llvm-project/pull/140301
- https://github.com/llvm/llvm-project/pull/168731

I'm mostly just following the steps of
https://github.com/llvm/llvm-project/pull/153113/ and other prior PRs
here. I don't have any idea how testing works yet.

CC @artagnon @oscardssmith

Added: 
    

Modified: 
    clang/docs/LanguageExtensions.rst
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/Builtins.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/AST/ByteCode/builtin-functions.cpp
    clang/test/CodeGen/builtins-elementwise-math.c
    clang/test/Sema/builtins-elementwise-math.c

Removed: 
    


################################################################################
diff  --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 03cb02deb5e7f..fbb9947f39d3e 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -903,6 +903,8 @@ T __builtin_elementwise_fshr(T x, T y, T z)     perform a 
funnel shift right. Co
                                                 the first argument is 0 and an 
optional second argument is provided,
                                                 the second argument is 
returned. It is undefined behaviour if the
                                                 first argument is 0 and no 
second argument is provided.
+T __builtin_elementwise_clmul(T x, T y)         perform a carry-less multiplication of x and y, returning the least    integer types
+                                                significant bits of the wide result.
 ============================================== 
====================================================================== 
=========================================
 
 

diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3c1eacfc05dc8..3813a61d51b62 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -278,6 +278,10 @@ Non-comprehensive list of changes in this release
   extends bit-reversal support to all standard integers type, including
   ``_BitInt``
 
+- Added ``__builtin_elementwise_clmul`` for carry-less multiplication of
+  integers including ``_BitInt`` types. This includes constexpr evaluation
+  support.
+
 - Deprecated float types support from ``__builtin_elementwise_max`` and
   ``__builtin_elementwise_min``.
 

diff  --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 40ec94ab75046..84799929cee87 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1806,6 +1806,12 @@ def ElementwiseCttz : Builtin {
   let Prototype = "void(...)";
 }
 
+def ElementwiseClmul : Builtin {
+  let Spellings = ["__builtin_elementwise_clmul"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
+  let Prototype = "void(...)";
+}
+
 def ReduceMax : Builtin {
   let Spellings = ["__builtin_reduce_max"];
   let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 3e9ce902427eb..ffdbc2346b5ca 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5532,6 +5532,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
   case clang::X86::BI__builtin_ia32_pclmulqdq256:
   case clang::X86::BI__builtin_ia32_pclmulqdq512:
     return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);
+  case Builtin::BI__builtin_elementwise_clmul:
+    return interp__builtin_elementwise_int_binop(S, OpPC, Call,
+                                                 llvm::APIntOps::clmul);
 
   case Builtin::BI__builtin_elementwise_fma:
     return interp__builtin_elementwise_triop_fp(

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 38aa5798cfeb9..0522d6f1dc944 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14084,6 +14084,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case Builtin::BI__builtin_elementwise_clmul:
+    return EvaluateBinOpExpr(llvm::APIntOps::clmul);
   case Builtin::BI__builtin_elementwise_fshl:
   case Builtin::BI__builtin_elementwise_fshr: {
     APValue SourceHi, SourceLo, SourceShift;
@@ -17216,6 +17218,15 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const 
CallExpr *E,
     APInt Result = std::min(LHS, RHS);
     return Success(APSInt(Result, !LHS.isSigned()), E);
   }
+  case Builtin::BI__builtin_elementwise_clmul: {
+    APSInt LHS, RHS;
+    if (!EvaluateInteger(E->getArg(0), LHS, Info) ||
+        !EvaluateInteger(E->getArg(1), RHS, Info))
+      return false;
+
+    APInt Result = llvm::APIntOps::clmul(LHS, RHS);
+    return Success(APSInt(Result, LHS.isUnsigned()), E);
+  }
   case Builtin::BI__builtin_elementwise_fshl:
   case Builtin::BI__builtin_elementwise_fshr: {
     APSInt Hi, Lo, Shift;

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cac1628e68721..b1d727cb5e0ad 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4246,6 +4246,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_fshr:
     return RValue::get(
         emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr));
+  case Builtin::BI__builtin_elementwise_clmul:
+    return RValue::get(
+        emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::clmul));
 
   case Builtin::BI__builtin_elementwise_add_sat:
   case Builtin::BI__builtin_elementwise_sub_sat: {

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cc834bbee23c4..2309196ee1696 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3615,6 +3615,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
   // types only.
   case Builtin::BI__builtin_elementwise_add_sat:
   case Builtin::BI__builtin_elementwise_sub_sat:
+  case Builtin::BI__builtin_elementwise_clmul:
     if (BuiltinElementwiseMath(TheCall,
                                EltwiseBuiltinArgTyRestriction::IntegerTy))
       return ExprError();

diff  --git a/clang/test/AST/ByteCode/builtin-functions.cpp 
b/clang/test/AST/ByteCode/builtin-functions.cpp
index 97fa1760ee167..6e4b894d63db3 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -1385,6 +1385,23 @@ namespace ElementwisePopcount {
 #endif
 }
 
+namespace ElementwiseClmul {
+  static_assert(__builtin_elementwise_clmul(0U, 0U) == 0U);
+  static_assert(__builtin_elementwise_clmul(1U, 1U) == 1U);
+  static_assert(__builtin_elementwise_clmul(3U, 3U) == 5U);
+  static_assert(__builtin_elementwise_clmul(0xBU, 0xDU) == 0x7FU);
+  static_assert(__builtin_elementwise_clmul(0xFU, 0xFU) == 0x55U);
+#ifndef __AVR__
+  static_assert(__builtin_elementwise_clmul((unsigned _BitInt(31))3,
+                                            (unsigned _BitInt(31))3) ==
+                (unsigned _BitInt(31))5);
+#endif
+
+  static_assert(__builtin_reduce_add(__builtin_elementwise_clmul(
+                    (vector4uint){0U, 1U, 3U, 7U},
+                    (vector4uint){0U, 1U, 3U, 7U})) == 27U);
+}
+
 namespace BuiltinMemcpy {
   constexpr int simple() {
     int a = 12;

diff  --git a/clang/test/CodeGen/builtins-elementwise-math.c 
b/clang/test/CodeGen/builtins-elementwise-math.c
index ff6338a8b85e7..d0e4a6fa10cfc 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -1297,6 +1297,39 @@ void test_builtin_elementwise_fshl(long long int i1, 
long long int i2,
   u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3);
 }
 
+void test_builtin_elementwise_clmul(unsigned int ui1, unsigned int ui2,
+                                    unsigned short us1, unsigned short us2,
+                                    u4 vu1, u4 vu2,
+                                    unsigned _BitInt(31) bi1,
+                                    unsigned _BitInt(31) bi2) {
+  // CHECK:      [[UI1:%.+]] = load i32, ptr %ui1.addr, align 4
+  // CHECK-NEXT: [[UI2:%.+]] = load i32, ptr %ui2.addr, align 4
+  // CHECK-NEXT: [[UI3:%.+]] = call i32 @llvm.clmul.i32(i32 [[UI1]], i32 [[UI2]])
+  // CHECK-NEXT: store i32 [[UI3]], ptr %ui1.addr, align 4
+  ui1 = __builtin_elementwise_clmul(ui1, ui2);
+
+  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
+  // CHECK-NEXT: [[US3:%.+]] = call i16 @llvm.clmul.i16(i16 [[US1]], i16 [[US2]])
+  // CHECK-NEXT: store i16 [[US3]], ptr %us1.addr, align 2
+  us1 = __builtin_elementwise_clmul(us1, us2);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16
+  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16
+  // CHECK-NEXT: [[VU3:%.+]] = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
+  // CHECK-NEXT: store <4 x i32> [[VU3]], ptr %vu1.addr, align 16
+  vu1 = __builtin_elementwise_clmul(vu1, vu2);
+
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[BI1TRUNC:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
+  // CHECK-NEXT: [[BI2TRUNC:%.+]] = trunc i32 [[BI2]] to i31
+  // CHECK-NEXT: [[BIRES:%.+]] = call i31 @llvm.clmul.i31(i31 [[BI1TRUNC]], i31 [[BI2TRUNC]])
+  // CHECK-NEXT: [[BIRESZEXT:%.+]] = zext i31 [[BIRES]] to i32
+  // CHECK-NEXT: store i32 [[BIRESZEXT]], ptr %bi1.addr, align 4
+  bi1 = __builtin_elementwise_clmul(bi1, bi2);
+}
+
 void test_builtin_elementwise_clzg(si8 vs1, si8 vs2, u4 vu1,
                                    long long int lli, short si,
                                    _BitInt(31) bi, int i,

diff  --git a/clang/test/Sema/builtins-elementwise-math.c 
b/clang/test/Sema/builtins-elementwise-math.c
index 53cccc6e855f0..4ffdcee3ca9c7 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -180,6 +180,40 @@ void test_builtin_elementwise_sub_sat(int i, short s, 
double d, float4 v, int3 i
   // expected-error@-1 {{1st argument must be a scalar or vector of integer 
types (was '_Complex float')}}
 }
 
+void test_builtin_elementwise_clmul(int i, short s, double d, float4 v,
+                                    int3 iv, unsigned3 uv, unsigned u,
+                                    unsigned4 vu, int *p) {
+  i = __builtin_elementwise_clmul(p, d);
+  // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'int *')}}
+
+  struct Foo foo = __builtin_elementwise_clmul(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_clmul(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_clmul();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_clmul(i, i, i);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+  i = __builtin_elementwise_clmul(v, v);
+  // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'float4' (vector of 4 'float' values))}}
+
+  i = __builtin_elementwise_clmul(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
+
+  i = __builtin_elementwise_clmul(uv, iv);
+  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
+
+  unsigned _BitInt(31) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
+  ext = __builtin_elementwise_clmul(ext, ext);
+
+  u = __builtin_elementwise_clmul(u, u);
+  vu = __builtin_elementwise_clmul(vu, vu);
+}
+
 void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, 
unsigned3 uv, int *p) {
   i = __builtin_elementwise_max(p, d);
   // expected-error@-1 {{1st argument must be a vector, integer or 
floating-point type (was 'int *')}}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to