Author: Jan Schultke Date: 2026-05-25T16:30:59+01:00 New Revision: e9132e96ee84c15c16e4f482701756192c7b1c9a
URL: https://github.com/llvm/llvm-project/commit/e9132e96ee84c15c16e4f482701756192c7b1c9a DIFF: https://github.com/llvm/llvm-project/commit/e9132e96ee84c15c16e4f482701756192c7b1c9a.diff LOG: [clang] Implement `__builtin_elementwise_clmul` (#196633) Follow-up to: - https://github.com/llvm/llvm-project/pull/140301 - https://github.com/llvm/llvm-project/pull/168731 I'm mostly just following the steps of https://github.com/llvm/llvm-project/pull/153113/ and other prior PRs here. I don't have any idea how testing works yet. CC @artagnon @oscardssmith Added: Modified: clang/docs/LanguageExtensions.rst clang/docs/ReleaseNotes.rst clang/include/clang/Basic/Builtins.td clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ExprConstant.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/AST/ByteCode/builtin-functions.cpp clang/test/CodeGen/builtins-elementwise-math.c clang/test/Sema/builtins-elementwise-math.c Removed: ################################################################################ diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 03cb02deb5e7f..fbb9947f39d3e 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -903,6 +903,8 @@ T __builtin_elementwise_fshr(T x, T y, T z) perform a funnel shift right. Co the first argument is 0 and an optional second argument is provided, the second argument is returned. It is undefined behaviour if the first argument is 0 and no second argument is provided. +T __builtin_elementwise_clmul(T x, T y) perform a carry-less multiplication of x and y, returning the least integer types + significant bits of the wide result. 
============================================== ====================================================================== ========================================= diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3c1eacfc05dc8..3813a61d51b62 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -278,6 +278,10 @@ Non-comprehensive list of changes in this release extends bit-reversal support to all standard integers type, including ``_BitInt`` +- Added ``__builtin_elementwise_clmul`` for carry-less multiplication of + integers including ``_BitInt`` types. This includes constexpr evaluation + support. + - Deprecated float types support from ``__builtin_elementwise_max`` and ``__builtin_elementwise_min``. diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 40ec94ab75046..84799929cee87 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1806,6 +1806,12 @@ def ElementwiseCttz : Builtin { let Prototype = "void(...)"; } +def ElementwiseClmul : Builtin { + let Spellings = ["__builtin_elementwise_clmul"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + def ReduceMax : Builtin { let Spellings = ["__builtin_reduce_max"]; let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3e9ce902427eb..ffdbc2346b5ca 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -5532,6 +5532,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pclmulqdq256: case clang::X86::BI__builtin_ia32_pclmulqdq512: return interp__builtin_ia32_pclmulqdq(S, OpPC, Call); + case Builtin::BI__builtin_elementwise_clmul: + return interp__builtin_elementwise_int_binop(S, OpPC, Call, + 
llvm::APIntOps::clmul); case Builtin::BI__builtin_elementwise_fma: return interp__builtin_elementwise_triop_fp( diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 38aa5798cfeb9..0522d6f1dc944 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14084,6 +14084,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case Builtin::BI__builtin_elementwise_clmul: + return EvaluateBinOpExpr(llvm::APIntOps::clmul); case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; @@ -17216,6 +17218,15 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, APInt Result = std::min(LHS, RHS); return Success(APSInt(Result, !LHS.isSigned()), E); } + case Builtin::BI__builtin_elementwise_clmul: { + APSInt LHS, RHS; + if (!EvaluateInteger(E->getArg(0), LHS, Info) || + !EvaluateInteger(E->getArg(1), RHS, Info)) + return false; + + APInt Result = llvm::APIntOps::clmul(LHS, RHS); + return Success(APSInt(Result, LHS.isUnsigned()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APSInt Hi, Lo, Shift; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index cac1628e68721..b1d727cb5e0ad 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4246,6 +4246,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_fshr: return RValue::get( emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr)); + case Builtin::BI__builtin_elementwise_clmul: + return RValue::get( + emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::clmul)); case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { diff --git a/clang/lib/Sema/SemaChecking.cpp 
b/clang/lib/Sema/SemaChecking.cpp index cc834bbee23c4..2309196ee1696 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3615,6 +3615,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, // types only. case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: + case Builtin::BI__builtin_elementwise_clmul: if (BuiltinElementwiseMath(TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy)) return ExprError(); diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 97fa1760ee167..6e4b894d63db3 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1385,6 +1385,23 @@ namespace ElementwisePopcount { #endif } +namespace ElementwiseClmul { + static_assert(__builtin_elementwise_clmul(0U, 0U) == 0U); + static_assert(__builtin_elementwise_clmul(1U, 1U) == 1U); + static_assert(__builtin_elementwise_clmul(3U, 3U) == 5U); + static_assert(__builtin_elementwise_clmul(0xBU, 0xDU) == 0x7FU); + static_assert(__builtin_elementwise_clmul(0xFU, 0xFU) == 0x55U); +#ifndef __AVR__ + static_assert(__builtin_elementwise_clmul((unsigned _BitInt(31))3, + (unsigned _BitInt(31))3) == + (unsigned _BitInt(31))5); +#endif + + static_assert(__builtin_reduce_add(__builtin_elementwise_clmul( + (vector4uint){0U, 1U, 3U, 7U}, + (vector4uint){0U, 1U, 3U, 7U})) == 27U); +} + namespace BuiltinMemcpy { constexpr int simple() { int a = 12; diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index ff6338a8b85e7..d0e4a6fa10cfc 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -1297,6 +1297,39 @@ void test_builtin_elementwise_fshl(long long int i1, long long int i2, u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3); } +void test_builtin_elementwise_clmul(unsigned int ui1, 
unsigned int ui2, + unsigned short us1, unsigned short us2, + u4 vu1, u4 vu2, + unsigned _BitInt(31) bi1, + unsigned _BitInt(31) bi2) { + // CHECK: [[UI1:%.+]] = load i32, ptr %ui1.addr, align 4 + // CHECK-NEXT: [[UI2:%.+]] = load i32, ptr %ui2.addr, align 4 + // CHECK-NEXT: [[UI3:%.+]] = call i32 @llvm.clmul.i32(i32 [[UI1]], i32 [[UI2]]) + // CHECK-NEXT: store i32 [[UI3]], ptr %ui1.addr, align 4 + ui1 = __builtin_elementwise_clmul(ui1, ui2); + + // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2 + // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2 + // CHECK-NEXT: [[US3:%.+]] = call i16 @llvm.clmul.i16(i16 [[US1]], i16 [[US2]]) + // CHECK-NEXT: store i16 [[US3]], ptr %us1.addr, align 2 + us1 = __builtin_elementwise_clmul(us1, us2); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16 + // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16 + // CHECK-NEXT: [[VU3:%.+]] = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]]) + // CHECK-NEXT: store <4 x i32> [[VU3]], ptr %vu1.addr, align 16 + vu1 = __builtin_elementwise_clmul(vu1, vu2); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4 + // CHECK-NEXT: [[BI1TRUNC:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4 + // CHECK-NEXT: [[BI2TRUNC:%.+]] = trunc i32 [[BI2]] to i31 + // CHECK-NEXT: [[BIRES:%.+]] = call i31 @llvm.clmul.i31(i31 [[BI1TRUNC]], i31 [[BI2TRUNC]]) + // CHECK-NEXT: [[BIRESZEXT:%.+]] = zext i31 [[BIRES]] to i32 + // CHECK-NEXT: store i32 [[BIRESZEXT]], ptr %bi1.addr, align 4 + bi1 = __builtin_elementwise_clmul(bi1, bi2); +} + void test_builtin_elementwise_clzg(si8 vs1, si8 vs2, u4 vu1, long long int lli, short si, _BitInt(31) bi, int i, diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 53cccc6e855f0..4ffdcee3ca9c7 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ 
-180,6 +180,40 @@ void test_builtin_elementwise_sub_sat(int i, short s, double d, float4 v, int3 i // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}} } +void test_builtin_elementwise_clmul(int i, short s, double d, float4 v, + int3 iv, unsigned3 uv, unsigned u, + unsigned4 vu, int *p) { + i = __builtin_elementwise_clmul(p, d); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'int *')}} + + struct Foo foo = __builtin_elementwise_clmul(i, i); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_elementwise_clmul(i); + // expected-error@-1 {{too few arguments to function call, expected 2, have 1}} + + i = __builtin_elementwise_clmul(); + // expected-error@-1 {{too few arguments to function call, expected 2, have 0}} + + i = __builtin_elementwise_clmul(i, i, i); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} + + i = __builtin_elementwise_clmul(v, v); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'float4' (vector of 4 'float' values))}} + + i = __builtin_elementwise_clmul(i, s); + // expected-error@-1 {{arguments are of different types ('int' vs 'short')}} + + i = __builtin_elementwise_clmul(uv, iv); + // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}} + + unsigned _BitInt(31) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}} + ext = __builtin_elementwise_clmul(ext, ext); + + u = __builtin_elementwise_clmul(u, u); + vu = __builtin_elementwise_clmul(vu, vu); +} + void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) { i = __builtin_elementwise_max(p, d); // expected-error@-1 {{1st argument must be a vector, integer or floating-point type (was 'int *')}} 
_______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
