https://github.com/Temperz87 created https://github.com/llvm/llvm-project/pull/160636
This PR resolves #155805 and updates the following builtins to handle constant expressions: ``` _mm_mulhrs_pi16 mm_mulhrs_epi16 mm256_mulhrs_epi16 mm512_mulhrs_epi16 ``` >From 5114deca2ad04ce6b28ee62407324c18d7215e41 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Wed, 24 Sep 2025 23:32:09 -0400 Subject: [PATCH 1/2] Add support for constexpr with PMULHRSW --- clang/include/clang/Basic/BuiltinsX86.td | 9 ++++++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 15 +++++++++++++++ clang/lib/AST/ExprConstant.cpp | 19 ++++++++++++++++++- clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avx512bwintrin.h | 6 +++--- clang/lib/Headers/avx512vlbwintrin.h | 6 +++--- clang/lib/Headers/tmmintrin.h | 11 +++++++---- clang/test/CodeGen/X86/avx2-builtins.c | 1 + clang/test/CodeGen/X86/avx512bw-builtins.c | 2 ++ clang/test/CodeGen/X86/mmx-builtins.c | 1 + clang/test/CodeGen/X86/ssse3-builtins.c | 1 + 11 files changed, 58 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 0d44e78f879b9..d5cb2cadee075 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -125,12 +125,15 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">; - def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; } + + let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + } } // AVX @@ -584,7 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">; def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">; def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; - def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; @@ -629,6 +631,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; + def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -1340,7 +1343,7 @@ let Features = "avx512bitalg,evex512", Attributes = [NoThrow, Const, RequiredVec def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">; } -let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ff6ef5a1f6864..3d6763a20098c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -8,6 +8,7 @@ #include "../ExprConstShared.h" #include "Boolean.h" #include "EvalEmitter.h" +#include "FixedPoint.h" #include "Interp.h" #include "InterpBuiltinBitCast.h" #include "PrimType.h" @@ -3311,6 +3312,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + + case clang::X86::BI__builtin_ia32_pmulhrsw128: + case clang::X86::BI__builtin_ia32_pmulhrsw256: + case clang::X86::BI__builtin_ia32_pmulhrsw512: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, BuiltinID,[](const APSInt &LHS, const APSInt &RHS) { + unsigned width = LHS.getBitWidth(); + + APInt mul = llvm::APIntOps::mulhs(LHS, RHS); + mul = mul.relativeLShr(14); + mul = mul.sadd_sat(APInt(width, 1, true)); + return APInt(mul.relativeLShr(1)); + }); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 6c6909e5b2370..2d04b2e016cc6 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11679,7 +11679,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } case Builtin::BI__builtin_elementwise_add_sat: - case Builtin::BI__builtin_elementwise_sub_sat: + case Builtin::BI__builtin_elementwise_sub_sat: + case clang::X86::BI__builtin_ia32_pmulhrsw128: + case clang::X86::BI__builtin_ia32_pmulhrsw256: + case clang::X86::BI__builtin_ia32_pmulhrsw512: case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: @@ -11813,6 +11816,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), DestUnsigned))); break; + + case clang::X86::BI__builtin_ia32_pmulhrsw128: + case clang::X86::BI__builtin_ia32_pmulhrsw256: + case clang::X86::BI__builtin_ia32_pmulhrsw512: { + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned width = Info.Ctx.getIntWidth(DestEltTy); + + APInt mul = llvm::APIntOps::mulhs(LHS, RHS); + mul = mul.relativeLShr(14); + mul = mul.sadd_sat(APInt(width, 1, true)); + ResultElements.push_back(APValue(APSInt(mul.relativeLShr(1)))); + break; + } case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: @@ -11825,6 +11841,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), /*isUnsigned=*/false))); break; + case clang::X86::BI__builtin_ia32_psllv2di: case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 5a32312be200e..4fc5f8e469c53 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1678,7 +1678,7 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) { /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the rounded products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index a08735b937704..25bd6ceb8eac6 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1046,13 +1046,13 @@ _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mulhrs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1060,7 +1060,7 @@ _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 888086dc214f1..90108a50e5f79 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -1571,21 +1571,21 @@ _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index f01c61afa8ea2..6214eb66f0375 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -29,6 +29,9 @@ #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) #define __anyext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, -1, -1) @@ -560,7 +563,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); @@ -580,11 +583,11 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_pi16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); + return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), + (__v8hi)__zext128(__b))); } /// Copies the 8-bit integers from a 128-bit integer vector to the diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 18ce88c8fb4d0..13a28dcc373b4 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -987,6 +987,7 @@ __m256i test_mm256_mulhrs_epi16(__m256i a, __m256i b) { // CHECK: call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_mulhrs_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_mulhrs_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2)); __m256i test_mm256_mullo_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mullo_epi16 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index f9330a1d914bc..72d0f1390aaea 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1375,6 +1375,8 @@ __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.pmul.hr.sw.512 return _mm512_mulhrs_epi16(__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mulhrs_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2)); + __m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_mulhrs_epi16 // CHECK: @llvm.x86.avx512.pmul.hr.sw.512 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index b19e82383cbfd..c65853ca76dfe 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -426,6 +426,7 @@ __m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128( return _mm_mulhrs_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_mulhrs_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), 2, 2, 0, 0)); __m64 test_mm_mullo_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mullo_pi16 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index 56ff73f08ab32..fb2221d62dba9 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -102,6 +102,7 @@ __m128i test_mm_mulhrs_epi16(__m128i a, __m128i b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_mulhrs_epi16(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_mulhrs_epi16((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), 2, 2, 0, 0)); __m128i test_mm_shuffle_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_shuffle_epi8 >From 8b3895110de80df82aeeace7c1f71a97fa93cb63 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Wed, 24 Sep 2025 23:35:19 -0400 Subject: [PATCH 2/2] Remove accidentally new include directive --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3d6763a20098c..373a651bb9fbe 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -8,7 +8,6 @@ #include "../ExprConstShared.h" #include "Boolean.h" #include "EvalEmitter.h" -#include "FixedPoint.h" #include "Interp.h" #include "InterpBuiltinBitCast.h" #include "PrimType.h" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
