Author: Simon Pilgrim Date: 2025-09-03T07:58:22Z New Revision: 0f3ede911a2c54ae6c4cac9801edc28d83e4c6a0
URL: https://github.com/llvm/llvm-project/commit/0f3ede911a2c54ae6c4cac9801edc28d83e4c6a0 DIFF: https://github.com/llvm/llvm-project/commit/0f3ede911a2c54ae6c4cac9801edc28d83e4c6a0.diff LOG: [X86] Allow AVX512 512-bit variants of AVX2 per-element i32 shift intrinsics to be used in constexpr (#156480) Followup to #154780 Added: Modified: clang/include/clang/Basic/BuiltinsX86.td clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ExprConstant.cpp clang/lib/Headers/avx512fintrin.h clang/test/CodeGen/X86/avx512f-builtins.c Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index acd8f70c4a5f2..0d44e78f879b9 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1993,6 +1993,10 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW } let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def psllv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; + def psrav16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; + def psrlv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; + def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; @@ -2422,15 +2426,12 @@ let Features = "avx512vl", let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psllv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; def psllv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psrav16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; def psrav8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psrlv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; def psrlv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index e05b1a88c15bb..8c2b71160f7f3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3268,6 +3268,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16si: case clang::X86::BI__builtin_ia32_psllwi128: case clang::X86::BI__builtin_ia32_psllwi256: case clang::X86::BI__builtin_ia32_psllwi512: @@ -3287,6 +3288,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psrav4si: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16si: case clang::X86::BI__builtin_ia32_psrawi128: case clang::X86::BI__builtin_ia32_psrawi256: case clang::X86::BI__builtin_ia32_psrawi512: @@ -3308,6 +3310,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16si: case clang::X86::BI__builtin_ia32_psrlwi128: case clang::X86::BI__builtin_ia32_psrlwi256: case clang::X86::BI__builtin_ia32_psrlwi512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b9b2e6c791799..66362d44976c4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11687,12 +11687,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16si: case clang::X86::BI__builtin_ia32_psrav4si: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16si: case clang::X86::BI__builtin_ia32_psrlv2di: case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16si: case clang::X86::BI__builtin_ia32_psllwi128: case clang::X86::BI__builtin_ia32_pslldi128: @@ -11823,6 +11826,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16si: if (RHS.uge(RHS.getBitWidth())) { ResultElements.push_back( APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); @@ -11833,6 +11837,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { break; case clang::X86::BI__builtin_ia32_psrav4si: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16si: if (RHS.uge(RHS.getBitWidth())) { ResultElements.push_back( APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned))); @@ -11845,6 +11850,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16si: if (RHS.uge(RHS.getBitWidth())) { ResultElements.push_back( APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 5222141d21606..d3eaaf07d98cb 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -5644,23 +5644,20 @@ _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sllv_epi32(__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sllv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sllv_epi32(__X, __Y), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sllv_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); @@ -5732,23 +5729,20 @@ _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srav_epi32(__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srav_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srav_epi32(__X, __Y), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srav_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); @@ -5820,23 +5814,20 @@ _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srlv_epi32(__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srlv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srlv_epi32(__X, __Y), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srlv_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index eb3b28390947f..1d280d490f6a7 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -5918,6 +5918,7 @@ __m512i test_mm512_sllv_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psllv.d.512 return _mm512_sllv_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_sllv_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 2, -8, 24, -64, 0, 0, 0, 0, 536870912, 0, -2147483648, 0, 80, -48, 28, -16)); __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_sllv_epi32 @@ -5925,6 +5926,7 @@ __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sllv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_sllv_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, -8, 99, -64, 99, 0, 99, 99, 536870912, 99, 99, 99, 80, 99, 99, -16)); __m512i test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_sllv_epi32 @@ -5932,6 +5934,7 @@ __m512i test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sllv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_sllv_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 2, -8, 24, -64, 0, 0, 0, 0, 0, 0, -2147483648, 0, 80, -48, 0, 0)); __m512i test_mm512_sllv_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_sllv_epi64 @@ -5998,6 +6001,7 @@ __m512i test_mm512_srav_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psrav.d.512 return _mm512_srav_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_srav_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -4)); __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srav_epi32 @@ -6005,6 +6009,7 @@ __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srav_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_srav_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, -1, 99, -1, 99, -1, 99, 99, 0, 99, 99, 99, 0, 99, 99, -4)); __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srav_epi32 @@ -6012,6 +6017,7 @@ __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srav_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srav_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, -1, 0, 0)); __m512i test_mm512_srav_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_srav_epi64 @@ -6078,6 +6084,7 @@ __m512i test_mm512_srlv_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psrlv.d.512 return _mm512_srlv_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_srlv_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, 1073741823, 0, 268435455, 0, 1, 0, 7, 0, 0, 0, 0, 0, 536870911, 1, 2147483644)); __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srlv_epi32 @@ -6085,6 +6092,7 @@ __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srlv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_srlv_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, 1073741823, 99, 268435455, 99, 1, 99, 99, 0, 99, 99, 99, 0, 99, 99, 2147483644)); __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srlv_epi32 @@ -6092,6 +6100,7 @@ __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srlv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srlv_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, 1073741823, 0, 268435455, 0, 0, 0, 7, 0, 0, 0, 0, 0, 536870911, 0, 0)); __m512i test_mm512_srlv_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_srlv_epi64 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
