Author: Muhammad Abdul Date: 2025-11-22T11:53:34Z New Revision: 7305b6eb5458b5cea62b3ab70da95b790cf988f3
URL: https://github.com/llvm/llvm-project/commit/7305b6eb5458b5cea62b3ab70da95b790cf988f3 DIFF: https://github.com/llvm/llvm-project/commit/7305b6eb5458b5cea62b3ab70da95b790cf988f3.diff LOG: [clang][X86] Allow VALIGND/Q element-shift intrinsics in constexpr evaluation (#168206) Fixes #167681 Added: Modified: clang/include/clang/Basic/BuiltinsX86.td clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ExprConstant.cpp clang/test/CodeGen/X86/avx512f-builtins.c clang/test/CodeGen/X86/avx512vl-builtins.c Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b760c3e06b8f7..132c1e8ea7190 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1045,24 +1045,24 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in { def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, 
int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index eba71d66bc4d6..a403f293a5c34 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4906,6 +4906,24 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::pair<unsigned, int>{VecIdx, ElemIdx}; }); + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements(); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElems - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElems ? 
1u : 0u; + unsigned ElemIdx = SourcePos & (NumElems - 1); + return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)}; + }); + } + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ce5301f17b3e7..bc885f4c89028 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13627,6 +13627,28 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + APValue R; + unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements(); + if (!evalShuffleGeneric(Info, E, R, + [NumElems](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElems - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElems ? 
1 : 0; + unsigned ElemIdx = SourcePos & (NumElems - 1); + + return std::pair<unsigned, int>{ + VecIdx, static_cast<int>(ElemIdx)}; + })) + return false; + return Success(R, E); + } case X86::BI__builtin_ia32_permvarsi256: case X86::BI__builtin_ia32_permvarsf256: case X86::BI__builtin_ia32_permvardf512: diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index e4a9d9cb3781d..1f4bb51a2e34f 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -519,6 +519,40 @@ __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b) return _mm512_maskz_alignr_epi64(u, a, b, 2); } +TEST_CONSTEXPR(match_v16si(_mm512_alignr_epi32(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), + ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}), 19), + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 200, 300)); +TEST_CONSTEXPR(match_v16si(_mm512_mask_alignr_epi32(((__m512i)(__v16si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, + 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000}), + 0xA5A5, + ((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), + ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}), 19), + 3, 2000, 5, 4000, 5000, 8, 7000, 10, + 11, 10000, 13, 12000, 13000, 100, 15000, 300)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_alignr_epi32(0x0F0F, + ((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), + ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}), 19), + 3, 4, 5, 6, 0, 0, 0, 0, 11, 12, 13, 14, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_alignr_epi64(((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), + ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11), + 4, 5, 6, 7, 8, 10, 11, 12)); 
+TEST_CONSTEXPR(match_v8di(_mm512_mask_alignr_epi64(((__m512i)(__v8di){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}), + 0xA5, + ((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), + ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11), + 4, 2000, 6, 4000, 5000, 10, 7000, 12)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_alignr_epi64(0x33, + ((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), + ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11), + 4, 5, 0, 0, 8, 10, 0, 0)); + __m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_fmadd_round_pd // CHECK: @llvm.x86.avx512.vfmadd.pd.512 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 69adc75c80f1c..4b2ba3341af29 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -10642,6 +10642,53 @@ __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_maskz_alignr_epi64(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_v4si(_mm_alignr_epi32(((__m128i)(__v4si){100, 200, 300, 400}), + ((__m128i)(__v4si){10, 20, 30, 40}), 1), + 20, 30, 40, 100)); +TEST_CONSTEXPR(match_v4si(_mm_mask_alignr_epi32(((__m128i)(__v4si){1000, 2000, 3000, 4000}), 0x5, + ((__m128i)(__v4si){100, 200, 300, 400}), + ((__m128i)(__v4si){10, 20, 30, 40}), 1), + 20, 2000, 40, 4000)); +TEST_CONSTEXPR(match_v4si(_mm_maskz_alignr_epi32(0x3, + ((__m128i)(__v4si){100, 200, 300, 400}), + ((__m128i)(__v4si){10, 20, 30, 40}), 1), + 20, 30, 0, 0)); + +TEST_CONSTEXPR(match_v8si(_mm256_alignr_epi32(((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}), + ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3), + 4, 5, 6, 7, 8, 100, 200, 300)); +TEST_CONSTEXPR(match_v8si(_mm256_mask_alignr_epi32(((__m256i)(__v8si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}), + 0xA5, + ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}), + ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3), + 4, 2000, 
6, 4000, 5000, 100, 7000, 300)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_alignr_epi32(0x33, + ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}), + ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3), + 4, 5, 0, 0, 8, 100, 0, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_alignr_epi64(((__m128i)(__v2di){10, 11}), ((__m128i)(__v2di){1, 2}), 1), 2, 10)); +TEST_CONSTEXPR(match_v2di(_mm_mask_alignr_epi64(((__m128i)(__v2di){1000, 2000}), 0x1, + ((__m128i)(__v2di){10, 11}), + ((__m128i)(__v2di){1, 2}), 1), + 2, 2000)); +TEST_CONSTEXPR(match_v2di(_mm_maskz_alignr_epi64(0x2, + ((__m128i)(__v2di){10, 11}), + ((__m128i)(__v2di){1, 2}), 1), + 0, 10)); + +TEST_CONSTEXPR(match_v4di(_mm256_alignr_epi64(((__m256i)(__v4di){10, 11, 12, 13}), + ((__m256i)(__v4di){1, 2, 3, 4}), 2), + 3, 4, 10, 11)); +TEST_CONSTEXPR(match_v4di(_mm256_mask_alignr_epi64(((__m256i)(__v4di){1000, 2000, 3000, 4000}), 0x5, + ((__m256i)(__v4di){10, 11, 12, 13}), + ((__m256i)(__v4di){1, 2, 3, 4}), 2), + 3, 2000, 10, 4000)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_alignr_epi64(0xA, + ((__m256i)(__v4di){10, 11, 12, 13}), + ((__m256i)(__v4di){1, 2, 3, 4}), 2), + 0, 4, 0, 11)); + __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_mask_movehdup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
