https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/168206
>From 5e69309e94d0146759018f44e254acf9aff5b572 Mon Sep 17 00:00:00 2001 From: 0xzre <[email protected]> Date: Sat, 15 Nov 2025 15:18:13 +0700 Subject: [PATCH 1/7] [clang][X86] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow VALIGND/Q element shift intrinsics to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 10 ++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 21 ++++++++ clang/lib/AST/ExprConstant.cpp | 24 +++++++++ .../test/AST/ByteCode/x86-valign-builtins.cpp | 53 +++++++++++++++++++ 4 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 clang/test/AST/ByteCode/x86-valign-builtins.cpp diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 69d18679fd6ec..aad0361ba9a8b 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1072,24 +1072,24 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in { def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index cee3c1b8cf8f3..7c08475065d9d 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4774,6 +4774,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::pair<unsigned, int>{VecIdx, ElemIdx}; }); + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + const unsigned NumElts = + Call->getType()->castAs<VectorType>()->getNumElements(); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [NumElts](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElts - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElts ? 1u : 0u; + unsigned ElemIdx = + SourcePos < NumElts ? SourcePos : SourcePos - NumElts; + return std::pair<unsigned, int>{VecIdx, + static_cast<int>(ElemIdx)}; + }); + } + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 29357eec2eeb6..b3f17da8e1158 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13551,6 +13551,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + APValue R; + const unsigned NumElts = + E->getType()->castAs<VectorType>()->getNumElements(); + if (!evalShuffleGeneric( + Info, E, R, [NumElts](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElts - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElts ? 1 : 0; + unsigned ElemIdx = + SourcePos < NumElts ? SourcePos : SourcePos - NumElts; + + return std::pair<unsigned, int>{VecIdx, + static_cast<int>(ElemIdx)}; + })) + return false; + return Success(R, E); + } case X86::BI__builtin_ia32_permvarsi256: case X86::BI__builtin_ia32_permvarsf256: case X86::BI__builtin_ia32_permvardf512: diff --git a/clang/test/AST/ByteCode/x86-valign-builtins.cpp b/clang/test/AST/ByteCode/x86-valign-builtins.cpp new file mode 100644 index 0000000000000..a54e72fab90ce --- /dev/null +++ b/clang/test/AST/ByteCode/x86-valign-builtins.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=expected -fexperimental-new-constant-interpreter %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=ref %s + +// expected-no-diagnostics +// ref-no-diagnostics + +#define __MM_MALLOC_H +#include <immintrin.h> + +using v4si = int __attribute__((vector_size(16))); +using v8si = int __attribute__((vector_size(32))); +using v16si = int __attribute__((vector_size(64))); +using v4di = long long __attribute__((vector_size(32))); + +constexpr v4si test_alignr_epi32_128() { + v4si A = {100, 200, 300, 400}; + v4si B = {10, 20, 30, 40}; + return (v4si)_mm_alignr_epi32((__m128i)A, (__m128i)B, 1); +} + +constexpr v8si test_alignr_epi32_256() { + v8si A = {100, 200, 300, 400, 500, 600, 700, 800}; + v8si B = {1, 2, 3, 4, 5, 6, 7, 8}; + return (v8si)_mm256_alignr_epi32((__m256i)A, (__m256i)B, 3); +} + +constexpr v16si test_alignr_epi32_512_wrap() { + v16si A = {100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}; + v16si B = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + return (v16si)_mm512_alignr_epi32((__m512i)A, (__m512i)B, 19); +} + +constexpr v4di test_alignr_epi64_256() { + v4di A = {10, 11, 12, 13}; + v4di B = {1, 2, 3, 4}; + return (v4di)_mm256_alignr_epi64((__m256i)A, (__m256i)B, 2); +} + +constexpr v4si R128 = test_alignr_epi32_128(); +static_assert(R128[0] == 20 && R128[1] == 30 && R128[2] == 40 && R128[3] == 100); + +constexpr v8si R256 = test_alignr_epi32_256(); +static_assert(R256[0] == 4 && R256[1] == 5 && R256[2] == 6 && R256[3] == 7); +static_assert(R256[4] == 8 && R256[5] == 100 && R256[6] == 200 && R256[7] == 300); + +constexpr v16si R512 = test_alignr_epi32_512_wrap(); +static_assert(R512[0] == 3 && R512[1] == 4 && R512[2] == 5 && R512[3] == 6); +static_assert(R512[8] == 11 && R512[9] == 12 && R512[10] == 13 && R512[11] == 14); +static_assert(R512[12] == 15 && R512[13] == 100 && R512[14] == 200 && R512[15] == 300); + +constexpr v4di R64 = test_alignr_epi64_256(); +static_assert(R64[0] == 3 && R64[1] == 4 && R64[2] == 10 && R64[3] == 11); >From 3fe153eb13b5303154a8b2bcfe477742a69c902a Mon Sep 17 00:00:00 2001 From: 0xzre <[email protected]> Date: Sun, 16 Nov 2025 15:48:24 +0700 Subject: [PATCH 2/7] fix vars & clang format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 7c08475065d9d..b4ccec866f3fb 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4780,18 +4780,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_alignq128: case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { - const unsigned NumElts = + unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements(); return interp__builtin_ia32_shuffle_generic( - S, OpPC, Call, [NumElts](unsigned DstIdx, unsigned Shift) { + S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) { unsigned Imm = Shift & 0xFF; - unsigned EffectiveShift = Imm & (NumElts - 1); + unsigned EffectiveShift = Imm & (NumElems - 1); unsigned SourcePos = DstIdx + EffectiveShift; - unsigned VecIdx = SourcePos < NumElts ? 1u : 0u; + unsigned VecIdx = SourcePos < NumElems ? 1u : 0u; unsigned ElemIdx = - SourcePos < NumElts ? SourcePos : SourcePos - NumElts; - return std::pair<unsigned, int>{VecIdx, - static_cast<int>(ElemIdx)}; + SourcePos < NumElems ? SourcePos : SourcePos - NumElems; + return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)}; }); } >From d17b6de83c15099a362ce0d78da8dee594fff6e7 Mon Sep 17 00:00:00 2001 From: 0xzre <[email protected]> Date: Sun, 16 Nov 2025 15:49:43 +0700 Subject: [PATCH 3/7] add test using TEST_CONSTEXPR on avx512*builtins.cpp --- clang/test/CodeGen/X86/avx512f-builtins.c | 34 ++++++++++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 47 ++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 71e700af0069e..b96fef14dd86f 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -519,6 +519,40 @@ __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b) return _mm512_maskz_alignr_epi64(u, a, b, 2); } +TEST_CONSTEXPR(match_v16si(_mm512_alignr_epi32(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), + ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}), 19), + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 200, 300)); +TEST_CONSTEXPR(match_v16si(_mm512_mask_alignr_epi32(((__m512i)(__v16si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, + 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000}), + 0xA5A5, + ((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), + ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}), 19), + 3, 2000, 5, 4000, 5000, 8, 7000, 10, + 11, 10000, 13, 12000, 13000, 100, 15000, 300)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_alignr_epi32(0x0F0F, + ((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), + ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}), 19), + 3, 4, 5, 6, 0, 0, 0, 0, 11, 12, 13, 14, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v8di(_mm512_alignr_epi64(((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), + ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11), + 4, 5, 6, 7, 8, 10, 11, 12)); +TEST_CONSTEXPR(match_v8di(_mm512_mask_alignr_epi64(((__m512i)(__v8di){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}), + 0xA5, + ((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), + ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11), + 4, 2000, 6, 4000, 5000, 10, 7000, 12)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_alignr_epi64(0x33, + ((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), + ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11), + 4, 5, 0, 0, 8, 10, 0, 0)); + __m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_fmadd_round_pd // CHECK: @llvm.x86.avx512.vfmadd.pd.512 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index a7eee79c97539..a1730b55a5c42 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -10518,6 +10518,53 @@ __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_maskz_alignr_epi64(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_v4si(_mm_alignr_epi32(((__m128i)(__v4si){100, 200, 300, 400}), + ((__m128i)(__v4si){10, 20, 30, 40}), 1), + 20, 30, 40, 100)); +TEST_CONSTEXPR(match_v4si(_mm_mask_alignr_epi32(((__m128i)(__v4si){1000, 2000, 3000, 4000}), 0x5, + ((__m128i)(__v4si){100, 200, 300, 400}), + ((__m128i)(__v4si){10, 20, 30, 40}), 1), + 20, 2000, 40, 4000)); +TEST_CONSTEXPR(match_v4si(_mm_maskz_alignr_epi32(0x3, + ((__m128i)(__v4si){100, 200, 300, 400}), + ((__m128i)(__v4si){10, 20, 30, 40}), 1), + 20, 30, 0, 0)); + +TEST_CONSTEXPR(match_v8si(_mm256_alignr_epi32(((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}), + ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3), + 4, 5, 6, 7, 8, 100, 200, 300)); +TEST_CONSTEXPR(match_v8si(_mm256_mask_alignr_epi32(((__m256i)(__v8si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}), + 0xA5, + ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}), + ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3), + 4, 2000, 6, 4000, 5000, 100, 7000, 300)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_alignr_epi32(0x33, + ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}), + ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3), + 4, 5, 0, 0, 8, 100, 0, 0)); + +TEST_CONSTEXPR(match_v2di(_mm_alignr_epi64(((__m128i)(__v2di){10, 11}), ((__m128i)(__v2di){1, 2}), 1), 2, 10)); +TEST_CONSTEXPR(match_v2di(_mm_mask_alignr_epi64(((__m128i)(__v2di){1000, 2000}), 0x1, + ((__m128i)(__v2di){10, 11}), + ((__m128i)(__v2di){1, 2}), 1), + 2, 2000)); +TEST_CONSTEXPR(match_v2di(_mm_maskz_alignr_epi64(0x2, + ((__m128i)(__v2di){10, 11}), + ((__m128i)(__v2di){1, 2}), 1), + 0, 10)); + +TEST_CONSTEXPR(match_v4di(_mm256_alignr_epi64(((__m256i)(__v4di){10, 11, 12, 13}), + ((__m256i)(__v4di){1, 2, 3, 4}), 2), + 3, 4, 10, 11)); +TEST_CONSTEXPR(match_v4di(_mm256_mask_alignr_epi64(((__m256i)(__v4di){1000, 2000, 3000, 4000}), 0x5, + ((__m256i)(__v4di){10, 11, 12, 13}), + ((__m256i)(__v4di){1, 2, 3, 4}), 2), + 3, 2000, 10, 4000)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_alignr_epi64(0xA, + ((__m256i)(__v4di){10, 11, 12, 13}), + ((__m256i)(__v4di){1, 2, 3, 4}), 2), + 0, 4, 0, 11)); + __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_mask_movehdup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3> >From 85ad38f40aac7b268a08289a100fc29e147bdfdf Mon Sep 17 00:00:00 2001 From: 0xzre <[email protected]> Date: Sun, 16 Nov 2025 15:56:02 +0700 Subject: [PATCH 4/7] remove uneeded bytecode test --- .../test/AST/ByteCode/x86-valign-builtins.cpp | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 clang/test/AST/ByteCode/x86-valign-builtins.cpp diff --git a/clang/test/AST/ByteCode/x86-valign-builtins.cpp b/clang/test/AST/ByteCode/x86-valign-builtins.cpp deleted file mode 100644 index a54e72fab90ce..0000000000000 --- a/clang/test/AST/ByteCode/x86-valign-builtins.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=expected -fexperimental-new-constant-interpreter %s -// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512dq -verify=ref %s - -// expected-no-diagnostics -// ref-no-diagnostics - -#define __MM_MALLOC_H -#include <immintrin.h> - -using v4si = int __attribute__((vector_size(16))); -using v8si = int __attribute__((vector_size(32))); -using v16si = int __attribute__((vector_size(64))); -using v4di = long long __attribute__((vector_size(32))); - -constexpr v4si test_alignr_epi32_128() { - v4si A = {100, 200, 300, 400}; - v4si B = {10, 20, 30, 40}; - return (v4si)_mm_alignr_epi32((__m128i)A, (__m128i)B, 1); -} - -constexpr v8si test_alignr_epi32_256() { - v8si A = {100, 200, 300, 400, 500, 600, 700, 800}; - v8si B = {1, 2, 3, 4, 5, 6, 7, 8}; - return (v8si)_mm256_alignr_epi32((__m256i)A, (__m256i)B, 3); -} - -constexpr v16si test_alignr_epi32_512_wrap() { - v16si A = {100, 200, 300, 400, 500, 600, 700, 800, - 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}; - v16si B = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - return (v16si)_mm512_alignr_epi32((__m512i)A, (__m512i)B, 19); -} - -constexpr v4di test_alignr_epi64_256() { - v4di A = {10, 11, 12, 13}; - v4di B = {1, 2, 3, 4}; - return (v4di)_mm256_alignr_epi64((__m256i)A, (__m256i)B, 2); -} - -constexpr v4si R128 = test_alignr_epi32_128(); -static_assert(R128[0] == 20 && R128[1] == 30 && R128[2] == 40 && R128[3] == 100); - -constexpr v8si R256 = test_alignr_epi32_256(); -static_assert(R256[0] == 4 && R256[1] == 5 && R256[2] == 6 && R256[3] == 7); -static_assert(R256[4] == 8 && R256[5] == 100 && R256[6] == 200 && R256[7] == 300); - -constexpr v16si R512 = test_alignr_epi32_512_wrap(); -static_assert(R512[0] == 3 && R512[1] == 4 && R512[2] == 5 && R512[3] == 6); -static_assert(R512[8] == 11 && R512[9] == 12 && R512[10] == 13 && R512[11] == 14); -static_assert(R512[12] == 15 && R512[13] == 100 && R512[14] == 200 && R512[15] == 300); - -constexpr v4di R64 = test_alignr_epi64_256(); -static_assert(R64[0] == 3 && R64[1] == 4 && R64[2] == 10 && R64[3] == 11); >From 24504472d94877d849269cd61bdcb4d32b1b5260 Mon Sep 17 00:00:00 2001 From: 0xzre <[email protected]> Date: Wed, 19 Nov 2025 09:46:49 +0700 Subject: [PATCH 5/7] high perf circular queues & fix format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 ++---- clang/lib/AST/ExprConstant.cpp | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b4ccec866f3fb..38f44b63974a3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4780,16 +4780,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_alignq128: case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { - unsigned NumElems = - Call->getType()->castAs<VectorType>()->getNumElements(); + unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements(); return interp__builtin_ia32_shuffle_generic( S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) { unsigned Imm = Shift & 0xFF; unsigned EffectiveShift = Imm & (NumElems - 1); unsigned SourcePos = DstIdx + EffectiveShift; unsigned VecIdx = SourcePos < NumElems ? 1u : 0u; - unsigned ElemIdx = - SourcePos < NumElems ? SourcePos : SourcePos - NumElems; + unsigned ElemIdx = SourcePos & (NumElems - 1); return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)}; }); } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b3f17da8e1158..eb16aa76a96ba 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13558,16 +13558,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { APValue R; - const unsigned NumElts = - E->getType()->castAs<VectorType>()->getNumElements(); + const unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements(); if (!evalShuffleGeneric( - Info, E, R, [NumElts](unsigned DstIdx, unsigned Shift) { + Info, E, R, [NumElems](unsigned DstIdx, unsigned Shift) { unsigned Imm = Shift & 0xFF; - unsigned EffectiveShift = Imm & (NumElts - 1); + unsigned EffectiveShift = Imm & (NumElems - 1); unsigned SourcePos = DstIdx + EffectiveShift; - unsigned VecIdx = SourcePos < NumElts ? 1 : 0; - unsigned ElemIdx = - SourcePos < NumElts ? SourcePos : SourcePos - NumElts; + unsigned VecIdx = SourcePos < NumElems ? 1 : 0; + unsigned ElemIdx = SourcePos & (NumElems - 1); return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)}; >From ebc0a10aca0943133907d81f49407e91258c285d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Wed, 19 Nov 2025 10:29:20 +0000 Subject: [PATCH 6/7] Apply suggestion from @RKSimon Unnecessary const (and hopefully fix clang-format warning) --- clang/lib/AST/ExprConstant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index eb16aa76a96ba..0a4599688c872 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13558,7 +13558,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { APValue R; - const unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements(); + unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements(); if (!evalShuffleGeneric( Info, E, R, [NumElems](unsigned DstIdx, unsigned Shift) { unsigned Imm = Shift & 0xFF; >From b77bea6d181cc5fbbb6cf50f4547b68a900b5ce4 Mon Sep 17 00:00:00 2001 From: 0xzre <[email protected]> Date: Sat, 22 Nov 2025 08:02:05 +0700 Subject: [PATCH 7/7] fix clang format --- clang/lib/AST/ExprConstant.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 0a4599688c872..98e3fac1e3eb9 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13559,17 +13559,17 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_alignq512: { APValue R; unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements(); - if (!evalShuffleGeneric( - Info, E, R, [NumElems](unsigned DstIdx, unsigned Shift) { - unsigned Imm = Shift & 0xFF; - unsigned EffectiveShift = Imm & (NumElems - 1); - unsigned SourcePos = DstIdx + EffectiveShift; - unsigned VecIdx = SourcePos < NumElems ? 1 : 0; - unsigned ElemIdx = SourcePos & (NumElems - 1); - - return std::pair<unsigned, int>{VecIdx, - static_cast<int>(ElemIdx)}; - })) + if (!evalShuffleGeneric(Info, E, R, + [NumElems](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElems - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElems ? 1 : 0; + unsigned ElemIdx = SourcePos & (NumElems - 1); + + return std::pair<unsigned, int>{ + VecIdx, static_cast<int>(ElemIdx)}; + })) return false; return Success(R, E); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
