llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: SeongJaePark (SeongjaeP) <details> <summary>Changes</summary> **This PR supersedes and replaces PR #<!-- -->158853** The original branch diverged too far from the main branch, resulting in significant merge conflicts that were difficult to resolve cleanly. To provide a clean and reviewable history, this new PR was created by cherry-picking the necessary commits onto a fresh branch based on the latest `main`. --- *(Original Description)* This patch enables the use of AVX/AVX512 subvector extraction intrinsics within `constexpr` functions. This is achieved by implementing the evaluation logic for these intrinsics in `VectorExprEvaluator::VisitCallExpr` and `InterpretBuiltin`. The original discussion and review comments can be found in the previous pull request for context: #<!-- -->158853 --- Patch is 40.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162836.diff 13 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (+7-7) - (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+105) - (modified) clang/lib/AST/ExprConstant.cpp (+75) - (modified) clang/lib/Headers/avx512dqintrin.h (+4-4) - (modified) clang/lib/Headers/avx512fintrin.h (+4-4) - (modified) clang/lib/Headers/avx512vldqintrin.h (+2-2) - (modified) clang/lib/Headers/avx512vlintrin.h (+2-2) - (modified) clang/test/CodeGen/X86/avx-builtins.c (+7-1) - (modified) clang/test/CodeGen/X86/avx2-builtins.c (+1) - (modified) clang/test/CodeGen/X86/avx512dq-builtins.c (+12) - (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+12-1) - (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+6) - (modified) clang/test/CodeGen/X86/avx512vldq-builtins.c (+6) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 217589d7add1d..b3b4d5e076fd8 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ 
b/clang/include/clang/Basic/BuiltinsX86.td @@ -471,7 +471,7 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">; def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">; def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; @@ -576,7 +576,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } -let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -1065,7 +1065,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def extractf64x4_mask : X86Builtin<"_Vector<4, double>(_Vector<8, double>, _Constant int, _Vector<4, double>, unsigned char)">; def extractf32x4_mask : 
X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">; } @@ -2944,24 +2944,24 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { def pmovqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def extractf32x8_mask : X86Builtin<"_Vector<8, float>(_Vector<16, float>, _Constant int, _Vector<8, float>, unsigned char)">; def extractf64x2_512_mask : X86Builtin<"_Vector<2, double>(_Vector<8, double>, _Constant int, _Vector<2, double>, unsigned char)">; def extracti32x8_mask : X86Builtin<"_Vector<8, int>(_Vector<16, int>, _Constant int, _Vector<8, int>, unsigned char)">; def extracti64x2_512_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def extracti32x4_mask : X86Builtin<"_Vector<4, int>(_Vector<16, int>, _Constant int, _Vector<4, int>, unsigned char)">; def extracti64x4_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, long long int>, _Constant int, _Vector<4, long long int>, unsigned char)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def extractf64x2_256_mask : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int, _Vector<2, double>, unsigned char)">; def extracti64x2_256_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">; } -let 
Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def extractf32x4_256_mask : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int, _Vector<4, float>, unsigned char)">; def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 922d67940e22f..d844f93d1b983 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2819,6 +2819,92 @@ static bool interp__builtin_elementwise_triop( return true; } +static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() == 2); + + APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); + uint64_t Index = ImmAPS.getZExtValue(); + + const Pointer &Src = S.Stk.pop<Pointer>(); + if (!Src.getFieldDesc()->isPrimitiveArray()) + return false; + + const Pointer &Dst = S.Stk.peek<Pointer>(); + if (!Dst.getFieldDesc()->isPrimitiveArray()) + return false; + + unsigned SrcElems = Src.getNumElems(); + unsigned DstElems = Dst.getNumElems(); + + if (SrcElems == 0 || DstElems == 0 || (SrcElems % DstElems) != 0) + return false; + + unsigned NumLanes = SrcElems / DstElems; + unsigned Lane = static_cast<unsigned>(Index % NumLanes); + unsigned ExtractPos = Lane * DstElems; + + PrimType ElemT = Src.getFieldDesc()->getPrimType(); + if (ElemT != Dst.getFieldDesc()->getPrimType()) + return false; + + TYPE_SWITCH(ElemT, { + for (unsigned I = 0; I != DstElems; ++I) { + Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I); + } + }); + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() 
== 4); + + APSInt MaskAPS = popToAPSInt(S, Call->getArg(3)); + const Pointer &Merge = S.Stk.pop<Pointer>(); + APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop<Pointer>(); + + if (!Src.getFieldDesc()->isPrimitiveArray() || !Merge.getFieldDesc()->isPrimitiveArray()) + return false; + + const Pointer &Dst = S.Stk.peek<Pointer>(); + if (!Dst.getFieldDesc()->isPrimitiveArray()) + return false; + + unsigned SrcElems = Src.getNumElems(); + unsigned DstElems = Dst.getNumElems(); + if (!SrcElems || !DstElems || (SrcElems % DstElems) != 0) + return false; + + PrimType ElemT = Src.getFieldDesc()->getPrimType(); + if (ElemT != Dst.getFieldDesc()->getPrimType() || + ElemT != Merge.getFieldDesc()->getPrimType()) + return false; + + unsigned NumLanes = SrcElems / DstElems; + unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes); + unsigned Base = Lane * DstElems; + + uint64_t Mask = MaskAPS.getZExtValue(); + + TYPE_SWITCH(ElemT, { + for (unsigned I = 0; I != DstElems; ++I) { + if ((Mask >> I) & 1) + Dst.elem<T>(I) = Src.elem<T>(Base + I); + else + Dst.elem<T>(I) = Merge.elem<T>(I); + } + }); + + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID) { @@ -3451,6 +3537,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS.isSigned() ? 
LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID); + + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extractf64x4_mask: + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID); case clang::X86::BI__builtin_ia32_pavgb128: case clang::X86::BI__builtin_ia32_pavgw128: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 35a866ea5010f..5ac7837aa0fe3 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11769,6 +11769,81 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt &RHS) { return LHS.isSigned() ? 
LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: { + APValue SourceVec, SourceImm; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) || + !EvaluateAsRValue(Info, E->getArg(1), SourceImm)) + return false; + + if (!SourceVec.isVector()) + return false; + + const auto *RetVT = E->getType()->castAs<VectorType>(); + if (!RetVT) return false; + + unsigned RetLen = RetVT->getNumElements(); + unsigned SrcLen = SourceVec.getVectorLength(); + if (SrcLen != RetLen * 2) + return false; + + unsigned Idx = SourceImm.getInt().getZExtValue() & 1; + + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(RetLen); + + for (unsigned I = 0; I < RetLen; I++) + ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I)); + + return Success(APValue(ResultElements.data(), RetLen), E); + } + + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + case X86::BI__builtin_ia32_extractf64x4_mask:{ + APValue SourceVec, MergeVec; + APSInt Imm, MaskImm; + + if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) || + !EvaluateInteger(E->getArg(1), Imm, Info) || + !EvaluateAsRValue(Info, E->getArg(2), MergeVec) || + !EvaluateInteger(E->getArg(3), MaskImm, Info)) + return false; + + const auto *RetVT = E->getType()->castAs<VectorType>(); + unsigned RetLen = RetVT->getNumElements(); + + if 
(!SourceVec.isVector() || !MergeVec.isVector()) return false; + unsigned SrcLen = SourceVec.getVectorLength(); + if (!SrcLen || !RetLen || (SrcLen % RetLen) != 0) return false; + + unsigned Lanes = SrcLen / RetLen; + unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes); + unsigned Base = Lane * RetLen; + uint64_t Mask = MaskImm.getZExtValue(); + + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(RetLen); + for (unsigned I = 0; I < RetLen; ++I) { + if ((Mask >> I) & 1) + ResultElements.push_back(SourceVec.getVectorElt(Base + I)); + else + ResultElements.push_back(MergeVec.getVectorElt(I)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case clang::X86::BI__builtin_ia32_pavgb128: case clang::X86::BI__builtin_ia32_pavgw128: diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index fb65bf933b8ad..953285d6ab414 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1214,7 +1214,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #define _mm512_extractf32x8_ps(A, imm) \ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)_mm256_undefined_ps(), \ + (__v8sf)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ @@ -1230,7 +1230,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #define _mm512_extractf64x2_pd(A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ + (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ @@ -1247,7 +1247,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #define _mm512_extracti32x8_epi32(A, imm) \ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)_mm256_undefined_si256(), \ + (__v8si)_mm256_setzero_si256(), \ (__mmask8)-1)) #define 
_mm512_mask_extracti32x8_epi32(W, U, A, imm) \ @@ -1263,7 +1263,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #define _mm512_extracti64x2_epi64(A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ (int)(imm), \ - (__v2di)_mm_undefined_si128(), \ + (__v2di)_mm_setzero_si128(), \ (__mmask8)-1)) #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 80e58425cdd71..2768a5bae887d 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3166,7 +3166,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, #define _mm512_extractf64x4_pd(A, I) \ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ - (__v4df)_mm256_undefined_pd(), \ + (__v4df)_mm256_setzero_pd(), \ (__mmask8)-1)) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ @@ -3181,7 +3181,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, #define _mm512_extractf32x4_ps(A, I) \ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v4sf)_mm_undefined_ps(), \ + (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ @@ -7107,7 +7107,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) #define _mm512_extracti32x4_epi32(A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ + (__v4si)_mm_setzero_si128(), \ (__mmask8)-1)) #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ @@ -7122,7 +7122,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) #define _mm512_extracti64x4_epi64(A, imm) \ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)_mm256_undefined_si256(), \ + (__v4di)_mm256_setzero_si256(), \ (__mmask8)-1)) #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ 
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 68bd52e43981a..2d3c4b551e3b0 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -1075,7 +1075,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) #define _mm256_extractf64x2_pd(A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ + (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ @@ -1093,7 +1093,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) #define _mm256_extracti64x2_epi64(A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ - (__v2di)_mm_undefined_si128(), \ + (__v2di)_mm_setzero_si128(), \ (__mmask8)-1)) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 965741f0ff944..252fb111988b0 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -7609,7 +7609,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) #define _mm256_extractf32x4_ps(A, imm) \ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ (int)(imm), \ - (__v4sf)_mm_undefined_ps(), \ + (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ @@ -7627,7 +7627,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) #define _mm256_extracti32x4_epi32(A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ + (__v4si)_mm_setzero_si128(), \ (__mmask8)-1)) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 5f08b6be81ab7..11ed8498b8ecd 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ 
b/clang/test/CodeGen/X86/avx-builtins.c @@ -1070,19 +1070,25 @@ __m128d test_mm256_extractf128_pd(__m256d A) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3> return _mm256_extractf128_pd(A, 1); } +TEST_CONSTEXPR(match_m128d(_mm256_extractf128_pd(((__m256d){0.0, 1.0, 2.0, 3.0}), 1), + 2.0, 3.0)); __m128 test_mm256_extractf128_ps(__m256 A) { // CHECK-LABEL: test_mm256_extractf128_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/162836 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
