https://github.com/SeongjaeP created
https://github.com/llvm/llvm-project/pull/158853
[clang][WIP] Constant evaluation for AVX extract intrinsics
This patch adds initial constant evaluation support in ExprConstant.cpp
for a subset of AVX/AVX2/AVX-512 extract intrinsics.
Implemented cases include:
* _mm256_extracti128_si256
* _mm256_extractf128_pd / _mm256_extractf128_ps / _mm256_extractf128_si256
* _mm256_extracti32x4_epi32 / _mm512_extracti32x4_epi32 /
_mm512_extracti32x8_epi32
* _mm256_extracti64x2_epi64 / _mm512_extracti64x2_epi64 /
_mm512_extracti64x4_epi64
* _mm256_extractf32x4_ps / _mm512_extractf32x4_ps / _mm512_extractf32x8_ps
* _mm256_extractf64x2_pd / _mm512_extractf64x2_pd / _mm512_extractf64x4_pd
The evaluation follows the established pattern:
1. Evaluate the source vector and the immediate lane index.
2. Extract the selected lane into an intermediate vector.
3. Apply the mask/merge logic:
- plain : kmask = all-ones, merge = undef
- mask : merge = destination vector
- maskz : merge = zero-initialized vector
Note:
- Tests have not been written yet; the test file added in patch 1 is only a compile-only placeholder with no assertions.
- This commit is for local development only, not intended for upstream review.
>From 85d7dd0fe86a4accebaa388640b36c5a68bf9a70 Mon Sep 17 00:00:00 2001
From: seongjaep <[email protected]>
Date: Fri, 12 Sep 2025 14:18:41 +0900
Subject: [PATCH 1/3] [WIP][Clang][ConstExpr] Add initial support for AVX
256->128 extract builtins
---
clang/lib/AST/ExprConstant.cpp | 31 +++++++++++++++++++
.../test/SemaCXX/constexpr-avx-intrinsics.cpp | 25 +++++++++++++++
2 files changed, 56 insertions(+)
create mode 100644 clang/test/SemaCXX/constexpr-avx-intrinsics.cpp
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 5145896930153..52c1299cfee1c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11863,6 +11863,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr
*E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+
+ case X86::BI__builtin_ia32_vextracti128_si256:
+ case X86::BI__builtin_ia32_vextractf128_pd:
+ case X86::BI__builtin_ia32_vextractf128_ps:
+ case X86::BI__builtin_ia32_vextractf128_si256: {
+ APValue SourceHi, SourceLo, SourceAmt;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceHi) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceLo) ||
+ !EvaluateAsRValue(Info, E->getArg(2), SourceAmt))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ unsigned SourceLen = SourceHi.getVectorLength();
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APInt Amt = SourceAmt.getInt();
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ APInt Hi = SourceHi.getVectorElt(EltNum).getInt();
+ APInt Lo = SourceLo.getVectorElt(EltNum).getInt();
+ APInt R = llvm::APIntOps::fshl(Hi, Lo, Amt);
+ ResultElements.push_back(
+ APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType())));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
+
+
+
case X86::BI__builtin_ia32_vpshldd128:
case X86::BI__builtin_ia32_vpshldd256:
case X86::BI__builtin_ia32_vpshldd512:
diff --git a/clang/test/SemaCXX/constexpr-avx-intrinsics.cpp
b/clang/test/SemaCXX/constexpr-avx-intrinsics.cpp
new file mode 100644
index 0000000000000..30e1340601255
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-avx-intrinsics.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
+// expected-no-diagnostics
+
+#include <immintrin.h> // AVX/AVX512 header
+
+// // constexpr function that uses the AVX/AVX512 intrinsic under test
+// constexpr int test_avx_subvector_extraction() {
+// __m256i a = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+
+// // Core of the issue: this intrinsic call must be usable in a constexpr context
+// __m128i sub = _mm256_extracti128_si256(a, 0);
+
+// return _mm_cvtsi128_si32(sub); // convert the result to int and return it
+// }
+
+// // This constant must be evaluated at compile time
+// constexpr int result = test_avx_subvector_extraction();
+
+// static_assert(result == 0, "Incorrect result");
+
+#include <immintrin.h>
+// Compile-only check: the extracted value is not asserted anywhere below.
+constexpr __m128 test(__m256 a) {
+ return _mm256_extractf128_ps(a, 1);
+}
\ No newline at end of file
>From dba3ed74fce9f7c9d0936a7be3f17ce8bf7a15a8 Mon Sep 17 00:00:00 2001
From: SeongjaeP <[email protected]>
Date: Fri, 12 Sep 2025 21:00:28 +0900
Subject: [PATCH 2/3] [clang] Support constexpr evaluation for AVX/AVX2 extract
intrinsics
Implements constexpr evaluation for:
- _mm256_extracti128_si256 (AVX2, VEXTRACTI128)
- _mm256_extractf128_ps
- _mm256_extractf128_pd
- _mm256_extractf128_si256
These now work correctly in constant expressions by extracting
the appropriate 128-bit lane from a 256-bit vector.
---
clang/lib/AST/ExprConstant.cpp | 43 +++++++++++++++-------------------
1 file changed, 19 insertions(+), 24 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 52c1299cfee1c..61236d023f3b5 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11864,35 +11864,30 @@ bool VectorExprEvaluator::VisitCallExpr(const
CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- case X86::BI__builtin_ia32_vextracti128_si256:
- case X86::BI__builtin_ia32_vextractf128_pd:
- case X86::BI__builtin_ia32_vextractf128_ps:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256: {
- APValue SourceHi, SourceLo, SourceAmt;
- if (!EvaluateAsRValue(Info, E->getArg(0), SourceHi) ||
- !EvaluateAsRValue(Info, E->getArg(1), SourceLo) ||
- !EvaluateAsRValue(Info, E->getArg(2), SourceAmt))
+ APValue SourceVec, SourceImm;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceImm))
return false;
- QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
- unsigned SourceLen = SourceHi.getVectorLength();
- SmallVector<APValue, 32> ResultElements;
- ResultElements.reserve(SourceLen);
-
- APInt Amt = SourceAmt.getInt();
- for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
- APInt Hi = SourceHi.getVectorElt(EltNum).getInt();
- APInt Lo = SourceLo.getVectorElt(EltNum).getInt();
- APInt R = llvm::APIntOps::fshl(Hi, Lo, Amt);
- ResultElements.push_back(
- APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType())));
- }
+ unsigned idx = SourceImm.getInt().getZExtValue() & 1;
+ const auto *RetVT = E->getType()->castAs<VectorType>();
+ unsigned RetLen = RetVT->getNumElements();
+ unsigned SrcLen = SourceVec.getVectorLength();
+ if (SrcLen != RetLen * 2)
+ return false;
+
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(RetLen);
- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ for (unsigned i = 0; i < RetLen; i++)
+ ResultElements.push_back(SourceVec.getVectorElt(idx * RetLen + i));
+
+ return Success(APValue(ResultElements.data(), RetLen), E);
}
-
-
-
case X86::BI__builtin_ia32_vpshldd128:
case X86::BI__builtin_ia32_vpshldd256:
>From 33829bfb83fa7114b308b43ecaf11683fadfceb0 Mon Sep 17 00:00:00 2001
From: SeongjaeP <[email protected]>
Date: Tue, 16 Sep 2025 18:14:57 +0900
Subject: [PATCH 3/3] [clang] Implement constant evaluation for AVX extract
intrinsics (part)
---
clang/lib/AST/ExprConstant.cpp | 111 ++++++++++++++++++++++++++++++++-
1 file changed, 109 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 61236d023f3b5..8c7ada71ab84b 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11864,7 +11864,114 @@ bool VectorExprEvaluator::VisitCallExpr(const
CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask: //
_mm256_extracti32x4_epi32
+ case X86::BI__builtin_ia32_extracti32x4_mask: //
_mm512_extracti32x4_epi32
+ case X86::BI__builtin_ia32_extracti32x8_mask: //
_mm512_extracti32x8_epi32
+ case X86::BI__builtin_ia32_extracti64x2_256_mask: //
_mm256_extracti64x2_epi64
+ case X86::BI__builtin_ia32_extracti64x2_512_mask: //
_mm512_extracti64x2_epi64
+ case X86::BI__builtin_ia32_extracti64x4_mask: { //
_mm512_extracti64x4_epi64
+ APValue SourceVec, SourceImm, SourceMerge, SourceKmask;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceImm) ||
+ !EvaluateAsRValue(Info, E->getArg(2), SourceMerge) ||
+ !EvaluateAsRValue(Info, E->getArg(3), SourceKmask))
+ return false;
+ // Masked extract: pick one lane of arg 0, then blend with arg 2 per kmask.
+ const auto *RetVT = E->getType()->castAs<VectorType>();
+ QualType EltTy = RetVT->getElementType();
+ unsigned RetLen = RetVT->getNumElements();
+
+ if (!SourceVec.isVector())
+ return false;
+ unsigned SrcLen = SourceVec.getVectorLength();
+ if (SrcLen % RetLen != 0)
+ return false;
+ // The immediate selects a RetLen-element lane, masked with NumLanes - 1.
+ unsigned NumLanes = SrcLen / RetLen;
+ unsigned idx = SourceImm.getInt().getZExtValue() & (NumLanes - 1);
+
+ // Apply the kmask per element (covers the plain/mask/maskz forms):
+ // - plain : headers pass kmask=all-ones; merge is undef → always take
Extracted.
+ // - mask : merge=dst; result[i] = kmask[i] ? Extracted[i] : dst[i]
+ // - maskz : merge=zero; result[i] = kmask[i] ? Extracted[i] : 0
+ uint64_t KmaskBits = SourceKmask.getInt().getZExtValue();
+ // Integer zero of the element width, used when merge is not a vector.
+ auto makeZeroInt = [&]() -> APValue {
+ bool Uns = EltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned BW = Info.Ctx.getIntWidth(EltTy);
+ return APValue(APSInt(APInt(BW, 0), Uns));
+ };
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(RetLen);
+ for (unsigned i = 0; i < RetLen; i++) {
+ bool Take = (KmaskBits >> i) & 1;
+ if (Take) {
+ ResultElements.push_back(SourceVec.getVectorElt(idx * RetLen + i));
+ } else {
+ // For plain (all-ones kmask) this path is never taken.
+ // For mask : the result keeps element i of the merge (dst) vector.
+ // For maskz : merge is expected to be a zero vector, so this yields 0.
+ const APValue &MergeElt =
+ SourceMerge.isVector() ? SourceMerge.getVectorElt(i) :
makeZeroInt();
+ ResultElements.push_back(MergeElt);
+ }
+ }
+ return Success(APValue(ResultElements.data(), RetLen), E);
+ }
+
+ case X86::BI__builtin_ia32_extractf32x4_256_mask: // _mm256_extractf32x4_ps
_mm256_mask_extractf32x4_ps _mm256_maskz_extractf32x4_ps
+ case X86::BI__builtin_ia32_extractf32x4_mask: // _mm512_extractf32x4_ps
_mm512_mask_extractf32x4_ps _mm512_maskz_extractf32x4_ps
+ case X86::BI__builtin_ia32_extractf32x8_mask: // _mm512_extractf32x8_ps
_mm512_mask_extractf32x8_ps _mm512_maskz_extractf32x8_ps
+
+ case X86::BI__builtin_ia32_extractf64x2_256_mask: // _mm256_extractf64x2_pd
_mm256_mask_extractf64x2_pd _mm256_maskz_extractf64x2_pd
+ case X86::BI__builtin_ia32_extractf64x2_512_mask: // _mm512_extractf64x2_pd
_mm512_mask_extractf64x2_pd _mm512_maskz_extractf64x2_pd
+ case X86::BI__builtin_ia32_extractf64x4_mask: { // _mm512_extractf64x4_pd
_mm512_mask_extractf64x4_pd _mm512_maskz_extractf64x4_pd
+ APValue SourceVec, SourceImm, SourceMerge, SourceKmask;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceImm) ||
+ !EvaluateAsRValue(Info, E->getArg(2), SourceMerge) ||
+ !EvaluateAsRValue(Info, E->getArg(3), SourceKmask))
+ return false;
+ // Masked extract: pick one lane of arg 0, then blend with arg 2 per kmask.
+ const auto *RetVT = E->getType()->castAs<VectorType>();
+ QualType EltTy = RetVT->getElementType();
+ unsigned RetLen = RetVT->getNumElements();
+
+ if (!SourceVec.isVector())
+ return false;
+ unsigned SrcLen = SourceVec.getVectorLength();
+ if (SrcLen % RetLen != 0)
+ return false;
+ // The immediate selects a RetLen-element lane, masked with NumLanes - 1.
+ unsigned NumLanes = SrcLen / RetLen;
+ unsigned idx = SourceImm.getInt().getZExtValue() & (NumLanes - 1);
+ // Bit i of the kmask set: take the extracted element; clear: take merge[i].
+ uint64_t KmaskBits = SourceKmask.getInt().getZExtValue();
+ // FP zero in the element's semantics, used when merge is not a vector.
+ auto makeZeroFP = [&]() -> APValue {
+ const llvm::fltSemantics &Sem =
+ Info.Ctx.getFloatTypeSemantics(EltTy);
+ return APValue(llvm::APFloat::getZero(Sem));
+ };
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(RetLen);
+ for (unsigned i = 0; i < RetLen; i++) {
+ bool Take = (KmaskBits >> i) & 1;
+ if (Take) {
+ ResultElements.push_back(SourceVec.getVectorElt(idx * RetLen + i));
+ } else {
+ const APValue &MergeElt =
+ SourceMerge.isVector() ? SourceMerge.getVectorElt(i) :
makeZeroFP();
+ ResultElements.push_back(MergeElt);
+ }
+ }
+ return Success(APValue(ResultElements.data(), RetLen), E);
+ }
+
+ // vector extract
+ case X86::BI__builtin_ia32_extract128i256: // avx2
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256: {
@@ -11880,7 +11987,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr
*E) {
if (SrcLen != RetLen * 2)
return false;
- SmallVector<APValue, 16> ResultElements;
+ SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);
for (unsigned i = 0; i < RetLen; i++)
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits