Author: Ahmed Nour Date: 2025-11-17T15:34:34Z New Revision: 47c1aa4cef638c97b74f3afb7bed60e92bba1f90
URL: https://github.com/llvm/llvm-project/commit/47c1aa4cef638c97b74f3afb7bed60e92bba1f90 DIFF: https://github.com/llvm/llvm-project/commit/47c1aa4cef638c97b74f3afb7bed60e92bba1f90.diff LOG: [X86] Add constexpr support for addsub intrinsics (#167512) Recent commits (7fe069121b57a, 53ddeb493529a) marked several x86 intrinsics as constexpr in headers without providing the necessary constant evaluation support in the compiler backend. This caused compilation failures when attempting to use these intrinsics in constant expressions. Resolves #166814 Resolves #161203 Added: Modified: clang/include/clang/Basic/BuiltinsX86.td clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ExprConstant.cpp clang/lib/Headers/avxintrin.h clang/lib/Headers/pmmintrin.h clang/test/CodeGen/X86/avx-builtins.c clang/test/CodeGen/X86/sse3-builtins.c Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index bbe0aa3657c06..a656fe341c8e0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -92,8 +92,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; } - - let Features = "sse3" in { + let Features = "sse3", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; @@ -121,8 +121,9 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { - foreach Op = ["addsub", "max", "min"] in { +let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], + Features = "avx" in { + foreach Op = 
["max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; } @@ -571,6 +572,15 @@ let Features = "avx", def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } +let Features = "avx", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def addsubpd256 + : X86Builtin< + "_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def addsubps256 + : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; +} + let Features = "avx", Attributes = [NoThrow] in { def vzeroall : X86Builtin<"void()">; def vzeroupper : X86Builtin<"void()">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a2f99c7c234fe..30426565407ba 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2715,6 +2715,35 @@ static bool interp_builtin_horizontal_fp_binop( return true; } +static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? 
(a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElems = VT->getNumElements(); + + using T = PrimConv<PT_Float>::T; + for (unsigned I = 0; I != NumElems; ++I) { + APFloat LElem = LHS.elem<T>(I).getAPFloat(); + APFloat RElem = RHS.elem<T>(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem<T>(I) = static_cast<T>(LElem); + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APFloat(const APFloat &, const APFloat &, @@ -4196,6 +4225,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, F.subtract(RHS, RM); return F; }); + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: + return interp__builtin_ia32_addsub(S, OpPC, Call); case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d9b3ee20e919f..ed1f1b7508ffc 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13373,6 +13373,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: 
alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElems = SourceLHS.getVectorLength(); + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(NumElems); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + + for (unsigned I = 0; I != NumElems; ++I) { + APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); + APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LHS.subtract(RHS, RM); + } else { + // Odd indices: add + LHS.add(RHS, RM); + } + ResultElements.push_back(APValue(LHS)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 4aef9245323fb..3e1618ed192c8 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -147,9 +147,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_addsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } @@ -166,9 +165,8 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the right source operand. 
/// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_addsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 6b152bde29fc1..a9a65440363c3 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -60,9 +60,8 @@ _mm_lddqu_si128(__m128i_u const *__p) /// A 128-bit vector of [4 x float] containing the right source operand. /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_addsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } @@ -166,7 +165,7 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 737febbc7fef6..46bc28b85d8db 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0)); __m256 test_mm256_addsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_addsub_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f)); __m256d test_mm256_and_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_and_pd diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index a82dd4080670b..44389fbdc6f77 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0)); __m128 test_mm_addsub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_addsub_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x 
float> %{{.*}}, <4 x float> %{{.*}}) return _mm_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f)); __m128d test_mm_hadd_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_hadd_pd _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
