[clang] 47c1aa4 - [X86] Add constexpr support for addsub intrinsics (#167512)

via cfe-commits Wed, 19 Nov 2025 12:31:26 -0800

Author: Ahmed Nour
Date: 2025-11-17T15:34:34Z
New Revision: 47c1aa4cef638c97b74f3afb7bed60e92bba1f90


URL: 
https://github.com/llvm/llvm-project/commit/47c1aa4cef638c97b74f3afb7bed60e92bba1f90
DIFF: 
https://github.com/llvm/llvm-project/commit/47c1aa4cef638c97b74f3afb7bed60e92bba1f90.diff

LOG: [X86] Add constexpr support for addsub intrinsics  (#167512)

Recent commits (7fe069121b57a, 53ddeb493529a) marked several x86
intrinsics as constexpr in headers without providing the necessary
constant evaluation support in the compiler backend. This caused
compilation failures when attempting to use these intrinsics in constant
expressions.

Resolves #166814 
Resolves #161203

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/lib/Headers/avxintrin.h
    clang/lib/Headers/pmmintrin.h
    clang/test/CodeGen/X86/avx-builtins.c
    clang/test/CodeGen/X86/sse3-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index bbe0aa3657c06..a656fe341c8e0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -92,8 +92,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] 
in {
     def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, 
double>, _Constant char)">;
   }
 
-
-  let Features = "sse3" in {
+  let Features = "sse3",
+      Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
     foreach Op = ["addsub"] in {
       def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, 
float>)">;
       def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, 
_Vector<2, double>)">;
@@ -121,8 +121,9 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] 
in {
 }
 
 // AVX
-let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" 
in {
-  foreach Op = ["addsub", "max", "min"] in {
+let Attributes = [Const, NoThrow, RequiredVectorWidth<256>],
+    Features = "avx" in {
+  foreach Op = ["max", "min"] in {
     def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, 
_Vector<4, double>)">;
     def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>)">;
   }
@@ -571,6 +572,15 @@ let Features = "avx",
   def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
 }
 
+let Features = "avx",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def addsubpd256
+      : X86Builtin<
+            "_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
+  def addsubps256
+      : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
+}
+
 let Features = "avx", Attributes = [NoThrow] in {
   def vzeroall : X86Builtin<"void()">;
   def vzeroupper : X86Builtin<"void()">;

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a2f99c7c234fe..30426565407ba 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2715,6 +2715,35 @@ static bool interp_builtin_horizontal_fp_binop(
   return true;
 }
 
+static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
+                                        const CallExpr *Call) {
+  // Addsub: alternates between subtraction and addition
+  // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
+  const Pointer &RHS = S.Stk.pop<Pointer>();
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+  FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
+  llvm::RoundingMode RM = getRoundingMode(FPO);
+  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+  unsigned NumElems = VT->getNumElements();
+
+  using T = PrimConv<PT_Float>::T;
+  for (unsigned I = 0; I != NumElems; ++I) {
+    APFloat LElem = LHS.elem<T>(I).getAPFloat();
+    APFloat RElem = RHS.elem<T>(I).getAPFloat();
+    if (I % 2 == 0) {
+      // Even indices: subtract
+      LElem.subtract(RElem, RM);
+    } else {
+      // Odd indices: add
+      LElem.add(RElem, RM);
+    }
+    Dst.elem<T>(I) = static_cast<T>(LElem);
+  }
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp__builtin_elementwise_triop_fp(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<APFloat(const APFloat &, const APFloat &,
@@ -4196,6 +4225,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           F.subtract(RHS, RM);
           return F;
         });
+  case clang::X86::BI__builtin_ia32_addsubpd:
+  case clang::X86::BI__builtin_ia32_addsubps:
+  case clang::X86::BI__builtin_ia32_addsubpd256:
+  case clang::X86::BI__builtin_ia32_addsubps256:
+    return interp__builtin_ia32_addsub(S, OpPC, Call);
 
   case clang::X86::BI__builtin_ia32_pmuldq128:
   case clang::X86::BI__builtin_ia32_pmuldq256:

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d9b3ee20e919f..ed1f1b7508ffc 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13373,6 +13373,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     }
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case clang::X86::BI__builtin_ia32_addsubpd:
+  case clang::X86::BI__builtin_ia32_addsubps:
+  case clang::X86::BI__builtin_ia32_addsubpd256:
+  case clang::X86::BI__builtin_ia32_addsubps256: {
+    // Addsub: alternates between subtraction and addition
+    // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
+    APValue SourceLHS, SourceRHS;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+      return false;
+    unsigned NumElems = SourceLHS.getVectorLength();
+    SmallVector<APValue, 8> ResultElements;
+    ResultElements.reserve(NumElems);
+    llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);
+
+    for (unsigned I = 0; I != NumElems; ++I) {
+      APFloat LHS = SourceLHS.getVectorElt(I).getFloat();
+      APFloat RHS = SourceRHS.getVectorElt(I).getFloat();
+      if (I % 2 == 0) {
+        // Even indices: subtract
+        LHS.subtract(RHS, RM);
+      } else {
+        // Odd indices: add
+        LHS.add(RHS, RM);
+      }
+      ResultElements.push_back(APValue(LHS));
+    }
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
   case Builtin::BI__builtin_elementwise_fshl:
   case Builtin::BI__builtin_elementwise_fshr: {
     APValue SourceHi, SourceLo, SourceShift;

diff  --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 4aef9245323fb..3e1618ed192c8 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -147,9 +147,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR 
_mm256_sub_ps(__m256 __a,
 ///    A 256-bit vector of [4 x double] containing the right source operand.
 /// \returns A 256-bit vector of [4 x double] containing the alternating sums
 ///    and 
diff erences between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_addsub_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_addsub_pd(__m256d __a, __m256d __b) {
   return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -166,9 +165,8 @@ _mm256_addsub_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing the right source operand.
 /// \returns A 256-bit vector of [8 x float] containing the alternating sums 
and
 ///    
diff erences between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_addsub_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_addsub_ps(__m256 __a, __m256 __b) {
   return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
 }
 

diff  --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index 6b152bde29fc1..a9a65440363c3 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -60,9 +60,8 @@ _mm_lddqu_si128(__m128i_u const *__p)
 ///    A 128-bit vector of [4 x float] containing the right source operand.
 /// \returns A 128-bit vector of [4 x float] containing the alternating sums 
and
 ///    
diff erences of both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_addsub_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_addsub_ps(__m128 __a, __m128 __b) {
   return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -166,7 +165,7 @@ _mm_moveldup_ps(__m128 __a)
 ///    A 128-bit vector of [2 x double] containing the right source operand.
 /// \returns A 128-bit vector of [2 x double] containing the alternating sums
 ///    and 
diff erences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_addsub_pd(__m128d __a, __m128d __b) {
   return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
 }

diff  --git a/clang/test/CodeGen/X86/avx-builtins.c 
b/clang/test/CodeGen/X86/avx-builtins.c
index 737febbc7fef6..46bc28b85d8db 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
   // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> 
%{{.*}}, <4 x double> %{{.*}})
   return _mm256_addsub_pd(A, B);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, 
(__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0));
 
 __m256 test_mm256_addsub_ps(__m256 A, __m256 B) {
   // CHECK-LABEL: test_mm256_addsub_ps
   // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> 
%{{.*}}, <8 x float> %{{.*}})
   return _mm256_addsub_ps(A, B);
 }
+TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, 
+4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, 
+1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f));
 
 __m256d test_mm256_and_pd(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_and_pd

diff  --git a/clang/test/CodeGen/X86/sse3-builtins.c 
b/clang/test/CodeGen/X86/sse3-builtins.c
index a82dd4080670b..44389fbdc6f77 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) {
   // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> 
%{{.*}}, <2 x double> %{{.*}})
   return _mm_addsub_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, 
(__m128d){+1.0, +2.0}), +1.0, +4.0));
 
 __m128 test_mm_addsub_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_addsub_ps
   // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> 
%{{.*}}, <4 x float> %{{.*}})
   return _mm_addsub_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, 
(__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f));
 
 __m128d test_mm_hadd_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_hadd_pd


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 47c1aa4 - [X86] Add constexpr support for addsub intrinsics (#167512)

Reply via email to