Author: Simon Pilgrim
Date: 2025-08-06T11:14:22+01:00
New Revision: a5d85a6ab5daf67b67da654c90adc494d37833c8
URL: https://github.com/llvm/llvm-project/commit/a5d85a6ab5daf67b67da654c90adc494d37833c8 DIFF: https://github.com/llvm/llvm-project/commit/a5d85a6ab5daf67b67da654c90adc494d37833c8.diff LOG: [Headers][X86] Allow AVX _mm256_set* intrinsics to be used in constexpr (#152173) Added: Modified: clang/lib/Headers/avxintrin.h clang/test/CodeGen/X86/avx-builtins.c clang/test/CodeGen/X86/builtin_test_helpers.h Removed: ################################################################################ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 8e497a9823499..b9ca013c25c7a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3777,7 +3777,7 @@ _mm256_set_ps(float __a, float __b, float __c, float __d, /// \param __i7 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { @@ -3825,7 +3825,7 @@ _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, /// \param __w00 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, @@ -3908,7 +3908,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, /// \param __b00 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \returns An initialized 256-bit integer vector. 
-static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -3943,7 +3943,7 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, /// \param __d /// A 64-bit integral value used to initialize bits [63:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) { return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a }; @@ -4044,7 +4044,7 @@ _mm256_setr_ps(float __a, float __b, float __c, float __d, /// \param __i7 /// A 32-bit integral value used to initialize bits [255:224] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { @@ -4092,7 +4092,7 @@ _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, /// \param __w00 /// A 16-bit integral value used to initialize bits [255:240] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, @@ -4177,7 +4177,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, /// \param __b00 /// An 8-bit integral value used to initialize bits [255:248] of the result. /// \returns An initialized 256-bit integer vector. 
-static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -4210,7 +4210,7 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, /// \param __d /// A 64-bit integral value used to initialize bits [255:192] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) { return _mm256_set_epi64x(__d, __c, __b, __a); @@ -4267,7 +4267,7 @@ _mm256_set1_ps(float __w) /// A 32-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [8 x i32]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i) { return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i); @@ -4285,7 +4285,7 @@ _mm256_set1_epi32(int __i) /// A 16-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [16 x i16]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi16(short __w) { return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w, @@ -4303,7 +4303,7 @@ _mm256_set1_epi16(short __w) /// An 8-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [32 x i8]. 
-static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi8(char __b) { return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, @@ -4324,7 +4324,7 @@ _mm256_set1_epi8(char __b) /// A 64-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [4 x i64]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi64x(long long __q) { return _mm256_set_epi64x(__q, __q, __q, __q); diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index ed39862377197..a6e70aae420ea 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1443,6 +1443,7 @@ __m256i test_mm256_set_epi8(char A0, char A1, char A2, char A3, char A4, char A5 // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31 return _mm256_set_epi8(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31); } +TEST_CONSTEXPR(match_v32qi(_mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); __m256i test_mm256_set_epi16(short A0, short A1, short A2, short A3, short A4, short A5, short A6, short A7, short A8, short A9, short A10, short A11, short A12, short A13, short A14, short A15) { @@ -1465,6 +1466,7 @@ __m256i test_mm256_set_epi16(short A0, short A1, short A2, short A3, short A4, s // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15 return _mm256_set_epi16(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15); } +TEST_CONSTEXPR(match_v16hi(_mm256_set_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15), -15, 
-14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0)); __m256i test_mm256_set_epi32(int A0, int A1, int A2, int A3, int A4, int A5, int A6, int A7) { // CHECK-LABEL: test_mm256_set_epi32 @@ -1478,6 +1480,7 @@ __m256i test_mm256_set_epi32(int A0, int A1, int A2, int A3, int A4, int A5, int // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7 return _mm256_set_epi32(A0, A1, A2, A3, A4, A5, A6, A7); } +TEST_CONSTEXPR(match_v8si(_mm256_set_epi32(1, -3, 5, -7, 9, -11, 13, -15), -15, 13, -11, 9, -7, 5, -3, 1)); __m256i test_mm256_set_epi64x(long long A0, long long A1, long long A2, long long A3) { // CHECK-LABEL: test_mm256_set_epi64x @@ -1487,6 +1490,7 @@ __m256i test_mm256_set_epi64x(long long A0, long long A1, long long A2, long lon // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3 return _mm256_set_epi64x(A0, A1, A2, A3); } +TEST_CONSTEXPR(match_v4di(_mm256_set_epi64x(100, -1000, 2000, -200), -200, 2000, -1000, 100)); __m256 test_mm256_set_m128(__m128 A, __m128 B) { // CHECK-LABEL: test_mm256_set_m128 @@ -1566,6 +1570,7 @@ __m256i test_mm256_set1_epi8(char A) { // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31 return _mm256_set1_epi8(A); } +TEST_CONSTEXPR(match_v32qi(_mm256_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m256i test_mm256_set1_epi16(short A) { // CHECK-LABEL: test_mm256_set1_epi16 @@ -1587,6 +1592,7 @@ __m256i test_mm256_set1_epi16(short A) { // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15 return _mm256_set1_epi16(A); } +TEST_CONSTEXPR(match_v16hi(_mm256_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128)); __m256i test_mm256_set1_epi32(int A) { // CHECK-LABEL: test_mm256_set1_epi32 @@ -1600,6 +1606,7 @@ __m256i test_mm256_set1_epi32(int A) { // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7 return _mm256_set1_epi32(A); } 
+TEST_CONSTEXPR(match_v8si(_mm256_set1_epi32(55), 55, 55, 55, 55, 55, 55, 55, 55)); __m256i test_mm256_set1_epi64x(long long A) { // CHECK-LABEL: test_mm256_set1_epi64x @@ -1609,6 +1616,7 @@ __m256i test_mm256_set1_epi64x(long long A) { // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3 return _mm256_set1_epi64x(A); } +TEST_CONSTEXPR(match_v4di(_mm256_set1_epi64x(-65535), -65535, -65535, -65535, -65535)); __m256d test_mm256_set1_pd(double A) { // CHECK-LABEL: test_mm256_set1_pd @@ -1673,6 +1681,7 @@ __m256i test_mm256_setr_epi8(char A0, char A1, char A2, char A3, char A4, char A // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31 return _mm256_setr_epi8(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31); } +TEST_CONSTEXPR(match_v32qi(_mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); __m256i test_mm256_setr_epi16(short A0, short A1, short A2, short A3, short A4, short A5, short A6, short A7, short A8, short A9, short A10, short A11, short A12, short A13, short A14, short A15) { @@ -1695,6 +1704,7 @@ __m256i test_mm256_setr_epi16(short A0, short A1, short A2, short A3, short A4, // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15 return _mm256_setr_epi16(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15); } +TEST_CONSTEXPR(match_v16hi(_mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15), 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15)); __m256i test_mm256_setr_epi32(int A0, int A1, int A2, int A3, int A4, int A5, int A6, int A7) { // CHECK-LABEL: test_mm256_setr_epi32 @@ -1708,6 +1718,7 @@ __m256i test_mm256_setr_epi32(int A0, int A1, int A2, int A3, int A4, int 
A5, in // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7 return _mm256_setr_epi32(A0, A1, A2, A3, A4, A5, A6, A7); } +TEST_CONSTEXPR(match_v8si(_mm256_setr_epi32(1, -3, 5, -7, 9, -11, 13, -15), 1, -3, 5, -7, 9, -11, 13, -15)); __m256i test_mm256_setr_epi64x(long long A0, long long A1, long long A2, long long A3) { // CHECK-LABEL: test_mm256_setr_epi64x @@ -1717,6 +1728,7 @@ __m256i test_mm256_setr_epi64x(long long A0, long long A1, long long A2, long lo // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3 return _mm256_setr_epi64x(A0, A1, A2, A3); } +TEST_CONSTEXPR(match_v4di(_mm256_setr_epi64x(100, -1000, 2000, -200), 100, -1000, 2000, -200)); __m256 test_mm256_setr_m128(__m128 A, __m128 B) { // CHECK-LABEL: test_mm256_setr_m128 diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h index 22a87ce9623be..f719694d41e25 100644 --- a/clang/test/CodeGen/X86/builtin_test_helpers.h +++ b/clang/test/CodeGen/X86/builtin_test_helpers.h @@ -83,6 +83,22 @@ constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int f, return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; } +constexpr bool match_v16hi(__m256i _v, short a, short b, short c, short d, short e, short f, short g, short h, short i, short j, short k, short l, short m, short n, short o, short p) { + __v16hi v = (__v16hi)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; +} + +constexpr bool match_v32qi(__m256i _v, char __b00, char __b01, char __b02, char __b03, char __b04, char __b05, char __b06, char __b07, + char __b08, char __b09, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15, + char __b16, char __b17, char __b18, char __b19, char __b20, char __b21, char __b22, char __b23, + 
char __b24, char __b25, char __b26, char __b27, char __b28, char __b29, char __b30, char __b31) { + __v32qi v = (__v32qi)_v; + return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 && v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] == __b07 && + v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 && v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] == __b15 && + v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 && v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] == __b23 && + v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 && v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] == __b31; +} + constexpr bool match_m512(__m512 v, float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p) { return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits