https://github.com/ningxinr updated https://github.com/llvm/llvm-project/pull/153088
>From fcb1db6b8b0d13bfc84321a0e5bcd31db646524e Mon Sep 17 00:00:00 2001 From: ningxinr <ningx...@live.cn> Date: Mon, 11 Aug 2025 13:36:48 -0700 Subject: [PATCH 1/4] [Headers][X86] Allow MMX integer saturated arithmetic intrinsics to be used in constexpr --- clang/lib/Headers/mmintrin.h | 28 ++++++++++++--------------- clang/test/CodeGen/X86/mmx-builtins.c | 4 ++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index 3961b790cea74..afb93ccba781c 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -448,10 +448,9 @@ _mm_add_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_adds_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); } /// Adds, with saturation, each 16-bit signed integer element of the first @@ -472,10 +471,9 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_adds_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); } /// Adds, with saturation, each 8-bit unsigned integer element of the first @@ -605,10 +603,9 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_subs_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); } /// Subtracts, with saturation, each 16-bit signed integer element of the @@ -629,10 +626,9 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_subs_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit unsigned integer element of the second 64-bit diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index d47368c207f4b..29d31f85f2bc5 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -62,12 +62,14 @@ __m64 test_mm_adds_pi8(__m64 a, __m64 b) { // CHECK: call <8 x i8> @llvm.sadd.sat.v8i8( return _mm_adds_pi8(a, b); } +TEST_CONSTEXPR(match_v8qi(_mm_adds_pi8(_mm_setr_pi8(+100, +50, -100, +20, +80, -50, +120, -20), _mm_setr_pi8(+50, +80, -50, +110, +60, -30, +20, -10)), +127, +127, -128, +127, +127, -80, +127, -30)); __m64 test_mm_adds_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_adds_pi16 // CHECK: call <4 x i16> @llvm.sadd.sat.v4i16( return _mm_adds_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_adds_pi16((__m64)(__v4hi){+32000, -32000, +32000, -32000}, (__m64)(__v4hi){+800, -800, -800, +800}), +32767, -32768, +31200, -31200)); __m64 test_mm_adds_pu8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_adds_pu8 @@ -718,12 +720,14 @@ __m64 test_mm_subs_pi8(__m64 a, __m64 b) { // CHECK: call <8 x i8> @llvm.ssub.sat.v8i8( return _mm_subs_pi8(a, b); } +TEST_CONSTEXPR(match_v8qi(_mm_subs_pi8(_mm_setr_pi8(+100, +50, -100, +20, +80, -50, +120, -20), _mm_setr_pi8(-50, -80, +50, -110, -60, +30, -20, +10)), +127, +127, -128, +127, +127, -80, +127, -30)); __m64 test_mm_subs_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_subs_pi16 // CHECK: call <4 x i16> @llvm.ssub.sat.v4i16( return _mm_subs_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_subs_pi16((__m64)(__v4hi){+32000, -32000, +32000, -32000}, (__m64)(__v4hi){-800, +800, +800, -800}), +32767, -32768, +31200, -31200)); __m64 test_mm_subs_pu8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_subs_pu8 >From 13322c6b50cbd63b2c9f8c948e4e45545634b064 Mon Sep 17 00:00:00 2001 From: ningxinr <ningx...@live.cn> Date: Mon, 11 Aug 2025 13:38:46 -0700 Subject: [PATCH 2/4] [Headers][X86] Allow SSE2 integer saturated arithmetic intrinsics to be used in constexpr --- clang/lib/Headers/emmintrin.h | 16 ++++++++-------- clang/test/CodeGen/X86/sse2-builtins.c | 4 ++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 60d2000dfb809..8936f3110050f 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2170,8 +2170,8 @@ _mm_add_epi64(__m128i __a, __m128i __b) { /// A 128-bit signed [16 x i8] vector. /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of /// both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_adds_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); } @@ -2192,8 +2192,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of /// both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_adds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); } @@ -2597,8 +2597,8 @@ _mm_sub_epi64(__m128i __a, __m128i __b) { /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_subs_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); } @@ -2619,8 +2619,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_subs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 23013dd75d641..04b357b88817c 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -63,12 +63,14 @@ __m128i test_mm_adds_epi8(__m128i A, __m128i B) { // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_adds_epi8(A, B); } +TEST_CONSTEXPR(match_v16qi(_mm_adds_epi8(_mm_setr_epi8(+100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20), _mm_setr_epi8(+50, +80, -50, +110, +60, -30, +20, -10, +50, +80, -50, +110, +60, -30, +20, -10)), +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10)); __m128i test_mm_adds_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_adds_epi16 // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_adds_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_adds_epi16(_mm_setr_epi16(+32000, -32000, +32000, -32000, +80, -50, +120, -20), _mm_setr_epi16(+800, -800, -800, +800, +60, -30, +20, -10)), +32767, -32768, +31200, -31200, +140, -80, +140, -30)); __m128i test_mm_adds_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_adds_epu8 @@ -1691,12 +1693,14 @@ __m128i test_mm_subs_epi8(__m128i A, __m128i B) { // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_subs_epi8(A, B); } +TEST_CONSTEXPR(match_v16qi(_mm_subs_epi8(_mm_setr_epi8(+100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20), _mm_setr_epi8(-50, -80, +50, -110, -60, +30, -20, +10, -50, -80, +50, -110, -60, +30, -20, +10)), +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10)); __m128i test_mm_subs_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_subs_epi16 // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_subs_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_subs_epi16(_mm_setr_epi16(+32000, -32000, +32000, -32000, +80, -50, +120, -20), _mm_setr_epi16(-800, +800, +800, -800, -60, +30, -20, +10)), +32767, -32768, +31200, -31200, +140, -80, +140, -30)); __m128i test_mm_subs_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_subs_epu8 >From 36cc2b3dbf5d2db1693299e1446e5a1ab1f637e3 Mon Sep 17 00:00:00 2001 From: ningxinr <ningx...@live.cn> Date: Mon, 11 Aug 2025 13:39:38 -0700 Subject: [PATCH 3/4] [Headers][X86] Allow AVX2 integer saturated arithmetic intrinsics to be used in constexpr --- clang/lib/Headers/avx2intrin.h | 20 ++++++++------------ clang/test/CodeGen/X86/avx2-builtins.c | 4 ++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index f00a8a516ecfe..b2e0b701c95ec 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -367,9 +367,8 @@ _mm256_add_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b); } @@ -385,9 +384,8 @@ _mm256_adds_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b); } @@ -2607,9 +2605,8 @@ _mm256_sub_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b); } @@ -2633,9 +2630,8 @@ _mm256_subs_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b); } diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 8790485f00a8c..ce279b65a399d 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -60,12 +60,14 @@ __m256i test_mm256_adds_epi8(__m256i a, __m256i b) { // CHECK: call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_adds_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_adds_epi8(_mm256_setr_epi8(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20), _mm256_setr_epi8(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +50, +80, -50, +110, +60, -30, +20, -10, +50, +80, -50, +110, +60, -30, +20, -10)), 0, +2, +4, +6, +8, +10, +12, +14, +16, +18, +20, +22, +24, +26, +28, +30, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10)); __m256i test_mm256_adds_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_adds_epi16 // CHECK: call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_adds_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_adds_epi16(_mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, +32000, -32000, +32000, -32000), _mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, +800, -800, -800, +800)),0, -2, -4, -6, -8, -10, -12, -14, -16, -18, -20, -22, +32767, -32768, +31200, -31200)); __m256i test_mm256_adds_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_adds_epu8 @@ -1293,12 +1295,14 @@ __m256i test_mm256_subs_epi8(__m256i a, __m256i b) { // CHECK: call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_subs_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_subs_epi8(_mm256_setr_epi8(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20), _mm256_setr_epi8(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -50, -80, +50, -110, -60, +30, -20, +10, -50, -80, +50, -110, -60, +30, -20, +10)), 0, +2, +4, +6, +8, +10, +12, +14, +16, +18, +20, +22, +24, +26, +28, +30, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10)); __m256i test_mm256_subs_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_subs_epi16 // CHECK: call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_subs_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_subs_epi16(_mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, +32000, -32000, +32000, -32000), _mm256_setr_epi16(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, -800, +800, +800, -800)),0, -2, -4, -6, -8, -10, -12, -14, -16, -18, -20, -22, +32767, -32768, +31200, -31200)); __m256i test_mm256_subs_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_subs_epu8 >From c41317b2f64ae73348327eff10ed361bc1887e5e Mon Sep 17 00:00:00 2001 From: ningxinr <ningx...@live.cn> Date: Mon, 11 Aug 2025 13:40:05 -0700 Subject: [PATCH 4/4] [Headers][X86] Allow AVX512BW integer saturated arithmetic intrinsics to be used in constexpr --- clang/lib/Headers/avx512bwintrin.h | 20 ++++++++------------ clang/test/CodeGen/X86/avx512bw-builtins.c | 7 +++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index d7377e5d5196c..57a261f30eadb 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -613,9 +613,8 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_adds_epi8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_adds_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B); } @@ -635,9 +634,8 @@ _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_adds_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_adds_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B); } @@ -946,9 +944,8 @@ _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_subs_epi8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_subs_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B); } @@ -968,9 +965,8 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_subs_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_subs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 2e3e13d2769c4..c2426dc0e6b9d 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -957,6 +957,8 @@ __m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) { // CHECK: @llvm.sadd.sat.v64i8 return _mm512_adds_epi8(__A,__B); } +TEST_CONSTEXPR(match_v64qi(_mm512_adds_epi8((__m512i)(__v64qi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20}, (__m512i)(__v64qi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, +50, +80, -50, +110, +60, -30, +20, -10, +50, +80, -50, +110, +60, -30, +20, -10}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, -56, +58, -60, +62, -64, +66, -68, +70, -72, +74, -76, +78, -80, +82, -84, +86, -88, +90, -92, +94, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10)); + __m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_adds_epi8 // CHECK: @llvm.sadd.sat.v64i8 @@ -974,6 +976,8 @@ __m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.sadd.sat.v32i16 return _mm512_adds_epi16(__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_adds_epi16((__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +32000, -32000, +32000, -32000}, (__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +800, -800, -800, +800}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, +32767, -32768, +31200, -31200)); + __m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_adds_epi16 // CHECK: @llvm.sadd.sat.v32i16 @@ -1218,6 +1222,8 @@ __m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) { // CHECK: @llvm.ssub.sat.v64i8 return _mm512_subs_epi8(__A,__B); } +TEST_CONSTEXPR(match_v64qi(_mm512_subs_epi8((__m512i)(__v64qi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20}, (__m512i)(__v64qi){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, -50, -80, +50, -110, -60, +30, -20, +10, -50, -80, +50, -110, -60, +30, -20, +10}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, -56, +58, -60, +62, -64, +66, -68, +70, -72, +74, -76, +78, -80, +82, -84, +86, -88, +90, -92, +94, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10)); + __m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_subs_epi8 // CHECK: @llvm.ssub.sat.v64i8 @@ -1235,6 +1241,7 @@ __m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.ssub.sat.v32i16 return _mm512_subs_epi16(__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_subs_epi16((__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +32000, -32000, +32000, -32000}, (__m512i)(__v32hi){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, -800, +800, +800, -800}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, +32767, -32768, +31200, -31200)); __m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_subs_epi16 // CHECK: @llvm.ssub.sat.v32i16 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits