https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/154149
>From 4b04f66cf9a79a06ed9ab2e7e6081c428cb7e9e8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Mon, 18 Aug 2025 17:16:37 +0100 Subject: [PATCH] [X86] Enable MMX unpcklo/unpckhi intrinsics in constexpr Matches behaviour in SSE/AVX/AVX512 intrinsics - was missed in #153028 --- clang/lib/Headers/mmintrin.h | 48 +++++++++++---------------- clang/test/CodeGen/X86/mmx-builtins.c | 6 ++++ 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index f62450bb1a67f..6fe9d67b8976d 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -242,11 +242,10 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2) /// Bits [63:56] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, - 4, 12, 5, 13, 6, 14, 7, 15); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5, + 13, 6, 14, 7, 15); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -266,11 +265,9 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) /// Bits [63:48] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, - 2, 6, 3, 7); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -288,10 +285,9 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] @@ -315,11 +311,10 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) /// Bits [31:24] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, - 0, 8, 1, 9, 2, 10, 3, 11); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9, + 2, 10, 3, 11); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -339,11 +334,9 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) /// Bits [31:16] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, - 0, 4, 1, 5); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -361,10 +354,9 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); } /// Adds each 8-bit integer element of the first 64-bit integer vector diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index b9682dade0c92..35f0d6c9b43e8 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -764,36 +764,42 @@ __m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) { // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> return _mm_unpackhi_pi8(a, b); } +TEST_CONSTEXPR(match_v8qi(_mm_unpackhi_pi8((__m64)(__v8qi){0, 1, 2, 3, 4, 5, 6, 7}, (__m64)(__v8qi){8, 9, 10, 11, 12, 13, 14, 15}), 4, 12, 5, 13, 6, 14, 7, 15)); __m64 test_mm_unpackhi_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_unpackhi_pi16 // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> return _mm_unpackhi_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_unpackhi_pi16((__m64)(__v4hi){0, 1, 2, 3}, (__m64)(__v4hi){ 4, 5, 6, 7}), 2, 6, 3, 7)); __m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_unpackhi_pi32 // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 3> return _mm_unpackhi_pi32(a, b); } +TEST_CONSTEXPR(match_v2si(_mm_unpackhi_pi32((__m64)(__v2si){0, 1}, (__m64)(__v2si){2, 3}), 1, 3)); __m64 test_mm_unpacklo_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_unpacklo_pi8 // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> return _mm_unpacklo_pi8(a, b); } +TEST_CONSTEXPR(match_v8qi(_mm_unpacklo_pi8((__m64)(__v8qi){0, 1, 2, 3, 4, 5, 6, 7}, (__m64)(__v8qi){8, 9, 10, 11, 12, 13, 14, 15}), 0, 8, 1, 9, 2, 10, 3, 11)); __m64 test_mm_unpacklo_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_unpacklo_pi16 // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> return _mm_unpacklo_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_unpacklo_pi16((__m64)(__v4hi){0, 1, 2, 3}, (__m64)(__v4hi){ 4, 5, 6, 7}), 0, 4, 1, 5)); __m64 test_mm_unpacklo_pi32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_unpacklo_pi32 // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 0, i32 2> return _mm_unpacklo_pi32(a, b); } +TEST_CONSTEXPR(match_v2si(_mm_unpacklo_pi32((__m64)(__v2si){0, 1}, (__m64)(__v2si){2, 3}), 0, 2)); __m64 test_mm_xor_si64(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_xor_si64 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits