[clang] [X86] Enable MMX unpcklo/unpckhi intrinsics in constexpr (PR #154149)

Simon Pilgrim via cfe-commits Tue, 19 Aug 2025 00:38:41 -0700

https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/154149


From 4b04f66cf9a79a06ed9ab2e7e6081c428cb7e9e8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-...@redking.me.uk>
Date: Mon, 18 Aug 2025 17:16:37 +0100
Subject: [PATCH] [X86] Enable MMX unpcklo/unpckhi intrinsics in constexpr

Matches behaviour in SSE/AVX/AVX512 intrinsics - was missed in #153028
---
 clang/lib/Headers/mmintrin.h          | 48 +++++++++++----------------
 clang/test/CodeGen/X86/mmx-builtins.c |  6 ++++
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index f62450bb1a67f..6fe9d67b8976d 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -242,11 +242,10 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
 ///    Bits [63:56] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
-                                          4, 12, 5, 13, 6, 14, 7, 15);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5,
+                                        13, 6, 14, 7, 15);
 }
 
 /// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -266,11 +265,9 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
 ///    Bits [63:48] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
-                                          2, 6, 3, 7);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7);
 }
 
 /// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -288,10 +285,9 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
 ///    the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
 }
 
 /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
@@ -315,11 +311,10 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
 ///    Bits [31:24] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
-                                          0, 8, 1, 9, 2, 10, 3, 11);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9,
+                                        2, 10, 3, 11);
 }
 
 /// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -339,11 +334,9 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
 ///    Bits [31:16] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
-                                          0, 4, 1, 5);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5);
 }
 
 /// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -361,10 +354,9 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
 ///    the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
 }
 
 /// Adds each 8-bit integer element of the first 64-bit integer vector
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index b9682dade0c92..35f0d6c9b43e8 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -764,36 +764,42 @@ __m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
   // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   return _mm_unpackhi_pi8(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_unpackhi_pi8((__m64)(__v8qi){0, 1, 2, 3, 4, 5, 6, 7}, (__m64)(__v8qi){8, 9, 10, 11, 12, 13, 14, 15}), 4, 12, 5, 13, 6, 14, 7, 15));
 
 __m64 test_mm_unpackhi_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_unpackhi_pi16
   // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   return _mm_unpackhi_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_unpackhi_pi16((__m64)(__v4hi){0, 1, 2, 3}, (__m64)(__v4hi){ 4, 5, 6, 7}), 2, 6, 3, 7));
 
 __m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_unpackhi_pi32
   // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 3>
   return _mm_unpackhi_pi32(a, b);
 }
+TEST_CONSTEXPR(match_v2si(_mm_unpackhi_pi32((__m64)(__v2si){0, 1}, (__m64)(__v2si){2, 3}), 1, 3));
 
 __m64 test_mm_unpacklo_pi8(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_unpacklo_pi8
   // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   return _mm_unpacklo_pi8(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_unpacklo_pi8((__m64)(__v8qi){0, 1, 2, 3, 4, 5, 6, 7}, (__m64)(__v8qi){8, 9, 10, 11, 12, 13, 14, 15}), 0, 8, 1, 9, 2, 10, 3, 11));
 
 __m64 test_mm_unpacklo_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_unpacklo_pi16
   // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   return _mm_unpacklo_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_unpacklo_pi16((__m64)(__v4hi){0, 1, 2, 3}, (__m64)(__v4hi){ 4, 5, 6, 7}), 0, 4, 1, 5));
 
 __m64 test_mm_unpacklo_pi32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_unpacklo_pi32
   // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 0, i32 2>
   return _mm_unpacklo_pi32(a, b);
 }
+TEST_CONSTEXPR(match_v2si(_mm_unpacklo_pi32((__m64)(__v2si){0, 1}, (__m64)(__v2si){2, 3}), 0, 2));
 
 __m64 test_mm_xor_si64(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_xor_si64

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86] Enable MMX unpcklo/unpckhi intrinsics in constexpr (PR #154149)

Reply via email to