Re: [PATCH] libstdc++: Use __builtin_shufflevector for simd split and concat

2024-05-13 Thread Jonathan Wakely
On Tue, 7 May 2024 at 14:42, Matthias Kretz  wrote:
>
> Tested on x86_64-linux-gnu and aarch64-linux-gnu and with Clang 18 on
> x86_64-linux-gnu.
>
> OK for trunk and backport(s)?

OK for all.


>
> -- 8< 
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/114958
> * include/experimental/bits/simd.h (__as_vector): Return scalar
> simd as one-element vector. Return vector from single-vector
> fixed_size simd.
> (__vec_shuffle): New.
> (__extract_part): Adjust return type signature.
> (split): Use __extract_part for any split into non-fixed_size
> simds.
> (concat): If the return type stores a single vector, use
> __vec_shuffle (which calls __builtin_shufflevector) to produce
> the return value.
> * include/experimental/bits/simd_builtin.h
> (__shift_elements_right): Removed.
> (__extract_part): Return single elements directly. Use
> __vec_shuffle (which calls __builtin_shufflevector) for all
> non-trivial cases.
> * include/experimental/bits/simd_fixed_size.h (__extract_part):
> Return single elements directly.
> * testsuite/experimental/simd/pr114958.cc: New test.
> ---
>  libstdc++-v3/include/experimental/bits/simd.h | 161 +-
>  .../include/experimental/bits/simd_builtin.h  | 152 +
>  .../experimental/bits/simd_fixed_size.h   |   4 +-
>  .../testsuite/experimental/simd/pr114958.cc   |  20 +++
>  4 files changed, 145 insertions(+), 192 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/experimental/simd/pr114958.cc
>
>
> --
> ──
>  Dr. Matthias Kretz   https://mattkretz.github.io
>  GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
>  stdₓ::simd
> ──



[PATCH] libstdc++: Use __builtin_shufflevector for simd split and concat

2024-05-07 Thread Matthias Kretz
Tested on x86_64-linux-gnu and aarch64-linux-gnu and with Clang 18 on
x86_64-linux-gnu.

OK for trunk and backport(s)?

-- 8< 

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114958
* include/experimental/bits/simd.h (__as_vector): Return scalar
simd as one-element vector. Return vector from single-vector
fixed_size simd.
(__vec_shuffle): New.
(__extract_part): Adjust return type signature.
(split): Use __extract_part for any split into non-fixed_size
simds.
(concat): If the return type stores a single vector, use
__vec_shuffle (which calls __builtin_shufflevector) to produce
the return value.
* include/experimental/bits/simd_builtin.h
(__shift_elements_right): Removed.
(__extract_part): Return single elements directly. Use
__vec_shuffle (which calls __builtin_shufflevector) for all
non-trivial cases.
* include/experimental/bits/simd_fixed_size.h (__extract_part):
Return single elements directly.
* testsuite/experimental/simd/pr114958.cc: New test.
---
 libstdc++-v3/include/experimental/bits/simd.h | 161 +-
 .../include/experimental/bits/simd_builtin.h  | 152 +
 .../experimental/bits/simd_fixed_size.h   |   4 +-
 .../testsuite/experimental/simd/pr114958.cc   |  20 +++
 4 files changed, 145 insertions(+), 192 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/experimental/simd/pr114958.cc


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 6ef9c955cfa..6a6fd4f109d 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1651,7 +1651,24 @@ __as_vector(_V __x)
 if constexpr (__is_vector_type_v<_V>)
   return __x;
 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
-  return __data(__x)._M_data;
+  {
+	if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
+	  {
+	static_assert(is_simd<_V>::value);
+	static_assert(_V::abi_type::template __traits<
+			typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
+	return __as_vector(__data(__x).first);
+	  }
+	else if constexpr (_V::size() > 1)
+	  return __data(__x)._M_data;
+	else
+	  {
+	static_assert(is_simd<_V>::value);
+	using _Tp = typename _V::value_type;
+	using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+	return _RV{__data(__x)};
+	  }
+  }
 else if constexpr (__is_vectorizable_v<_V>)
   return __vector_type_t<_V, 2>{__x};
 else
@@ -2061,6 +2078,60 @@ __not(_Tp __a) noexcept
   return ~__a;
   }
 
+// }}}
+// __vec_shuffle{{{
+template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
+  {
+constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
+constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+#if __has_builtin(__builtin_shufflevector)
+#ifdef __clang__
+// Clang requires _T0 == _T1
+if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
+  return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
+else if constexpr (sizeof(__x) > sizeof(__y))
+  return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
+else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
+  return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
+	   __i = __idx_perm(__i);
+	   return __i < _N0 ? __i : __i - _N0 + _N1;
+	 });
+else if constexpr (sizeof(__x) < sizeof(__y))
+  return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
+	   __i = __idx_perm(__i);
+	   return __i < _N0 ? __i : __i - _N0 + _N1;
+	 });
+else
+#endif
+  return __builtin_shufflevector(__x, __y, [=] {
+	   constexpr int __j = __idx_perm(_Is);
+	   static_assert(__j < _N0 + _N1);
+	   return __j;
+	 }()...);
+#else
+using _Tp = __remove_cvref_t<decltype(__x[0])>;
+return __vector_type_t<_Tp, sizeof...(_Is)> {
+  [=]() -> _Tp {
+	constexpr int __j = __idx_perm(_Is);
+	static_assert(__j < _N0 + _N1);
+	if constexpr (__j < 0)
+	  return 0;
+	else if constexpr (__j < _N0)
+	  return __x[__j];
+	else
+	  return __y[__j - _N0];
+  }()...
+};
+#endif
+  }
+
+template <typename _T0, typename _Fun, typename _Seq>
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
+  { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
+
 // }}}
 // __concat{{{
 template ,
@@ -3947,7 +4018,7 @@ clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const