Tested on x86_64-linux-gnu and aarch64-linux-gnu and with Clang 18 on
x86_64-linux-gnu.
OK for trunk and backport(s)?
-- 8<
Signed-off-by: Matthias Kretz
libstdc++-v3/ChangeLog:
PR libstdc++/114958
* include/experimental/bits/simd.h (__as_vector): Return scalar
simd as one-element vector. Return vector from single-vector
fixed_size simd.
(__vec_shuffle): New.
(__extract_part): Adjust return type signature.
(split): Use __extract_part for any split into non-fixed_size
simds.
(concat): If the return type stores a single vector, use
__vec_shuffle (which calls __builtin_shufflevector) to produce
the return value.
* include/experimental/bits/simd_builtin.h
(__shift_elements_right): Removed.
(__extract_part): Return single elements directly. Use
__vec_shuffle (which calls __builtin_shufflevector) for all
non-trivial cases.
* include/experimental/bits/simd_fixed_size.h (__extract_part):
Return single elements directly.
* testsuite/experimental/simd/pr114958.cc: New test.
---
libstdc++-v3/include/experimental/bits/simd.h | 161 +-
.../include/experimental/bits/simd_builtin.h | 152 +
.../experimental/bits/simd_fixed_size.h | 4 +-
.../testsuite/experimental/simd/pr114958.cc | 20 +++
4 files changed, 145 insertions(+), 192 deletions(-)
create mode 100644 libstdc++-v3/testsuite/experimental/simd/pr114958.cc
--
──
Dr. Matthias Kretz https://mattkretz.github.io
GSI Helmholtz Centre for Heavy Ion Research https://gsi.de
stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 6ef9c955cfa..6a6fd4f109d 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1651,7 +1651,24 @@ __as_vector(_V __x)
if constexpr (__is_vector_type_v<_V>)
return __x;
else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
- return __data(__x)._M_data;
+ {
+ if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
+ {
+ static_assert(is_simd<_V>::value);
+ static_assert(_V::abi_type::template __traits<
+ typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
+ return __as_vector(__data(__x).first);
+ }
+ else if constexpr (_V::size() > 1)
+ return __data(__x)._M_data;
+ else
+ {
+ static_assert(is_simd<_V>::value);
+ using _Tp = typename _V::value_type;
+ using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+ return _RV{__data(__x)};
+ }
+ }
else if constexpr (__is_vectorizable_v<_V>)
return __vector_type_t<_V, 2>{__x};
else
@@ -2061,6 +2078,60 @@ __not(_Tp __a) noexcept
return ~__a;
}
+// }}}
+// __vec_shuffle{{{
+template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
+ _GLIBCXX_SIMD_INTRINSIC constexpr auto
+ __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
+ {
+constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
+constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+#if __has_builtin(__builtin_shufflevector)
+#ifdef __clang__
+// Clang requires _T0 == _T1
+if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
+ return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
+else if constexpr (sizeof(__x) > sizeof(__y))
+ return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
+else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
+ return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
+ __i = __idx_perm(__i);
+ return __i < _N0 ? __i : __i - _N0 + _N1;
+ });
+else if constexpr (sizeof(__x) < sizeof(__y))
+ return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
+ __i = __idx_perm(__i);
+ return __i < _N0 ? __i : __i - _N0 + _N1;
+ });
+else
+#endif
+ return __builtin_shufflevector(__x, __y, [=] {
+ constexpr int __j = __idx_perm(_Is);
+ static_assert(__j < _N0 + _N1);
+ return __j;
+ }()...);
+#else
+using _Tp = __remove_cvref_t<decltype(__x[0])>;
+return __vector_type_t<_Tp, sizeof...(_Is)> {
+ [=]() -> _Tp {
+ constexpr int __j = __idx_perm(_Is);
+ static_assert(__j < _N0 + _N1);
+ if constexpr (__j < 0)
+ return 0;
+ else if constexpr (__j < _N0)
+ return __x[__j];
+ else
+ return __y[__j - _N0];
+ }()...
+};
+#endif
+ }
+
+template <typename _T0, typename _Fun, typename _Seq>
+ _GLIBCXX_SIMD_INTRINSIC constexpr auto
+ __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
+ { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
+
// }}}
// __concat{{{
template ,
@@ -3947,7 +4018,7 @@ clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const