Tested on x86_64-linux-gnu. OK for trunk?
----- 8< ------
Previously, the x86-specific implementation of
_M_elements_shifted_to_front was necessary for efficient reductions.
When I tried to turn it into a missed-optimization PR, the issue was no
longer there: the generic _S_static_permute implementation optimizes
just as well (or better) at this point.
The traits_impl.cc change is a simple drive-by fix: the test guarded on
__STDCPP_BFLOAT16_T__ where __STDCPP_FLOAT16_T__ is the correct macro.
libstdc++-v3/ChangeLog:
* include/bits/simd_vec.h (_M_elements_shifted_to_front): Remove
x86-specific implementation.
* testsuite/std/simd/traits_impl.cc: Fix incorrect macro name.
Signed-off-by: Matthias Kretz <[email protected]>
---
libstdc++-v3/include/bits/simd_vec.h | 23 -------------------
.../testsuite/std/simd/traits_impl.cc | 2 +-
2 files changed, 1 insertion(+), 24 deletions(-)
diff --git a/libstdc++-v3/include/bits/simd_vec.h b/libstdc++-v3/include/bits/simd_vec.h
index bf6616df227..54c1502b23b 100644
--- a/libstdc++-v3/include/bits/simd_vec.h
+++ b/libstdc++-v3/include/bits/simd_vec.h
@@ -449,29 +449,6 @@ _M_elements_shifted_to_front() const
static_assert(_Shift < _S_size && -_Shift < _S_size);
if constexpr (_Shift == 0)
return *this;
-#ifdef __SSE2__
- else if (!__is_const_known(*this))
- {
- if constexpr (sizeof(_M_data) == 16 && _Shift > 0)
- return reinterpret_cast<_DataType>(
- __builtin_ia32_psrldqi128(__vec_bit_cast<long
long>(_M_data),
- _Shift *
sizeof(value_type) * 8));
- else if constexpr (sizeof(_M_data) == 16 && _Shift < 0)
- return reinterpret_cast<_DataType>(
- __builtin_ia32_pslldqi128(__vec_bit_cast<long
long>(_M_data),
- -_Shift *
sizeof(value_type) * 8));
- else if constexpr (sizeof(_M_data) < 16)
- {
- auto __x = reinterpret_cast<__vec_builtin_type_bytes<long
long, 16>>(
- __vec_zero_pad_to_16(_M_data));
- if constexpr (_Shift > 0)
- __x = __builtin_ia32_psrldqi128(__x, _Shift *
sizeof(value_type) * 8);
- else
- __x = __builtin_ia32_pslldqi128(__x, -_Shift *
sizeof(value_type) * 8);
- return
_VecOps<_DataType>::_S_extract(__vec_bit_cast<__canon_value_type>(__x));
- }
- }
-#endif
return _S_static_permute(*this, [](int __i) consteval {
int __off = __i + _Shift;
return __off >= _S_size || __off < 0 ? zero_element :
__off;
diff --git a/libstdc++-v3/testsuite/std/simd/traits_impl.cc b/libstdc++-v3/testsuite/std/simd/traits_impl.cc
index dde41c70aca..9f6e9f42b6e 100644
--- a/libstdc++-v3/testsuite/std/simd/traits_impl.cc
+++ b/libstdc++-v3/testsuite/std/simd/traits_impl.cc
@@ -49,7 +49,7 @@ namespace simd
static_assert(__vectorizable<__integer_from<N>>);
}
template for (constexpr int N : {
-#ifdef __STDCPP_BFLOAT16_T__
+#ifdef __STDCPP_FLOAT16_T__
2,
#endif
4, 8})
--
──────────────────────────────────────────────────────────────────────────
Dr. Matthias Kretz https://mattkretz.github.io
GSI Helmholtz Center for Heavy Ion Research https://gsi.de
std::simd
──────────────────────────────────────────────────────────────────────────