I reimplemented uninitialized_copy and uninitialized_move in
r15-4473-g3abe751ea86e34 so that they no longer delegate to std::copy,
but that meant that they were no longer optimized for std::deque
iterators, leading to performance regressions for operations on a
std::deque with trivial element types. This adds new overloads of
__uninitialized_copy_a and __uninitialized_move_a to handle std::deque
iterators, restoring the lost performance.
There are also overloads of std::fill for deque iterators which are no
longer used for std::uninitialized_fill. This does not add replacements
for those, so there will still be lost performance for std::deque
operations that depend on std::uninitialized_fill. Similarly, inserting
or assigning from istreambuf_iterator into a std::deque no longer uses
the std::copy overloads for those types, and that isn't fixed by this
patch either.
libstdc++-v3/ChangeLog:
PR libstdc++/124463
* include/bits/deque.tcc (__uninitialized_copy_a): Define
overloads for input and output iterators being std::deque
iterators, and for only the output iterator being a std::deque
iterator.
(__uninitialized_move_a): Overload for input and output
iterators being std::deque iterators.
* include/bits/stl_uninitialized.h (__uninitialized_copy_a)
(__uninitialized_move_a): Declare overloads for std::deque
iterators.
---
I don't love this solution, but it's a quick fix for the reported
regression. For GCC 17 I'd like to add a more general segmented iterator
optimization, which all algos could opt in to generically, which
wouldn't require distinct overloads for each type of segmented iterator
(that's related to PR 123211).
Tested x86_64-linux.
libstdc++-v3/include/bits/deque.tcc | 94 +++++++++++++++++++
libstdc++-v3/include/bits/stl_uninitialized.h | 27 ++++++
2 files changed, 121 insertions(+)
diff --git a/libstdc++-v3/include/bits/deque.tcc
b/libstdc++-v3/include/bits/deque.tcc
index 7a690466cd58..b35f268b0db1 100644
--- a/libstdc++-v3/include/bits/deque.tcc
+++ b/libstdc++-v3/include/bits/deque.tcc
@@ -1549,6 +1549,100 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
return __last2 != __first2;
}
+#if __cplusplus >= 201103L
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
+ template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
+ typename _Tp>
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
+ __uninitialized_copy_a(
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
+ allocator<_Tp>&)
+ {
+ // In order to unwind all initialized elements, we just use the default
+ // implementation if construction can throw.
+ if constexpr (!__is_nothrow_constructible(_OTp, _IRef))
+ return std::__do_uninit_copy(__first, __last, __result);
+ else
+ while (__first != __last)
+ {
+ auto __from = __first._M_cur;
+ ptrdiff_t __n;
+ if (__first._M_node == __last._M_node)
+ __n = __last._M_cur - __from;
+ else
+ __n = __first._M_last - __from;
+ _OTp* __sres = __result._M_cur;
+ __n = std::min<ptrdiff_t>(__n, __result._M_last - __result._M_cur);
+ std::uninitialized_copy(__from, __from + __n, __result._M_cur);
+ __first += __n;
+ __result += __n;
+ }
+ return __result;
+ }
+
+ template<typename _Iter, typename _OTp, typename _Tp>
+ __enable_if_t<__is_random_access_iter<_Iter>::value,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>>
+ __uninitialized_copy_a(_Iter __first, _Iter __last,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
+ allocator<_Tp>&)
+ {
+ // In order to unwind all initialized elements, we just use the default
+ // implementation if construction can throw.
+ if constexpr (!__is_nothrow_constructible(_OTp, decltype(*__first)))
+ return std::__do_uninit_copy(__first, __last, __result);
+ else
+ while (__first != __last)
+ {
+ auto __n = std::min<ptrdiff_t>(__last - __first,
+ __result._M_last - __result._M_cur);
+ std::uninitialized_copy(__first, __first + __n, __result._M_cur);
+ __first += __n;
+ __result += __n;
+ }
+ return __result;
+ }
+
+ template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
+ typename _Tp>
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
+ __uninitialized_move_a(
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
+ allocator<_Tp>&)
+ {
+ // In order to unwind all initialized elements, we just use the default
+ // implementation if construction can throw.
+ if constexpr (!__is_nothrow_constructible(_OTp,
+ decltype(std::move(*__first))))
+ return std::uninitialized_copy(std::make_move_iterator(__first),
+ std::make_move_iterator(__last),
+ __result);
+ else
+ while (__first != __last)
+ {
+ auto __from = __first._M_cur;
+ ptrdiff_t __n;
+ if (__first._M_node == __last._M_node)
+ __n = __last._M_cur - __from;
+ else
+ __n = __first._M_last - __from;
+ __n = std::min<ptrdiff_t>(__n, __result._M_last - __result._M_cur);
+ std::uninitialized_copy(std::make_move_iterator(__from),
+ std::make_move_iterator(__from + __n),
+ __result._M_cur);
+ __first += __n;
+ __result += __n;
+ }
+ return __result;
+ }
+#pragma GCC diagnostic pop
+#endif // C++11
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h
b/libstdc++-v3/include/bits/stl_uninitialized.h
index 82e4ba2ff7b7..ecc24023e98f 100644
--- a/libstdc++-v3/include/bits/stl_uninitialized.h
+++ b/libstdc++-v3/include/bits/stl_uninitialized.h
@@ -662,6 +662,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
#endif
+#if __cplusplus >= 201103L
+ template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
+ typename _Tp>
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
+ __uninitialized_copy_a(
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
+ allocator<_Tp>&);
+
+ template<typename _Iter, typename _OTp, typename _Tp>
+ __enable_if_t<__is_random_access_iter<_Iter>::value,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>>
+ __uninitialized_copy_a(_Iter __first, _Iter __last,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
+ allocator<_Tp>&);
+
+ template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
+ typename _Tp>
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
+ __uninitialized_move_a(
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
+ _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
+ _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
+ allocator<_Tp>&);
+#endif
+
template<typename _InputIterator, typename _ForwardIterator,
typename _Allocator>
_GLIBCXX20_CONSTEXPR
--
2.53.0