On Tue, 17 Mar 2026 at 16:11, Hewill Kang <[email protected]> wrote:
>
> I'm just curious why there isn't an overload of
> __uninitialized_move_a(_Iter __first, _Iter __last, 
> _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result, ...)?

It isn't needed to fix the bug.

As the commit message says, I'm not trying to handle everything, I'm
just addressing the reported regression.

>
> Jonathan Wakely <[email protected]> 於 2026年3月17日週二 上午7:24寫道:
>>
>> I reimplemented uninitialized_copy and uninitialized_move in
>> r15-4473-g3abe751ea86e34 so that they no longer delegate to std::copy,
>> but that meant that they were no longer optimized for std::deque
>> iterators, leading to performance regressions for operations on a
>> std::deque with trivial element types. This adds new overloads of
>> __uninitialized_copy_a and __uninitialized_move_a to handle std::deque
>> iterators, restoring the lost performance.
>>
>> There are also overloads of std::fill for deque iterators which are no
>> longer used for std::uninitialized_fill. This does not add replacements
>> for those, so there will still be lost performance for std::deque
>> operations that depend on std::uninitialized_fill. Similarly, inserting
>> or assigning from istreambuf_iterator into a std::deque no longer uses
>> the std::copy overloads for those types, and that isn't fixed by this
>> patch either.
>>
>> libstdc++-v3/ChangeLog:
>>
>>         PR libstdc++/124463
>>         * include/bits/deque.tcc (__uninitialized_copy_a): Define
>>         overloads for input and output iterators being std::deque
>>         iterators, and for only the output iterator being a std::deque
>>         iterator.
>>         (__uninitialized_move_a): Overload for input and output
>>         iterators being std::deque iterators.
>>         * include/bits/stl_uninitialized.h (__uninitialized_copy_a)
>>         (__uninitialized_move_a): Declare overloads for std::deque
>>         iterators.
>> ---
>>
>> I don't love this solution, but it's a quick fix for the reported
>> regression. For GCC 17 I'd like to add a more general segmented iterator
>> optimization, which all algos could opt in to generically, which
>> wouldn't require distinct overloads for each type of segmented iterator
>> (that's related to PR 123211).
>>
>> Tested x86_64-linux.
>>
>>  libstdc++-v3/include/bits/deque.tcc           | 94 +++++++++++++++++++
>>  libstdc++-v3/include/bits/stl_uninitialized.h | 27 ++++++
>>  2 files changed, 121 insertions(+)
>>
>> diff --git a/libstdc++-v3/include/bits/deque.tcc 
>> b/libstdc++-v3/include/bits/deque.tcc
>> index 7a690466cd58..b35f268b0db1 100644
>> --- a/libstdc++-v3/include/bits/deque.tcc
>> +++ b/libstdc++-v3/include/bits/deque.tcc
>> @@ -1549,6 +1549,100 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
>>        return __last2 != __first2;
>>      }
>>
>> +#if __cplusplus >= 201103L
>> +#pragma GCC diagnostic push
>> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
>> +  template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
>> +          typename _Tp>
>> +    _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
>> +    __uninitialized_copy_a(
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
>> +      allocator<_Tp>&)
>> +    {
>> +      // In order to unwind all initialized elements, we just use the 
>> default
>> +      // implementation if construction can throw.
>> +      if constexpr (!__is_nothrow_constructible(_OTp, _IRef))
>> +       return std::__do_uninit_copy(__first, __last, __result);
>> +      else
>> +       while (__first != __last)
>> +         {
>> +           auto __from = __first._M_cur;
>> +           ptrdiff_t __n;
>> +           if (__first._M_node == __last._M_node)
>> +             __n = __last._M_cur - __from;
>> +           else
>> +             __n = __first._M_last - __from;
>> +           _OTp* __sres = __result._M_cur;
>> +           __n = std::min<ptrdiff_t>(__n, __result._M_last - 
>> __result._M_cur);
>> +           std::uninitialized_copy(__from, __from + __n, __result._M_cur);
>> +           __first += __n;
>> +           __result += __n;
>> +         }
>> +      return __result;
>> +    }
>> +
>> +  template<typename _Iter, typename _OTp, typename _Tp>
>> +    __enable_if_t<__is_random_access_iter<_Iter>::value,
>> +                 _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>>
>> +    __uninitialized_copy_a(_Iter __first, _Iter __last,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
>> +      allocator<_Tp>&)
>> +    {
>> +      // In order to unwind all initialized elements, we just use the 
>> default
>> +      // implementation if construction can throw.
>> +      if constexpr (!__is_nothrow_constructible(_OTp, decltype(*__first)))
>> +       return std::__do_uninit_copy(__first, __last, __result);
>> +      else
>> +       while (__first != __last)
>> +         {
>> +           auto __n = std::min<ptrdiff_t>(__last - __first,
>> +                                          __result._M_last - 
>> __result._M_cur);
>> +           std::uninitialized_copy(__first, __first + __n, __result._M_cur);
>> +           __first += __n;
>> +           __result += __n;
>> +         }
>> +      return __result;
>> +    }
>> +
>> +  template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
>> +          typename _Tp>
>> +    _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
>> +    __uninitialized_move_a(
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
>> +      allocator<_Tp>&)
>> +    {
>> +      // In order to unwind all initialized elements, we just use the 
>> default
>> +      // implementation if construction can throw.
>> +      if constexpr (!__is_nothrow_constructible(_OTp,
>> +                                               
>> decltype(std::move(*__first))))
>> +       return std::uninitialized_copy(std::make_move_iterator(__first),
>> +                                      std::make_move_iterator(__last),
>> +                                      __result);
>> +      else
>> +       while (__first != __last)
>> +         {
>> +           auto __from = __first._M_cur;
>> +           ptrdiff_t __n;
>> +           if (__first._M_node == __last._M_node)
>> +             __n = __last._M_cur - __from;
>> +           else
>> +             __n = __first._M_last - __from;
>> +           __n = std::min<ptrdiff_t>(__n, __result._M_last - 
>> __result._M_cur);
>> +           std::uninitialized_copy(std::make_move_iterator(__from),
>> +                                   std::make_move_iterator(__from + __n),
>> +                                   __result._M_cur);
>> +           __first += __n;
>> +           __result += __n;
>> +         }
>> +      return __result;
>> +    }
>> +#pragma GCC diagnostic pop
>> +#endif // C++11
>> +
>>  _GLIBCXX_END_NAMESPACE_VERSION
>>  } // namespace std
>>
>> diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h 
>> b/libstdc++-v3/include/bits/stl_uninitialized.h
>> index 82e4ba2ff7b7..ecc24023e98f 100644
>> --- a/libstdc++-v3/include/bits/stl_uninitialized.h
>> +++ b/libstdc++-v3/include/bits/stl_uninitialized.h
>> @@ -662,6 +662,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>      }
>>  #endif
>>
>> +#if __cplusplus >= 201103L
>> +  template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
>> +          typename _Tp>
>> +    _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
>> +    __uninitialized_copy_a(
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
>> +      allocator<_Tp>&);
>> +
>> +  template<typename _Iter, typename _OTp, typename _Tp>
>> +    __enable_if_t<__is_random_access_iter<_Iter>::value,
>> +                 _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>>
>> +    __uninitialized_copy_a(_Iter __first, _Iter __last,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
>> +      allocator<_Tp>&);
>> +
>> +  template<typename _ITp, typename _IRef, typename _IPtr, typename _OTp,
>> +          typename _Tp>
>> +    _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*>
>> +    __uninitialized_move_a(
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __first,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_ITp, _IRef, _IPtr> __last,
>> +      _GLIBCXX_STD_C::_Deque_iterator<_OTp, _OTp&, _OTp*> __result,
>> +      allocator<_Tp>&);
>> +#endif
>> +
>>    template<typename _InputIterator, typename _ForwardIterator,
>>            typename _Allocator>
>>      _GLIBCXX20_CONSTEXPR
>> --
>> 2.53.0
>>

Reply via email to