On Thu, Jun 12, 2025 at 9:05 PM Patrick Palka <ppa...@redhat.com> wrote:
> On Thu, 12 Jun 2025, Patrick Palka wrote: > > > On Thu, 12 Jun 2025, Jonathan Wakely wrote: > > > > > > > > > > > On Thu, 12 Jun 2025, 16:56 Patrick Palka, <ppa...@redhat.com> wrote: > > > Tested on x86_64-pc-linux-gnu, does this look OK for trunk? > > > > > > I'm sure if introducing a new overload is preferable to > > > introducing a constexpr if branch in the existing overload. > > > > > > > > > I think a constexpr if branch in the existing functions is cheaper. We > need to check the traits either way, but we can avoid doing overload > resolution. > > > > Ack. I opted to just define specialized functors for these helpers > > instead of using lambdas. That way we can easily optimize the case > > where only one of the functions is stateless. > > > > Changes in v2: > > - Use a functor utilizing [[no_unique_address]] instead of a lambda, > > to also concisely handle the case where only one of the functino > > object is stateless. In turn check is_copy_xible instead of > > is_default_xible. > > > > -- >8 -- > > > > When creating a composite comparator/predicate that invokes a given > > projection function, we don't need to capture a stateless function > > object by reference, instead we can capture it by value and use > > [[no_unique_address]] to elide its storage. This makes using > > __make_comp_proj zero-cost in the common case where the comparator or > > projection (or both) are stateless. > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/ranges_algo.h (__detail::_Comp_proj): New. > > (__detail::__make_comp_proj): Use it instead. > > (__detail::_Pred_proj): New. > > (__detail::__make_pred_proj): Use it instead. > > --- > > libstdc++-v3/include/bits/ranges_algo.h | 70 +++++++++++++++++++------ > > 1 file changed, 54 insertions(+), 16 deletions(-) > > > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h > b/libstdc++-v3/include/bits/ranges_algo.h > > index 9f94c6b1d57e..ce48ca047b69 100644 > > --- a/libstdc++-v3/include/bits/ranges_algo.h > > +++ b/libstdc++-v3/include/bits/ranges_algo.h > > @@ -49,27 +49,65 @@ namespace ranges > > namespace __detail > > { > > template<typename _Comp, typename _Proj> > > - constexpr auto > > + struct _Comp_proj > > + { > > + [[no_unique_address]] > > + __conditional_t<is_empty_v<_Comp> && > is_copy_constructible_v<_Comp>, > > + _Comp, _Comp&> _M_comp; > > Comparators, predicates and projections in ranges algorithms are already > constrained to by copyable, so checking is_copy_constructible_v here > is redundant. > > -- >8 -- > > Subject: [PATCH] libstdc++: Optimize __make_comp_proj and __make_pred_proj > > Changes in v3: > - Remove redundant is_copy_constructible check when deciding > whether to capture by reference or by value. > > Changes in v2: > - Use a functor utilizing [[no_unique_address]] instead of a lambda, > to also concisely handle the case where only one of the functino > object is stateless. In turn check is_copy_xible instead of > is_default_xible. > > -- >8 -- > > When creating a composite comparator/predicate that invokes a given > projection function, we don't need to capture a stateless function > object by reference, instead we can capture it by value and use > [[no_unique_address]] to elide its storage. This makes using > __make_comp_proj zero-cost in the common case where the comparator > and projection are both stateless. > > libstdc++-v3/ChangeLog: > > * include/bits/ranges_algo.h (__detail::_Comp_proj): New. > (__detail::__make_comp_proj): Use it instead. > (__detail::_Pred_proj): New. > (__detail::__make_pred_proj): Use it instead. > --- > libstdc++-v3/include/bits/ranges_algo.h | 64 ++++++++++++++++++------- > 1 file changed, 48 insertions(+), 16 deletions(-) > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h > b/libstdc++-v3/include/bits/ranges_algo.h > index cc182d110b30..a7df4c9a13ca 100644 > --- a/libstdc++-v3/include/bits/ranges_algo.h > +++ b/libstdc++-v3/include/bits/ranges_algo.h > @@ -49,27 +49,59 @@ namespace ranges > namespace __detail > { > template<typename _Comp, typename _Proj> > - constexpr auto > + struct _Comp_proj > + { > + [[no_unique_address]] > + __conditional_t<is_empty_v<_Comp>, _Comp, _Comp&> _M_comp; > + [[no_unique_address]] > + __conditional_t<is_empty_v<_Proj>, _Proj, _Proj&> _M_proj; > I would also store function pointers and member pointers (for proj) by value, instead of by reference. So instead of is_empty_v I would check: is_empty_v<_Func> || is_pointer_v<_Func> || is_member_pointer_v<_Func> Or just: __or_v<is_scalar<_Func>, is_empty<_Func>>. > + > + constexpr > + _Comp_proj(_Comp& __comp, _Proj& __proj) > + : _M_comp(__comp), _M_proj(__proj) > + { } > + > + template<typename _Tp, typename _Up> > + constexpr bool > + operator()(_Tp&& __x, _Up&& __y) > + { > + return std::__invoke(_M_comp, > + std::__invoke(_M_proj, > std::forward<_Tp>(__x)), > + std::__invoke(_M_proj, > std::forward<_Up>(__y))); > + } > + }; > + > + template<typename _Comp, typename _Proj> > + constexpr _Comp_proj<_Comp, _Proj> > __make_comp_proj(_Comp& __comp, _Proj& __proj) > + { return {__comp, __proj}; } > + > + template<typename _Pred, typename _Proj> > + struct _Pred_proj > { > - return [&] (auto&& __lhs, auto&& __rhs) -> bool { > - using _TL = decltype(__lhs); > - using _TR = decltype(__rhs); > - return std::__invoke(__comp, > - std::__invoke(__proj, > std::forward<_TL>(__lhs)), > - std::__invoke(__proj, > std::forward<_TR>(__rhs))); > - }; > - } > + [[no_unique_address]] > + __conditional_t<is_empty_v<_Pred>, _Pred, _Pred&> _M_pred; > + [[no_unique_address]] > + __conditional_t<is_empty_v<_Proj>, _Proj, _Proj&> _M_proj; > + > + constexpr > + _Pred_proj(_Pred& __pred, _Proj& __proj) > + : _M_pred(__pred), _M_proj(__proj) > + { } > + > + template<typename _Tp> > + constexpr bool > + operator()(_Tp&& __x) > + { > + return std::__invoke(_M_pred, > + std::__invoke(_M_proj, > std::forward<_Tp>(__x))); > + } > + }; > > template<typename _Pred, typename _Proj> > - constexpr auto > + constexpr _Pred_proj<_Pred, _Proj> > __make_pred_proj(_Pred& __pred, _Proj& __proj) > - { > - return [&] <typename _Tp> (_Tp&& __arg) -> bool { > - return std::__invoke(__pred, > - std::__invoke(__proj, > std::forward<_Tp>(__arg))); > - }; > - } > + { return {__pred, __proj}; } > } // namespace __detail > > struct __all_of_fn > -- > 2.50.0.rc2 >