On Mon, 9 Feb 2026 at 15:19, Patrick Palka <[email protected]> wrote:
>
> Changes is v2:
>  - Rename __search_mode bool in __regex_algo_impl and _Executor's ctor
>    to __use_dfs and adjust _M_search_mode initialization accordingly.
>
> That _Executor's __dfs_mode flag is a template parameter means
> __regex_algo_impl has to instantiate two different (but largely the
> same) versions of _Executor even though the BFS version is rarely used.
> And in practice it seems the compiler has trouble DCEing the unused
> version of _Executor, which needlessly increases code size and burdens
> the optimizer.
>
> This patch replaces the template parameter with a run-time data member
> _M_search_mode.  We also need to inline the _State_info member functions
> and data members of both partial specializations into _Executor so that
> they depend on the run-time instead of compile-time flag.  This means
> _Executor is two pointers bigger when using DFS mode (due to the unused
> _M_match_queue and _M_visited_states members), but that's negligible
> compared to the other benefits.
>
> The __dfs_mode template parameter is now unused but not yet removed,
> that'll be done in the next patch.
>
> After this patch, both code size and run time for the microbenchmark
>
>     for (int i = 0; i < 10000; i++)
>       regex_match(string(200, 'a'), regex("(a|b|c)*"));
>
> decreases by about 15% each (with -O2).  Compile time and maximum memory
> use decreases by 5-10%.

OK


>
> libstdc++-v3/ChangeLog:
>
>         * include/bits/regex.tcc (__regex_algo_impl): Pass __use_dfs
>         parameter to _Executor's constructor.
>         * include/bits/regex_executor.h (_Executor::_Search_mode): New.
>         (_Executor::_Executor): Add __use_dfs parameter and initialize
>         _M_search_mode.  Adjust after inlining _State_info members into
>         _Executor.
>         (_Executor::~_Executor): Free _M_visted_states.
>         (_Executor::_M_main): Adjust after renaming _M_main_dispatch
>         overloads to _M_main_dfs and _M_main_bfs.
>         (_Executor::_State_info): Remove.
>         (_Executor::_M_visited): Inlined from _State_info.
>         (_Executor::_M_get_sol_pos): Likewise.
>         (_Executor::_M_states): Remove.
>         (_Executor::_M_start): Inlined from _State_info.
>         (_Executor::_M_sol_pos): Likewise.
>         (_Executor::_M_match_queue): Likewise.
>         (_Executor::_M_search_mode): New.
>         * include/bits/regex_executor.tcc (_Executor::_M_main_dispatch):
>         Renamed to...
>         (_Executor::_M_main_dfs, _Executor::_M_main_bfs): ... these.
>         (_Executor::_M_*): Adjust after _M_states removal.
>         (_Executor::_M_lookhead): Also adjust _Executor constructor
>         call.
> ---
>  libstdc++-v3/include/bits/regex.tcc          | 25 ++---
>  libstdc++-v3/include/bits/regex_executor.h   | 96 +++++++-------------
>  libstdc++-v3/include/bits/regex_executor.tcc | 67 +++++++-------
>  3 files changed, 78 insertions(+), 110 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/regex.tcc 
> b/libstdc++-v3/include/bits/regex.tcc
> index 061b60042eca..d16cfd5baf01 100644
> --- a/libstdc++-v3/include/bits/regex.tcc
> +++ b/libstdc++-v3/include/bits/regex.tcc
> @@ -61,26 +61,19 @@ namespace __detail
>        __m._M_resize(__re._M_automaton->_M_sub_count());
>
>        bool __ret;
> +      bool __use_dfs = true;
>        if ((__re.flags() & regex_constants::__polynomial)
>           || (__policy == _RegexExecutorPolicy::_S_alternate
>               && !__re._M_automaton->_M_has_backref))
> -       {
> -         _Executor<_BiIter, _Alloc, _TraitsT, false>
> -           __executor(__s, __e, __res, __re, __flags);
> -         if (__match_mode)
> -           __ret = __executor._M_match();
> -         else
> -           __ret = __executor._M_search();
> -       }
> +       __use_dfs = false;
> +
> +      _Executor<_BiIter, _Alloc, _TraitsT, true>
> +       __executor(__s, __e, __res, __re, __flags, __use_dfs);
> +      if (__match_mode)
> +       __ret = __executor._M_match();
>        else
> -       {
> -         _Executor<_BiIter, _Alloc, _TraitsT, true>
> -           __executor(__s, __e, __res, __re, __flags);
> -         if (__match_mode)
> -           __ret = __executor._M_match();
> -         else
> -           __ret = __executor._M_search();
> -       }
> +       __ret = __executor._M_search();
> +
>        if (__ret)
>         {
>           for (auto& __it : __res)
> diff --git a/libstdc++-v3/include/bits/regex_executor.h 
> b/libstdc++-v3/include/bits/regex_executor.h
> index 94048e1f632c..2638df4d5afb 100644
> --- a/libstdc++-v3/include/bits/regex_executor.h
> +++ b/libstdc++-v3/include/bits/regex_executor.h
> @@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>            bool __dfs_mode>
>      class _Executor
>      {
> -      using __search_mode = integral_constant<bool, __dfs_mode>;
> -      using __dfs = true_type;
> -      using __bfs = false_type;
> -
> +      enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
>        enum class _Match_mode : unsigned char { _Exact, _Prefix };
>
>      public:
> @@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                 _BiIter         __end,
>                 _ResultsVec&    __results,
>                 const _RegexT&  __re,
> -               _FlagT          __flags)
> +               _FlagT          __flags,
> +               bool            __use_dfs)
>        : _M_cur_results(__results.get_allocator()),
>         _M_begin(__begin),
>         _M_end(__end),
> @@ -82,14 +80,21 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>         _M_nfa(*__re._M_automaton),
>         _M_results(__results),
>         _M_rep_count(_M_nfa.size()),
> -       _M_states(_M_nfa._M_start(), _M_nfa.size()),
> -       _M_flags(__flags)
> +       _M_start(_M_nfa._M_start()),
> +       _M_visited_states(nullptr),
> +       _M_flags(__flags),
> +       _M_search_mode(__use_dfs ? _Search_mode::_DFS : _Search_mode::_BFS)
>        {
>         using namespace regex_constants;
>         if (__flags & match_prev_avail) // ignore not_bol and not_bow
>           _M_flags &= ~(match_not_bol | match_not_bow);
> +       if (_M_search_mode == _Search_mode::_BFS)
> +         _M_visited_states = new bool[_M_nfa.size()];
>        }
>
> +      ~_Executor()
> +      { delete[] _M_visited_states; }
> +
>        // Set matched when string exactly matches the pattern.
>        bool
>        _M_match()
> @@ -154,13 +159,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>
>        bool
>        _M_main(_Match_mode __match_mode)
> -      { return _M_main_dispatch(__match_mode, __search_mode{}); }
> +      {
> +       if (_M_search_mode == _Search_mode::_DFS)
> +         return _M_main_dfs(__match_mode);
> +       else
> +         return _M_main_bfs(__match_mode);
> +      }
>
>        bool
> -      _M_main_dispatch(_Match_mode __match_mode, __dfs);
> +      _M_main_dfs(_Match_mode __match_mode);
>
>        bool
> -      _M_main_dispatch(_Match_mode __match_mode, __bfs);
> +      _M_main_bfs(_Match_mode __match_mode);
>
>        bool
>        _M_is_word(_CharT __ch) const
> @@ -239,62 +249,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>         return (_M_re._M_automaton->_M_options() & __m) == __m;
>        }
>
> -       // Holds additional information used in BFS-mode.
> -      template<typename _SearchMode, typename _ResultsVec>
> -       struct _State_info;
> -
> -      template<typename _ResultsVec>
> -       struct _State_info<__bfs, _ResultsVec>
> -       {
> -         explicit
> -         _State_info(_StateIdT __start, size_t __n)
> -         : _M_visited_states(new bool[__n]()), _M_start(__start)
> -         { }
> -
> -         ~_State_info() { delete[] _M_visited_states; }
> -
> -         _State_info(const _State_info&) = delete;
> -         _State_info& operator=(const _State_info&) = delete;
> -
> -         bool _M_visited(_StateIdT __i)
> +      bool
> +      _M_visited(_StateIdT __i)
> +      {
> +       if (_M_visited_states)
>           {
>             if (_M_visited_states[__i])
>               return true;
>             _M_visited_states[__i] = true;
> -           return false;
>           }
> +       return false;
> +      }
>
> -         void _M_queue(_StateIdT __i, const _ResultsVec& __res)
> -         { _M_match_queue.emplace_back(__i, __res); }
> -
> -         // Dummy implementations for BFS mode.
> -         _BiIter* _M_get_sol_pos() { return nullptr; }
> -
> -         // Saves states that need to be considered for the next character.
> -         _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
> -         // Indicates which states are already visited.
> -         bool*     _M_visited_states;
> -         // To record current solution.
> -         _StateIdT _M_start;
> -       };
> -
> -      template<typename _ResultsVec>
> -       struct _State_info<__dfs, _ResultsVec>
> -       {
> -         explicit
> -         _State_info(_StateIdT __start, size_t) : _M_start(__start)
> -         { }
> -
> -         // Dummy implementations for DFS mode.
> -         bool _M_visited(_StateIdT) const { return false; }
> -         void _M_queue(_StateIdT, const _ResultsVec&) { }
> -
> -         _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
> -
> -         // To record current solution.
> -         _StateIdT _M_start;
> -         _BiIter   _M_sol_pos;
> -       };
> +      _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
>
>      public:
>        _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>>       _M_frames;
> @@ -306,8 +273,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>        const _NFAT&                                          _M_nfa;
>        _ResultsVec&                                          _M_results;
>        _GLIBCXX_STD_C::vector<pair<_BiIter, int>>            _M_rep_count;
> -      _State_info<__search_mode, _ResultsVec>              _M_states;
> +      // To record current solution.
> +      _StateIdT                                             _M_start;
> +      _BiIter                                               _M_sol_pos;
> +      // (BFS only) Saves states that need to be considered for the next 
> character.
> +      _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>>  _M_match_queue;
> +      // (BFS only) Indicates which states are already visited.
> +      bool*                                                 
> _M_visited_states;
>        _FlagT                                                _M_flags;
> +      const _Search_mode                                    _M_search_mode;
>        // Do we have a solution so far?
>        bool                                                  _M_has_sol;
>      };
> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
> b/libstdc++-v3/include/bits/regex_executor.tcc
> index 19b5ad27df40..0ae7bdf6839a 100644
> --- a/libstdc++-v3/include/bits/regex_executor.tcc
> +++ b/libstdc++-v3/include/bits/regex_executor.tcc
> @@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>      };
>
>    // The _M_main function operates in different modes, DFS mode or BFS mode,
> -  // indicated by template parameter __dfs_mode, and dispatches to one of the
> -  // _M_main_dispatch overloads.
> +  // indicated by _M_search_mode, and dispatches to either _M_main_dfs or
> +  // _M_main_bfs.
>    //
>    // ------------------------------------------------------------
>    //
> @@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>            bool __dfs_mode>
>      bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> -    _M_main_dispatch(_Match_mode __match_mode, __dfs)
> +    _M_main_dfs(_Match_mode __match_mode)
>      {
>        _M_has_sol = false;
> -      *_M_states._M_get_sol_pos() = _BiIter();
> +      *_M_get_sol_pos() = _BiIter();
>        _M_cur_results = _M_results;
> -      _M_dfs(__match_mode, _M_states._M_start);
> +      _M_dfs(__match_mode, _M_start);
>        return _M_has_sol;
>      }
>
> @@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>    // It first computes epsilon closure (states that can be achieved without
>    // consuming characters) for every state that's still matching,
>    // using the same DFS algorithm, but doesn't re-enter states (using
> -  // _M_states._M_visited to check), nor follow _S_opcode_match.
> +  // _M_visited to check), nor follow _S_opcode_match.
>    //
> -  // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
> +  // Then apply DFS using every _S_opcode_match (in _M_match_queue)
>    // as the start state.
>    //
>    // It significantly reduces potential duplicate states, so has a better
> @@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>            bool __dfs_mode>
>      bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> -    _M_main_dispatch(_Match_mode __match_mode, __bfs)
> +    _M_main_bfs(_Match_mode __match_mode)
>      {
> -      _M_states._M_queue(_M_states._M_start, _M_results);
> +      _M_match_queue.emplace_back(_M_start, _M_results);
>        bool __ret = false;
>        while (1)
>         {
>           _M_has_sol = false;
> -         if (_M_states._M_match_queue.empty())
> +         if (_M_match_queue.empty())
>             break;
> -         std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
> -         auto __old_queue = std::move(_M_states._M_match_queue);
> +         std::fill_n(_M_visited_states, _M_nfa.size(), false);
> +         auto __old_queue = std::move(_M_match_queue);
>           auto __alloc = _M_cur_results.get_allocator();
>           for (auto& __task : __old_queue)
>             {
> @@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>         }
>        if (__match_mode == _Match_mode::_Exact)
>         __ret = _M_has_sol;
> -      _M_states._M_match_queue.clear();
> +      _M_match_queue.clear();
>        return __ret;
>      }
>
> @@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>        // We may want to make this faster by not copying,
>        // but let's not be clever prematurely.
>        _ResultsVec __what(_M_cur_results);
> -      _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
> -      __sub._M_states._M_start = __next;
> +      _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags,
> +                     bool(_M_search_mode));
> +      __sub._M_start = __next;
>        if (__sub._M_search_from_first())
>         {
>           for (size_t __i = 0; __i < __what.size(); __i++)
> @@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>        // Greedy.
>        if (!__state._M_neg)
>         {
> -         if constexpr (__dfs_mode)
> +         if (_M_search_mode == _Search_mode::_DFS)
>             // If it's DFS executor and already accepted, we're done.
>             _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
>                                    _M_current);
> @@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>         }
>        else // Non-greedy mode
>         {
> -         if constexpr (__dfs_mode)
> +         if (_M_search_mode == _Search_mode::_DFS)
>             {
>               // vice-versa.
>               _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
> @@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>        const auto& __state = _M_nfa[__i];
>        if (_M_current == _M_end)
>         return;
> -      if constexpr (__dfs_mode)
> +      if (_M_search_mode == _Search_mode::_DFS)
>         {
>           if (__state._M_matches(*_M_current))
>             {
> @@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>         }
>        else
>         if (__state._M_matches(*_M_current))
> -         _M_states._M_queue(__state._M_next, _M_cur_results);
> +         _M_match_queue.emplace_back(__state._M_next, _M_cur_results);
>      }
>
>    template<typename _BiIter, typename _TraitsT>
> @@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>      void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>      _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
>      {
> -      static_assert(__dfs_mode, "this should never be instantiated");
> +      __glibcxx_assert(_M_search_mode == _Search_mode::_DFS);
>
>        const auto& __state = _M_nfa[__i];
>        auto& __submatch = _M_cur_results[__state._M_backref_index];
> @@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>      void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>      _M_handle_accept(_Match_mode __match_mode, _StateIdT)
>      {
> -      if constexpr (__dfs_mode)
> +      if (_M_search_mode == _Search_mode::_DFS)
>         {
>           __glibcxx_assert(!_M_has_sol);
>           if (__match_mode == _Match_mode::_Exact)
> @@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                 _M_results = _M_cur_results;
>               else // POSIX
>                 {
> -                 __glibcxx_assert(_M_states._M_get_sol_pos());
> +                 __glibcxx_assert(_M_get_sol_pos());
>                   // Here's POSIX's logic: match the longest one. However
>                   // we never know which one (lhs or rhs of "|") is longer
>                   // unless we try both of them and compare the results.
> @@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                   // position of the last successful match. It's better
>                   // to be larger, because POSIX regex is always greedy.
>                   // TODO: This could be slow.
> -                 if (*_M_states._M_get_sol_pos() == _BiIter()
> -                     || std::distance(_M_begin,
> -                                      *_M_states._M_get_sol_pos())
> +                 if (*_M_get_sol_pos() == _BiIter()
> +                     || std::distance(_M_begin, *_M_get_sol_pos())
>                          < std::distance(_M_begin, _M_current))
>                     {
> -                     *_M_states._M_get_sol_pos() = _M_current;
> +                     *_M_get_sol_pos() = _M_current;
>                       _M_results = _M_cur_results;
>                     }
>                 }
> @@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>      inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>      _M_node(_Match_mode __match_mode, _StateIdT __i)
>      {
> -      if (_M_states._M_visited(__i))
> +      if (_M_visited(__i))
>         return;
>
>        switch (_M_nfa[__i]._M_opcode())
> @@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>         case _S_opcode_match:
>           _M_handle_match(__match_mode, __i); break;
>         case _S_opcode_backref:
> -         if constexpr (__dfs_mode)
> +         if (_M_search_mode == _Search_mode::_DFS)
>             _M_handle_backref(__match_mode, __i);
>           else
>             __builtin_unreachable();
> @@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>      }
>
>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
> -          bool __dfs_mode>
> -    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> +          bool __dummy>
> +    void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>::
>      _M_dfs(_Match_mode __match_mode, _StateIdT __start)
>      {
> +      const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS);
>        _M_frames.emplace_back(_S_fopcode_next, __start);
>
>        while (!_M_frames.empty())
> @@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>             case _S_fopcode_fallback_next:
>               if (_M_has_sol)
>                 break;
> -             if constexpr (__dfs_mode)
> +             if (__dfs_mode)
>                 _M_current = __frame._M_pos;
>               [[__fallthrough__]];
>             case _S_fopcode_next:
> @@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>             case _S_fopcode_fallback_rep_once_more:
>               if (_M_has_sol)
>                 break;
> -             if constexpr (__dfs_mode)
> +             if (__dfs_mode)
>                 _M_current = __frame._M_pos;
>               [[__fallthrough__]];
>             case _S_fopcode_rep_once_more:
> @@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>             case _S_fopcode_posix_alternative:
>               _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
>               _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
> -             if constexpr (__dfs_mode)
> +             if (__dfs_mode)
>                 _M_current = __frame._M_pos;
>               _M_has_sol = false;
>               break;
> --
> 2.53.0.40.g3e0db84c88
>

Reply via email to