On Mon, 9 Feb 2026, Tomasz Kaminski wrote:

> 
> 
> On Fri, Feb 6, 2026 at 6:09 PM Patrick Palka <[email protected]> wrote:
>       That _Executor's __dfs_mode flag is a template parameter means
>       __regex_algo_impl has to instantiate two different (but largely the
>       same) versions of _Executor even though the BFS version is rarely used.
>       And in practice it seems the compiler has trouble DCEing the unused
>       version of _Executor, which needlessly increases code size and burdens
>       the optimizer.
> 
>       This patch replaces the template parameter with a run-time data member
>       _M_search_mode.  We also need to inline the _State_info member functions
>       and data members of both partial specializations into _Executor so that
>       they depend on the run-time instead of compile-time flag.  This means
>       _Executor is two pointers bigger when using DFS mode (due to the unused
>       _M_match_queue and _M_visited_states members), but that's negligible
>       compared to the other benefits.
> 
>       The __dfs_mode template parameter is now unused but not yet removed,
>       that'll be done in the next patch.
> 
>       After this patch, both code size and run time for the microbenchmark
> 
>           for (int i = 0; i < 10000; i++)
>             regex_match(string(200, 'a'), regex("(a|b|c)*"));
> 
>       decreases by about 15% each (with -O2).  Compile time and maximum memory
>       use decreases by 5-10%.
> 
>       libstdc++-v3/ChangeLog:
> 
>               * include/bits/regex.tcc (__regex_algo_impl): Pass __search_mode
>               parameter indicating whether to use DFS or BFS to _Executor's
>               constructor.
>               * include/bits/regex_executor.h (_Executor::_Search_mode): New.
>               (_Executor::_Executor): Add __search_mode parameter and
>               initialize _M_search_mode.  Adjust after inlining _State_info
>               members into _Executor.
>               (_Executor::~_Executor): Free _M_visted_states.
>               (_Executor::_M_main): Adjust after renaming _M_main_dispatch
>               overloads to _M_main_dfs and _M_main_bfs.
>               (_Executor::_State_info): Remove.
>               (_Executor::_M_visited): Inlined from _State_info.
>               (_Executor::_M_get_sol_pos): Likewise.
>               (_Executor::_M_states): Remove.
>               (_Executor::_M_start): Inlined from _State_info.
>               (_Executor::_M_sol_pos): Likewise.
>               (_Executor::_M_match_queue): Likewise.
>               (_Executor::_M_search_mode): New.
>               * include/bits/regex_executor.tcc (_Executor::_M_main_dispatch):
>               Renamed to...
>               (_Executor::_M_main_dfs, _Executor::_M_main_bfs): ... these.
>               (_Executor::_M_*): Adjust after _M_states removal.
>               (_Executor::_M_lookhead): Also adjust _Executor constructor
>               call.
>       ---
> 
> Only small suggestions regarding  the naming.
>        libstdc++-v3/include/bits/regex.tcc          | 25 ++---
>        libstdc++-v3/include/bits/regex_executor.h   | 97 ++++++++------------
>        libstdc++-v3/include/bits/regex_executor.tcc | 67 +++++++-------
>        3 files changed, 79 insertions(+), 110 deletions(-)
> 
>       diff --git a/libstdc++-v3/include/bits/regex.tcc 
> b/libstdc++-v3/include/bits/regex.tcc
>       index 6ec259c25019..6ffe1beca08a 100644
>       --- a/libstdc++-v3/include/bits/regex.tcc
>       +++ b/libstdc++-v3/include/bits/regex.tcc
>       @@ -61,26 +61,19 @@ namespace __detail
>              __m._M_resize(__re._M_automaton->_M_sub_count());
> 
>              bool __ret;
>       +      bool __search_mode = true; // by default, use DFS
> 
> I would name this bool __dfs_mode. 

Sure

>              if ((__re.flags() & regex_constants::__polynomial)
>                 || (__policy == _RegexExecutorPolicy::_S_alternate
>                     && !__re._M_automaton->_M_has_backref))
>       -       {
>       -         _Executor<_BiIter, _Alloc, _TraitsT, false>
>       -           __executor(__s, __e, __res, __re, __flags);
>       -         if (__match_mode)
>       -           __ret = __executor._M_match();
>       -         else
>       -           __ret = __executor._M_search();
>       -       }
>       +       __search_mode = false; // use BFS when requested
>       +
>       +      _Executor<_BiIter, _Alloc, _TraitsT, true>
>       +       __executor(__s, __e, __res, __re, __flags, __search_mode);
>       +      if (__match_mode)
>       +       __ret = __executor._M_match();
>              else
>       -       {
>       -         _Executor<_BiIter, _Alloc, _TraitsT, true>
>       -           __executor(__s, __e, __res, __re, __flags);
>       -         if (__match_mode)
>       -           __ret = __executor._M_match();
>       -         else
>       -           __ret = __executor._M_search();
>       -       }
>       +       __ret = __executor._M_search();
>       +
>              if (__ret)
>               {
>                 for (auto& __it : __res)
>       diff --git a/libstdc++-v3/include/bits/regex_executor.h 
> b/libstdc++-v3/include/bits/regex_executor.h
>       index 94048e1f632c..550fa751c002 100644
>       --- a/libstdc++-v3/include/bits/regex_executor.h
>       +++ b/libstdc++-v3/include/bits/regex_executor.h
>       @@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                  bool __dfs_mode>
>            class _Executor
>            {
>       -      using __search_mode = integral_constant<bool, __dfs_mode>;
>       -      using __dfs = true_type;
>       -      using __bfs = false_type;
>       -
>       +      enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
>              enum class _Match_mode : unsigned char { _Exact, _Prefix };
> 
>            public:
>       @@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                       _BiIter         __end,
>                       _ResultsVec&    __results,
>                       const _RegexT&  __re,
>       -               _FlagT          __flags)
>       +               _FlagT          __flags,
>       +               bool            __search_mode)
> 
> Same for the name of the parameter. 

Done

>              : _M_cur_results(__results.get_allocator()),
>               _M_begin(__begin),
>               _M_end(__end),
>       @@ -82,14 +80,22 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>               _M_nfa(*__re._M_automaton),
>               _M_results(__results),
>               _M_rep_count(_M_nfa.size()),
>       -       _M_states(_M_nfa._M_start(), _M_nfa.size()),
>       -       _M_flags(__flags)
>       +       _M_start(_M_nfa._M_start()),
>       +       _M_flags(__flags),
>       +       _M_search_mode(_Search_mode(__search_mode))
> 
> I think __dfs_mode ?  _Search_mode::_DFS : _Search_mode::_BFS would be 
> cleaner.

Done

> 
>              {
>               using namespace regex_constants;
>               if (__flags & match_prev_avail) // ignore not_bol and not_bow
>                 _M_flags &= ~(match_not_bol | match_not_bow);
>       +       if (_M_search_mode == _Search_mode::_BFS)
>       +         _M_visited_states = new bool[_M_nfa.size()];
>       +       else
>       +         _M_visited_states = nullptr;
>              }
> 
>       +      ~_Executor()
>       +      { delete[] _M_visited_states; }
>       +
>              // Set matched when string exactly matches the pattern.
>              bool
>              _M_match()
>       @@ -154,13 +160,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> 
>              bool
>              _M_main(_Match_mode __match_mode)
>       -      { return _M_main_dispatch(__match_mode, __search_mode{}); }
>       +      {
>       +       if (_M_search_mode == _Search_mode::_DFS)
>       +         return _M_main_dfs(__match_mode);
>       +       else
>       +         return _M_main_bfs(__match_mode);
>       +      }
> 
>              bool
>       -      _M_main_dispatch(_Match_mode __match_mode, __dfs);
>       +      _M_main_dfs(_Match_mode __match_mode);
> 
>              bool
>       -      _M_main_dispatch(_Match_mode __match_mode, __bfs);
>       +      _M_main_bfs(_Match_mode __match_mode);
> 
>              bool
>              _M_is_word(_CharT __ch) const
>       @@ -239,62 +250,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>               return (_M_re._M_automaton->_M_options() & __m) == __m;
>              }
> 
>       -       // Holds additional information used in BFS-mode.
>       -      template<typename _SearchMode, typename _ResultsVec>
>       -       struct _State_info;
>       -
>       -      template<typename _ResultsVec>
>       -       struct _State_info<__bfs, _ResultsVec>
>       -       {
>       -         explicit
>       -         _State_info(_StateIdT __start, size_t __n)
>       -         : _M_visited_states(new bool[__n]()), _M_start(__start)
>       -         { }
>       -
>       -         ~_State_info() { delete[] _M_visited_states; }
>       -
>       -         _State_info(const _State_info&) = delete;
>       -         _State_info& operator=(const _State_info&) = delete;
>       -
>       -         bool _M_visited(_StateIdT __i)
>       +      bool
>       +      _M_visited(_StateIdT __i)
>       +      {
>       +       if (_M_visited_states)
>                 {
>                   if (_M_visited_states[__i])
>                     return true;
>                   _M_visited_states[__i] = true;
>       -           return false;
>                 }
>       +       return false;
>       +      }
> 
>       -         void _M_queue(_StateIdT __i, const _ResultsVec& __res)
>       -         { _M_match_queue.emplace_back(__i, __res); }
>       -
>       -         // Dummy implementations for BFS mode.
>       -         _BiIter* _M_get_sol_pos() { return nullptr; }
>       -
>       -         // Saves states that need to be considered for the next 
> character.
>       -         _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> 
> _M_match_queue;
>       -         // Indicates which states are already visited.
>       -         bool*     _M_visited_states;
>       -         // To record current solution.
>       -         _StateIdT _M_start;
>       -       };
>       -
>       -      template<typename _ResultsVec>
>       -       struct _State_info<__dfs, _ResultsVec>
>       -       {
>       -         explicit
>       -         _State_info(_StateIdT __start, size_t) : _M_start(__start)
>       -         { }
>       -
>       -         // Dummy implementations for DFS mode.
>       -         bool _M_visited(_StateIdT) const { return false; }
>       -         void _M_queue(_StateIdT, const _ResultsVec&) { }
>       -
>       -         _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
>       -
>       -         // To record current solution.
>       -         _StateIdT _M_start;
>       -         _BiIter   _M_sol_pos;
>       -       };
>       +      _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
> 
>            public:
>              _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>>       _M_frames;
>       @@ -306,8 +274,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>              const _NFAT&                                          _M_nfa;
>              _ResultsVec&                                          _M_results;
>              _GLIBCXX_STD_C::vector<pair<_BiIter, int>>            
> _M_rep_count;
>       -      _State_info<__search_mode, _ResultsVec>              _M_states;
>       +      // To record current solution.
>       +      _StateIdT                                             _M_start;
>       +      _BiIter                                               _M_sol_pos;
>       +      // Saves states that need to be considered for the next 
> character. (BFS mode)
>       +      _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>>  
> _M_match_queue;
>       +      // Indicates which states are already visited. (BFS mode)
>       +      bool*                                                 
> _M_visited_states;
>              _FlagT                                                _M_flags;
>       +      const _Search_mode                                    
> _M_search_mode;
>              // Do we have a solution so far?
>              bool                                                  _M_has_sol;
>            };
>       diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
> b/libstdc++-v3/include/bits/regex_executor.tcc
>       index 19b5ad27df40..0ae7bdf6839a 100644
>       --- a/libstdc++-v3/include/bits/regex_executor.tcc
>       +++ b/libstdc++-v3/include/bits/regex_executor.tcc
>       @@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>            };
> 
>          // The _M_main function operates in different modes, DFS mode or BFS 
> mode,
>       -  // indicated by template parameter __dfs_mode, and dispatches to one 
> of the
>       -  // _M_main_dispatch overloads.
>       +  // indicated by _M_search_mode, and dispatches to either _M_main_dfs 
> or
>       +  // _M_main_bfs.
>          //
>          // ------------------------------------------------------------
>          //
>       @@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>          template<typename _BiIter, typename _Alloc, typename _TraitsT,
>                  bool __dfs_mode>
>            bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>       -    _M_main_dispatch(_Match_mode __match_mode, __dfs)
>       +    _M_main_dfs(_Match_mode __match_mode)
>            {
>              _M_has_sol = false;
>       -      *_M_states._M_get_sol_pos() = _BiIter();
>       +      *_M_get_sol_pos() = _BiIter();
>              _M_cur_results = _M_results;
>       -      _M_dfs(__match_mode, _M_states._M_start);
>       +      _M_dfs(__match_mode, _M_start);
>              return _M_has_sol;
>            }
> 
>       @@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>          // It first computes epsilon closure (states that can be achieved 
> without
>          // consuming characters) for every state that's still matching,
>          // using the same DFS algorithm, but doesn't re-enter states (using
>       -  // _M_states._M_visited to check), nor follow _S_opcode_match.
>       +  // _M_visited to check), nor follow _S_opcode_match.
>          //
>       -  // Then apply DFS using every _S_opcode_match (in 
> _M_states._M_match_queue)
>       +  // Then apply DFS using every _S_opcode_match (in _M_match_queue)
>          // as the start state.
>          //
>          // It significantly reduces potential duplicate states, so has a 
> better
>       @@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>          template<typename _BiIter, typename _Alloc, typename _TraitsT,
>                  bool __dfs_mode>
>            bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>       -    _M_main_dispatch(_Match_mode __match_mode, __bfs)
>       +    _M_main_bfs(_Match_mode __match_mode)
>            {
>       -      _M_states._M_queue(_M_states._M_start, _M_results);
>       +      _M_match_queue.emplace_back(_M_start, _M_results);
>              bool __ret = false;
>              while (1)
>               {
>                 _M_has_sol = false;
>       -         if (_M_states._M_match_queue.empty())
>       +         if (_M_match_queue.empty())
>                   break;
>       -         std::fill_n(_M_states._M_visited_states, _M_nfa.size(), 
> false);
>       -         auto __old_queue = std::move(_M_states._M_match_queue);
>       +         std::fill_n(_M_visited_states, _M_nfa.size(), false);
>       +         auto __old_queue = std::move(_M_match_queue);
>                 auto __alloc = _M_cur_results.get_allocator();
>                 for (auto& __task : __old_queue)
>                   {
>       @@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>               }
>              if (__match_mode == _Match_mode::_Exact)
>               __ret = _M_has_sol;
>       -      _M_states._M_match_queue.clear();
>       +      _M_match_queue.clear();
>              return __ret;
>            }
> 
>       @@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>              // We may want to make this faster by not copying,
>              // but let's not be clever prematurely.
>              _ResultsVec __what(_M_cur_results);
>       -      _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
>       -      __sub._M_states._M_start = __next;
>       +      _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags,
>       +                     bool(_M_search_mode));
>       +      __sub._M_start = __next;
>              if (__sub._M_search_from_first())
>               {
>                 for (size_t __i = 0; __i < __what.size(); __i++)
>       @@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>              // Greedy.
>              if (!__state._M_neg)
>               {
>       -         if constexpr (__dfs_mode)
>       +         if (_M_search_mode == _Search_mode::_DFS)
>                   // If it's DFS executor and already accepted, we're done.
>                   _M_frames.emplace_back(_S_fopcode_fallback_next, 
> __state._M_next,
>                                          _M_current);
>       @@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>               }
>              else // Non-greedy mode
>               {
>       -         if constexpr (__dfs_mode)
>       +         if (_M_search_mode == _Search_mode::_DFS)
>                   {
>                     // vice-versa.
>                     _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, 
> __i,
>       @@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>              const auto& __state = _M_nfa[__i];
>              if (_M_current == _M_end)
>               return;
>       -      if constexpr (__dfs_mode)
>       +      if (_M_search_mode == _Search_mode::_DFS)
>               {
>                 if (__state._M_matches(*_M_current))
>                   {
>       @@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>               }
>              else
>               if (__state._M_matches(*_M_current))
>       -         _M_states._M_queue(__state._M_next, _M_cur_results);
>       +         _M_match_queue.emplace_back(__state._M_next, _M_cur_results);
>            }
> 
>          template<typename _BiIter, typename _TraitsT>
>       @@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>            void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>            _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
>            {
>       -      static_assert(__dfs_mode, "this should never be instantiated");
>       +      __glibcxx_assert(_M_search_mode == _Search_mode::_DFS);
> 
>              const auto& __state = _M_nfa[__i];
>              auto& __submatch = _M_cur_results[__state._M_backref_index];
>       @@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>            void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>            _M_handle_accept(_Match_mode __match_mode, _StateIdT)
>            {
>       -      if constexpr (__dfs_mode)
>       +      if (_M_search_mode == _Search_mode::_DFS)
>               {
>                 __glibcxx_assert(!_M_has_sol);
>                 if (__match_mode == _Match_mode::_Exact)
>       @@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                       _M_results = _M_cur_results;
>                     else // POSIX
>                       {
>       -                 __glibcxx_assert(_M_states._M_get_sol_pos());
>       +                 __glibcxx_assert(_M_get_sol_pos());
>                         // Here's POSIX's logic: match the longest one. 
> However
>                         // we never know which one (lhs or rhs of "|") is 
> longer
>                         // unless we try both of them and compare the results.
>       @@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                         // position of the last successful match. It's better
>                         // to be larger, because POSIX regex is always greedy.
>                         // TODO: This could be slow.
>       -                 if (*_M_states._M_get_sol_pos() == _BiIter()
>       -                     || std::distance(_M_begin,
>       -                                      *_M_states._M_get_sol_pos())
>       +                 if (*_M_get_sol_pos() == _BiIter()
>       +                     || std::distance(_M_begin, *_M_get_sol_pos())
>                                < std::distance(_M_begin, _M_current))
>                           {
>       -                     *_M_states._M_get_sol_pos() = _M_current;
>       +                     *_M_get_sol_pos() = _M_current;
>                             _M_results = _M_cur_results;
>                           }
>                       }
>       @@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>            inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>            _M_node(_Match_mode __match_mode, _StateIdT __i)
>            {
>       -      if (_M_states._M_visited(__i))
>       +      if (_M_visited(__i))
>               return;
> 
>              switch (_M_nfa[__i]._M_opcode())
>       @@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>               case _S_opcode_match:
>                 _M_handle_match(__match_mode, __i); break;
>               case _S_opcode_backref:
>       -         if constexpr (__dfs_mode)
>       +         if (_M_search_mode == _Search_mode::_DFS)
>                   _M_handle_backref(__match_mode, __i);
>                 else
>                   __builtin_unreachable();
>       @@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>            }
> 
>          template<typename _BiIter, typename _Alloc, typename _TraitsT,
>       -          bool __dfs_mode>
>       -    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>       +          bool __dummy>
>       +    void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>::
>            _M_dfs(_Match_mode __match_mode, _StateIdT __start)
>            {
>       +      const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS);
>              _M_frames.emplace_back(_S_fopcode_next, __start);
> 
>              while (!_M_frames.empty())
>       @@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                   case _S_fopcode_fallback_next:
>                     if (_M_has_sol)
>                       break;
>       -             if constexpr (__dfs_mode)
>       +             if (__dfs_mode)
>                       _M_current = __frame._M_pos;
>                     [[__fallthrough__]];
>                   case _S_fopcode_next:
>       @@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                   case _S_fopcode_fallback_rep_once_more:
>                     if (_M_has_sol)
>                       break;
>       -             if constexpr (__dfs_mode)
>       +             if (__dfs_mode)
> 
> As this is now runtime, do we need it? Can we always restore _M_current in 
> this
> cases?

I think it should be harmless in BFS mode since only _M_main_bfs
modifies _M_current before proceeding with DFS (without consuming
further input), so the _M_current restorations are a no-op.  But I
prefer keeping it conditional for sake of preserving the property
that in BFS mode only _M_main_bfs modifies _M_current.  The extra
branch should have negligible overhead in DFS mode.

>                       _M_current = __frame._M_pos;
>                     [[__fallthrough__]];
>                   case _S_fopcode_rep_once_more:
>       @@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>                   case _S_fopcode_posix_alternative:
>                     _M_frames.emplace_back(_S_fopcode_merge_sol, 0, 
> _M_has_sol);
>                     _M_frames.emplace_back(_S_fopcode_next, 
> __frame._M_state_id);
>       -             if constexpr (__dfs_mode)
>       +             if (__dfs_mode)
>                       _M_current = __frame._M_pos;
>                     _M_has_sol = false;
>                     break;
>       --
>       2.53.0.rc1.65.gea24e2c554
> 
> 
> 

Reply via email to