On Wed, Jan 28, 2026 at 11:47 AM Tomasz Kaminski <[email protected]>
wrote:

>
>
> On Sat, Jan 24, 2026 at 6:21 PM Patrick Palka <[email protected]> wrote:
>
>> Changes in v5:
>>   - Replace char _ExecutorFrame::_M_char with
>>     uint8_t _ExecutorFrame::_M_bytes[7] to fully account for
>>     the implicit struct padding and make it char size/signedness
>>     agnostic.
>>
>> Changes in v4:
>>   - Do not remove the BFS executor.  I convinced myself that it's
>>     worthwhile to keep it around for backwards compatibility and its
>>     better time complexity (e.g. for PR78276).
>>   - However, this means the mangling of _Executor is now the same as
>>     before (since the __dfs_mode template parameter is no longer gone).
>>     I believe we need to give this class a different mangling so
>>     programs that mix old/new std::regex impls continue to work.
>>
>> Changes in v3:
>>
>>   - Add char _M_char member to _ExecutorFrame (without increasing the
>>     size of struct thanks to padding)
>>   - Use it to fix bug in restore_subexpr_end frame code whereby we
>>     weren't restoring _M_cur_results.matched properly
>>   - Combine restore_subexpr_begin/end frame codes into one
>>     restore_cur_results frame code
>>   - Combine restore_rep_count_val/posn frame codes into single
>>     restore_rep_count frame_code (this makes the patch 3/4 from v2
>>     obsolete)
>>   - Store the frame stack in the data member _ExecutorFrame::_M_frames
>>     alongside the rest of the executor state instead of using a local.
>>     Using a local variable is just more code with no real benefit.
>>
>> -- >8 --
>>
>> libstdc++/regex: Convert DFS executor to use iteration [PR86164]
>>
>> This replaces the recursion and implicit system stack usage of the DFS
>> executor with iteration and an explicit heap-based stack.  After this
>> patch, we no longer stack overflow on the PR86164 testcases since system
>> stack usage is no longer linear with respect to input size.  This should
>> otherwise be a non-functional change.
>>
> Except the one suggestion below, this LGTM.

>
>>         PR libstdc++/86164
>>
>> libstdc++-v3/ChangeLog:
>>
>>         * include/bits/regex_executor.h (__detail::_ExecutorFrame):
>>         Declare.
>>         (__detail::_Executor::_M_node): Declare.
>>         (__detail::_Executor::_M_frames): New data member.
>>         * include/bits/regex_executor.tcc
>> (__detail::_ExecutorFrameOpcode):
>>         New.
>>         (__detail::_ExecutorFrame): New.
>>         (__detail::_Executor::_M_rep_once_more): Replace recursive
>>         _M_dfs calls with an _S_opcode_next frame push, and any tail work
>>         after such calls with an appropriate frame push.
>>         (__detail::_M_handle_repeat): Likewise.
>>         (__detail::_M_handle_subexpr_begin): Likewise.
>>         (__detail::_M_handle_subexpr_end): Likewise.
>>         (__detail::_M_handle_line_begin_assertion): Likewise.
>>         (__detail::_M_handle_line_end_assertion): Likewise.
>>         (__detail::_M_handle_word_boundary): Likewise.
>>         (__detail::_M_handle_subexpr_lookahead): Likewise.
>>         (__detail::_M_handle_match): Likewise.
>>         (__detail::_M_handle_backref): Likewise.
>>         (__detail::_M_handle_accept): Likewise.
>>         (__detail::_M_handle_alternative): Likewise.
>>         (__detail::_M_node): Factored out from _M_dfs.
>>         (__detail::_M_dfs): Push an initial frame to _M_frames that
>>         visits the starting node and pass this stack each subroutine.
>>         Pop the latest _ExecutorFrame from _M_frames and handle
>>         appropriately according to its _ExecutorFrameOpcode.  Loop until
>>         _M_frames is empty.
>>         * include/std/regex: Include <cstdint>.
>> ---
>>  libstdc++-v3/include/bits/regex_executor.h   |   7 +
>>  libstdc++-v3/include/bits/regex_executor.tcc | 217 +++++++++++++++----
>>  libstdc++-v3/include/std/regex               |   1 +
>>  3 files changed, 180 insertions(+), 45 deletions(-)
>>
>> diff --git a/libstdc++-v3/include/bits/regex_executor.h
>> b/libstdc++-v3/include/bits/regex_executor.h
>> index f9857bfc4c1d..e9af007840bb 100644
>> --- a/libstdc++-v3/include/bits/regex_executor.h
>> +++ b/libstdc++-v3/include/bits/regex_executor.h
>> @@ -41,6 +41,9 @@ namespace __detail
>>     * @{
>>     */
>>
>> +  template<typename _BiIter, bool _Trivial =
>> is_trivially_copyable<_BiIter>::value>
>> +    struct _ExecutorFrame;
>> +
>>    /**
>>     * @brief Takes a regex and an input string and does the matching.
>>     *
>> @@ -142,6 +145,9 @@ namespace __detail
>>        void
>>        _M_handle_alternative(_Match_mode, _StateIdT);
>>
>> +      void
>> +      _M_node(_Match_mode, _StateIdT);
>> +
>>        void
>>        _M_dfs(_Match_mode __match_mode, _StateIdT __start);
>>
>> @@ -290,6 +296,7 @@ namespace __detail
>>         };
>>
>>      public:
>> +      _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>>       _M_frames;
>>        _ResultsVec
>>  _M_cur_results;
>>        _BiIter                                               _M_current;
>>        _BiIter                                               _M_begin;
>> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
>> b/libstdc++-v3/include/bits/regex_executor.tcc
>> index 421e585f39d9..bc7731953b0e 100644
>> --- a/libstdc++-v3/include/bits/regex_executor.tcc
>> +++ b/libstdc++-v3/include/bits/regex_executor.tcc
>> @@ -55,6 +55,71 @@ namespace __detail
>>        return false;
>>      }
>>
>> +  enum _ExecutorFrameOpcode : uint8_t
>> +  {
>> +    _S_fopcode_next,
>> +    _S_fopcode_fallback_next,
>> +    _S_fopcode_rep_once_more,
>> +    _S_fopcode_fallback_rep_once_more,
>> +    _S_fopcode_posix_alternative,
>> +    _S_fopcode_merge_sol,
>> +    _S_fopcode_restore_current,
>> +    _S_fopcode_restore_cur_results,
>> +    _S_fopcode_restore_rep_count,
>> +    _S_fopcode_decrement_rep_count,
>> +  };
>> +
>> +  template<typename _BiIter, bool _Trivial /* =
>> is_trivially_copyable<_BiIter>::value */>
>> +    struct _ExecutorFrame
>> +    {
>> +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i)
>> +      : _M_op(__op), _M_state_id(__i)
>> +      { }
>> +
>> +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, _BiIter
>> __p)
>> +      : _M_op(__op), _M_state_id(__i), _M_pos(__p)
>> +      { }
>> +
>> +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, long __v)
>> +      : _M_op(__op), _M_state_id(__i), _M_val(__v)
>> +      { }
>> +
>> +      _ExecutorFrameOpcode _M_op;
>> +      uint8_t _M_bytes[7];
>> +      _StateIdT _M_state_id;
>> +      // _M_pos and _M_val are mutually exclusive, which the optimized
>> +      // partial specialization below depends on.
>> +      _BiIter _M_pos = _BiIter();
>> +      long _M_val = 0;
>> +    };
>> +
>> +  // Space-optimized partial specialization for when the input iterator
>> is
>> +  // trivially copyable.
>> +  template<typename _BiIter>
>> +    struct _ExecutorFrame<_BiIter, true>
>> +    {
>> +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i)
>> +      : _M_op(__op), _M_state_id(__i)
>> +      { }
>> +
>> +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, _BiIter
>> __p)
>> +      : _M_op(__op), _M_state_id(__i), _M_pos(__p)
>> +      { }
>> +
>> +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, long __v)
>> +      : _M_op(__op), _M_state_id(__i), _M_val(__v)
>> +      { }
>> +
>> +      _ExecutorFrameOpcode _M_op;
>> +      uint8_t _M_bytes[7];
>> +      _StateIdT _M_state_id;
>> +      union
>> +      {
>> +       _BiIter _M_pos;
>> +       long _M_val;
>> +      };
>> +    };
>> +
>>    // The _M_main function operates in different modes, DFS mode or BFS
>> mode,
>>    // indicated by template parameter __dfs_mode, and dispatches to one
>> of the
>>    // _M_main_dispatch overloads.
>> @@ -181,19 +246,20 @@ namespace __detail
>>        auto& __rep_count = _M_rep_count[__i];
>>        if (__rep_count.second == 0 || __rep_count.first != _M_current)
>>         {
>> -         auto __back = __rep_count;
>> +         _M_frames.emplace_back(_S_fopcode_restore_rep_count,
>> +                                __i, __rep_count.first);
>> +         _M_frames.back()._M_bytes[0] = __rep_count.second;
>>           __rep_count.first = _M_current;
>>           __rep_count.second = 1;
>> -         _M_dfs(__match_mode, __state._M_alt);
>> -         __rep_count = __back;
>> +         _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
>>         }
>>        else
>>         {
>>           if (__rep_count.second < 2)
>>             {
>>               __rep_count.second++;
>> -             _M_dfs(__match_mode, __state._M_alt);
>> -             __rep_count.second--;
>> +             _M_frames.emplace_back(_S_fopcode_decrement_rep_count, __i);
>> +             _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
>>             }
>>         }
>>      }
>> @@ -208,23 +274,23 @@ namespace __detail
>>      _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i)
>>      {
>>        const auto& __state = _M_nfa[__i];
>> -
>>        // Greedy.
>>        if (!__state._M_neg)
>>         {
>> -         _M_rep_once_more(__match_mode, __i);
>> -         // If it's DFS executor and already accepted, we're done.
>> -         if (!__dfs_mode || !_M_has_sol)
>> -           _M_dfs(__match_mode, __state._M_next);
>> +         if constexpr (__dfs_mode)
>> +           // If it's DFS executor and already accepted, we're done.
>> +           _M_frames.emplace_back(_S_fopcode_fallback_next,
>> __state._M_next);
>> +         else
>> +           _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>> +         _M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
>>         }
>>        else // Non-greedy mode
>>         {
>>           if constexpr (__dfs_mode)
>>             {
>>               // vice-versa.
>> -             _M_dfs(__match_mode, __state._M_next);
>> -             if (!_M_has_sol)
>> -               _M_rep_once_more(__match_mode, __i);
>> +             _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more,
>> __i);
>> +             _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>             }
>>           else
>>             {
>> @@ -233,12 +299,11 @@ namespace __detail
>>               // be better by attempting its next node.
>>               if (!_M_has_sol)
>>                 {
>> -                 _M_dfs(__match_mode, __state._M_next);
>>                   // DON'T attempt anything if it's already accepted. An
>>                   // accepted state *must* be better than a solution that
>>                   // matches a non-greedy quantifier one more time.
>> -                 if (!_M_has_sol)
>> -                   _M_rep_once_more(__match_mode, __i);
>> +
>>  _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
>> +                 _M_frames.emplace_back(_S_fopcode_next,
>> __state._M_next);
>>                 }
>>             }
>>         }
>> @@ -250,12 +315,12 @@ namespace __detail
>>      _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i)
>>      {
>>        const auto& __state = _M_nfa[__i];
>> -
>>        auto& __res = _M_cur_results[__state._M_subexpr];
>> -      auto __back = __res.first;
>> +      _M_frames.emplace_back(_S_fopcode_restore_cur_results,
>> +                            __state._M_subexpr, __res.first);
>> +      _M_frames.back()._M_bytes[0] = 0xff;
>>        __res.first = _M_current;
>> -      _M_dfs(__match_mode, __state._M_next);
>> -      __res.first = __back;
>> +      _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>      }
>>
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> @@ -264,13 +329,13 @@ namespace __detail
>>      _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i)
>>      {
>>        const auto& __state = _M_nfa[__i];
>> -
>>        auto& __res = _M_cur_results[__state._M_subexpr];
>> -      auto __back = __res;
>> +      _M_frames.emplace_back(_S_fopcode_restore_cur_results,
>> +                            __state._M_subexpr, __res.second);
>> +      _M_frames.back()._M_bytes[0] = __res.matched;
>>        __res.second = _M_current;
>>        __res.matched = true;
>> -      _M_dfs(__match_mode, __state._M_next);
>> -      __res = __back;
>> +      _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>      }
>>
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> @@ -280,7 +345,7 @@ namespace __detail
>>      {
>>        const auto& __state = _M_nfa[__i];
>>        if (_M_at_begin())
>> -       _M_dfs(__match_mode, __state._M_next);
>> +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>      }
>>
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> @@ -290,7 +355,7 @@ namespace __detail
>>      {
>>        const auto& __state = _M_nfa[__i];
>>        if (_M_at_end())
>> -       _M_dfs(__match_mode, __state._M_next);
>> +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>      }
>>
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> @@ -300,7 +365,7 @@ namespace __detail
>>      {
>>        const auto& __state = _M_nfa[__i];
>>        if (_M_word_boundary() == !__state._M_neg)
>> -       _M_dfs(__match_mode, __state._M_next);
>> +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>      }
>>
>>    // Here __state._M_alt offers a single start node for a sub-NFA.
>> @@ -312,7 +377,7 @@ namespace __detail
>>      {
>>        const auto& __state = _M_nfa[__i];
>>        if (_M_lookahead(__state._M_alt) == !__state._M_neg)
>> -       _M_dfs(__match_mode, __state._M_next);
>> +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>      }
>>
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> @@ -321,16 +386,15 @@ namespace __detail
>>      _M_handle_match(_Match_mode __match_mode, _StateIdT __i)
>>      {
>>        const auto& __state = _M_nfa[__i];
>> -
>>        if (_M_current == _M_end)
>>         return;
>>        if constexpr (__dfs_mode)
>>         {
>>           if (__state._M_matches(*_M_current))
>>             {
>> +             _M_frames.emplace_back(_S_fopcode_restore_current, 0,
>> _M_current);
>>               ++_M_current;
>> -             _M_dfs(__match_mode, __state._M_next);
>> -             --_M_current;
>> +             _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>             }
>>         }
>>        else
>> @@ -413,13 +477,12 @@ namespace __detail
>>         {
>>           if (__last != _M_current)
>>             {
>> -             auto __backup = _M_current;
>> +             _M_frames.emplace_back(_S_fopcode_restore_current, 0,
>> _M_current);
>>               _M_current = __last;
>> -             _M_dfs(__match_mode, __state._M_next);
>> -             _M_current = __backup;
>> +             _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>             }
>>           else
>> -           _M_dfs(__match_mode, __state._M_next);
>> +           _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>>         }
>>      }
>>
>> @@ -483,31 +546,26 @@ namespace __detail
>>      _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i)
>>      {
>>        const auto& __state = _M_nfa[__i];
>> -
>>        if (_M_nfa._M_flags & regex_constants::ECMAScript)
>>         {
>>           // TODO: Fix BFS support. It is wrong.
>> -         _M_dfs(__match_mode, __state._M_alt);
>>           // Pick lhs if it matches. Only try rhs if it doesn't.
>> -         if (!_M_has_sol)
>> -           _M_dfs(__match_mode, __state._M_next);
>> +         _M_frames.emplace_back(_S_fopcode_fallback_next,
>> __state._M_next);
>> +         _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
>>         }
>>        else
>>         {
>>           // Try both and compare the result.
>>           // See "case _S_opcode_accept:" handling above.
>> -         _M_dfs(__match_mode, __state._M_alt);
>> -         auto __has_sol = _M_has_sol;
>> -         _M_has_sol = false;
>> -         _M_dfs(__match_mode, __state._M_next);
>> -         _M_has_sol |= __has_sol;
>> +         _M_frames.emplace_back(_S_fopcode_posix_alternative,
>> __state._M_next);
>> +         _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
>>         }
>>      }
>>
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>>            bool __dfs_mode>
>>      void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>> -    _M_dfs(_Match_mode __match_mode, _StateIdT __i)
>> +    _M_node(_Match_mode __match_mode, _StateIdT __i)
>>      {
>>        if (_M_states._M_visited(__i))
>>         return;
>> @@ -545,6 +603,75 @@ namespace __detail
>>         }
>>      }
>>
>> +  template<typename _BiIter, typename _Alloc, typename _TraitsT,
>> +          bool __dfs_mode>
>> +    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>> +    _M_dfs(_Match_mode __match_mode, _StateIdT __start)
>> +    {
>> +      _M_frames.emplace_back(_S_fopcode_next, __start);
>> +
>> +      while (!_M_frames.empty())
>> +       {
>> +         _ExecutorFrame<_BiIter> __frame = std::move(_M_frames.back());
>> +         _M_frames.pop_back();
>> +
>> +         switch (__frame._M_op)
>> +           {
>> +           case _S_fopcode_next:
>> +             _M_node(__match_mode, __frame._M_state_id);
>> +             break;
>> +
>> +           case _S_fopcode_fallback_next:
>> +             if (!_M_has_sol)
>> +               _M_frames.emplace_back(_S_fopcode_next,
>> __frame._M_state_id);
>>
> We are inserting frame here, potentially expanding the vector, while I
> think
> this could be expressed equivalently as:
>    case _S_fopcode_fallback_next:
>       if (!_M_has_sol)
>         _M_node(__match_mode, __frame._M_state_id);
>     break;
> Or to make sure that fallback_next is same as next.
>    case _S_fopcode_fallback_next:
>        if (_M_has_sol)
>          break;
>        // fallthrough
>     case _S_fopcode_next:
>          _M_node(__match_mode, __frame._M_state_id);
>          break;
>  WDYT?
>
>> +             break;
>> +
>> +           case _S_fopcode_fallback_rep_once_more:
>> +             if (!_M_has_sol)
>> +               _M_frames.emplace_back(_S_fopcode_rep_once_more,
>> +                                      __frame._M_state_id);
>>
>
>
>> +             break;
>> +
>> +           case _S_fopcode_rep_once_more:
>> +             _M_rep_once_more(__match_mode, __frame._M_state_id);
>> +             break;
>> +
>> +           case _S_fopcode_posix_alternative:
>> +             _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
>> +             _M_frames.emplace_back(_S_fopcode_next,
>> __frame._M_state_id);
>> +             _M_has_sol = false;
>> +             break;
>> +
>> +           case _S_fopcode_merge_sol:
>> +             _M_has_sol |= __frame._M_val;
>> +             break;
>> +
>> +           case _S_fopcode_restore_current:
>> +             _M_current = __frame._M_pos;
>> +             break;
>> +
>> +           case _S_fopcode_restore_cur_results:
>> +             if (__frame._M_bytes[0] == 0xff)
>> +               _M_cur_results[__frame._M_state_id].first =
>> __frame._M_pos;
>> +             else
>> +               {
>> +                 _M_cur_results[__frame._M_state_id].second =
>> __frame._M_pos;
>> +                 _M_cur_results[__frame._M_state_id].matched =
>> __frame._M_bytes[0];
>> +               }
>> +             break;
>> +
>> +           case _S_fopcode_restore_rep_count:
>> +             _M_rep_count[__frame._M_state_id].first = __frame._M_pos;
>> +             _M_rep_count[__frame._M_state_id].second =
>> __frame._M_bytes[0];
>> +             break;
>> +
>> +           case _S_fopcode_decrement_rep_count:
>> +             _M_rep_count[__frame._M_state_id].second--;
>> +             break;
>> +           }
>> +       }
>> +    }
>> +
>>    // Return whether now is at some word boundary.
>>    template<typename _BiIter, typename _Alloc, typename _TraitsT,
>>            bool __dfs_mode>
>> diff --git a/libstdc++-v3/include/std/regex
>> b/libstdc++-v3/include/std/regex
>> index c8fecfb2ad7a..003499c1bab5 100644
>> --- a/libstdc++-v3/include/std/regex
>> +++ b/libstdc++-v3/include/std/regex
>> @@ -40,6 +40,7 @@
>>  #else
>>
>>  #include <bitset>
>> +#include <cstdint>
>>  #include <locale>
>>  #include <stack>
>>  #include <stdexcept>
>> --
>> 2.53.0.rc1.65.gea24e2c554
>>
>>

Reply via email to