On Wed, Jan 28, 2026 at 4:54 PM Patrick Palka <[email protected]> wrote:
> On Wed, 28 Jan 2026, Tomasz Kaminski wrote: > > > > > > > On Wed, Jan 28, 2026 at 11:47 AM Tomasz Kaminski <[email protected]> > wrote: > > > > > > On Sat, Jan 24, 2026 at 6:21 PM Patrick Palka <[email protected]> wrote: > > Changes in v5: > > - Replace char _ExecutorFrame::_M_char with > > uint8_t _ExecutorFrame::_M_bytes[7] to fully account for > > the implicit struct padding and make it char size/signedness > > agnostic. > > > > Changes in v4: > > - Do not remove the BFS executor. I convinced myself that it's > > worthwhile to keep it around for backwards compatibility and > its > > better time complexity (e.g. for PR78276). > > - However, this means the mangling of _Executor is now the same > as > > before (since the __dfs_mode template parameter is no longer > gone). > > I believe we need to give this class a different mangling so > > programs that mix old/new std::regex impls continue to work. > > > > Changes in v3: > > > > - Add char _M_char member to _ExecutorFrame (without increasing > the > > size of struct thanks to padding) > > - Use it to fix bug in restore_subexpr_end frame code whereby we > > weren't restoring _M_cur_results.matched properly > > - Combine restore_subexpr_begin/end frame codes into one > > restore_cur_results frame code > > - Combine restore_rep_count_val/posn frame codes into single > > restore_rep_count frame_code (this makes the patch 3/4 from v2 > > obsolete) > > - Store the frame stack in the data member > _ExecutorFrame::_M_frames > > alongside the rest of the executor state instead of using a > local. > > Using a local variable is just more code with no real benefit. > > > > -- >8 -- > > > > libstdc++/regex: Convert DFS executor to use iteration [PR86164] > > > > This replaces the recursion and implicit system stack usage of the > DFS > > executor with iteration and an explicit heap-based stack. After > this > > patch, we no longer stack overflow on the PR86164 testcases since > system > > stack usage is no longer linear with respect to input size. This > should > > otherwise be a non-functional change. > > > > Except the one suggestion below, this LGTM. > > > > PR libstdc++/86164 > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/regex_executor.h > (__detail::_ExecutorFrame): > > Declare. > > (__detail::_Executor::_M_node): Declare. > > (__detail::_Executor::_M_frames): New data member. > > * include/bits/regex_executor.tcc > (__detail::_ExecutorFrameOpcode): > > New. > > (__detail::_ExecutorFrame): New. > > (__detail::_Executor::_M_rep_once_more): Replace > recursive > > _M_dfs calls with an _S_opcode_next frame push, and > any tail work > > after such calls with an appropriate frame push. > > (__detail::_M_handle_repeat): Likewise. > > (__detail::_M_handle_subexpr_begin): Likewise. > > (__detail::_M_handle_subexpr_end): Likewise. > > (__detail::_M_handle_line_begin_assertion): Likewise. > > (__detail::_M_handle_line_end_assertion): Likewise. > > (__detail::_M_handle_word_boundary): Likewise. > > (__detail::_M_handle_subexpr_lookahead): Likewise. > > (__detail::_M_handle_match): Likewise. > > (__detail::_M_handle_backref): Likewise. > > (__detail::_M_handle_accept): Likewise. > > (__detail::_M_handle_alternative): Likewise. > > (__detail::_M_node): Factored out from _M_dfs. > > (__detail::_M_dfs): Push an initial frame to > _M_frames that > > visits the starting node and pass this stack each > subroutine. > > Pop the latest _ExecutorFrame from _M_frames and > handle > > appropriately according to its > _ExecutorFrameOpcode. Loop until > > _M_frames is empty. > > * include/std/regex: Include <cstdint>. > > --- > > libstdc++-v3/include/bits/regex_executor.h | 7 + > > libstdc++-v3/include/bits/regex_executor.tcc | 217 > +++++++++++++++---- > > libstdc++-v3/include/std/regex | 1 + > > 3 files changed, 180 insertions(+), 45 deletions(-) > > > > diff --git a/libstdc++-v3/include/bits/regex_executor.h > b/libstdc++-v3/include/bits/regex_executor.h > > index f9857bfc4c1d..e9af007840bb 100644 > > --- a/libstdc++-v3/include/bits/regex_executor.h > > +++ b/libstdc++-v3/include/bits/regex_executor.h > > @@ -41,6 +41,9 @@ namespace __detail > > * @{ > > */ > > > > + template<typename _BiIter, bool _Trivial = > is_trivially_copyable<_BiIter>::value> > > + struct _ExecutorFrame; > > + > > /** > > * @brief Takes a regex and an input string and does the > matching. > > * > > @@ -142,6 +145,9 @@ namespace __detail > > void > > _M_handle_alternative(_Match_mode, _StateIdT); > > > > + void > > + _M_node(_Match_mode, _StateIdT); > > + > > void > > _M_dfs(_Match_mode __match_mode, _StateIdT __start); > > > > @@ -290,6 +296,7 @@ namespace __detail > > }; > > > > public: > > + _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> > _M_frames; > > _ResultsVec > _M_cur_results; > > _BiIter > _M_current; > > _BiIter > _M_begin; > > diff --git a/libstdc++-v3/include/bits/regex_executor.tcc > b/libstdc++-v3/include/bits/regex_executor.tcc > > index 421e585f39d9..bc7731953b0e 100644 > > --- a/libstdc++-v3/include/bits/regex_executor.tcc > > +++ b/libstdc++-v3/include/bits/regex_executor.tcc > > @@ -55,6 +55,71 @@ namespace __detail > > return false; > > } > > > > + enum _ExecutorFrameOpcode : uint8_t > > + { > > + _S_fopcode_next, > > + _S_fopcode_fallback_next, > > + _S_fopcode_rep_once_more, > > + _S_fopcode_fallback_rep_once_more, > > + _S_fopcode_posix_alternative, > > + _S_fopcode_merge_sol, > > + _S_fopcode_restore_current, > > + _S_fopcode_restore_cur_results, > > + _S_fopcode_restore_rep_count, > > + _S_fopcode_decrement_rep_count, > > + }; > > + > > + template<typename _BiIter, bool _Trivial /* = > is_trivially_copyable<_BiIter>::value */> > > + struct _ExecutorFrame > > + { > > + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT > __i) > > + : _M_op(__op), _M_state_id(__i) > > + { } > > + > > + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT > __i, _BiIter __p) > > + : _M_op(__op), _M_state_id(__i), _M_pos(__p) > > + { } > > + > > + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT > __i, long __v) > > + : _M_op(__op), _M_state_id(__i), _M_val(__v) > > + { } > > + > > + _ExecutorFrameOpcode _M_op; > > + uint8_t _M_bytes[7]; > > + _StateIdT _M_state_id; > > + // _M_pos and _M_val are mutually exclusive, which > the optimized > > + // partial specialization below depends on. > > + _BiIter _M_pos = _BiIter(); > > + long _M_val = 0; > > + }; > > + > > + // Space-optimized partial specialization for when the > input iterator is > > + // trivially copyable. > > + template<typename _BiIter> > > + struct _ExecutorFrame<_BiIter, true> > > + { > > + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT > __i) > > + : _M_op(__op), _M_state_id(__i) > > + { } > > + > > + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT > __i, _BiIter __p) > > + : _M_op(__op), _M_state_id(__i), _M_pos(__p) > > + { } > > + > > + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT > __i, long __v) > > + : _M_op(__op), _M_state_id(__i), _M_val(__v) > > + { } > > + > > + _ExecutorFrameOpcode _M_op; > > + uint8_t _M_bytes[7]; > > + _StateIdT _M_state_id; > > + union > > + { > > + _BiIter _M_pos; > > + long _M_val; > > + }; > > + }; > > + > > // The _M_main function operates in different modes, DFS > mode or BFS mode, > > // indicated by template parameter __dfs_mode, and > dispatches to one of the > > // _M_main_dispatch overloads. > > @@ -181,19 +246,20 @@ namespace __detail > > auto& __rep_count = _M_rep_count[__i]; > > if (__rep_count.second == 0 || __rep_count.first != > _M_current) > > { > > - auto __back = __rep_count; > > + > _M_frames.emplace_back(_S_fopcode_restore_rep_count, > > + __i, __rep_count.first); > > + _M_frames.back()._M_bytes[0] = __rep_count.second; > > __rep_count.first = _M_current; > > __rep_count.second = 1; > > - _M_dfs(__match_mode, __state._M_alt); > > - __rep_count = __back; > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_alt); > > } > > else > > { > > if (__rep_count.second < 2) > > { > > __rep_count.second++; > > - _M_dfs(__match_mode, __state._M_alt); > > - __rep_count.second--; > > + > _M_frames.emplace_back(_S_fopcode_decrement_rep_count, __i); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_alt); > > } > > } > > } > > @@ -208,23 +274,23 @@ namespace __detail > > _M_handle_repeat(_Match_mode __match_mode, _StateIdT > __i) > > { > > const auto& __state = _M_nfa[__i]; > > - > > // Greedy. > > if (!__state._M_neg) > > { > > - _M_rep_once_more(__match_mode, __i); > > - // If it's DFS executor and already accepted, > we're done. > > - if (!__dfs_mode || !_M_has_sol) > > - _M_dfs(__match_mode, __state._M_next); > > + if constexpr (__dfs_mode) > > + // If it's DFS executor and already accepted, > we're done. > > + _M_frames.emplace_back(_S_fopcode_fallback_next, > __state._M_next); > > + else > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_rep_once_more, > __i); > > } > > else // Non-greedy mode > > { > > if constexpr (__dfs_mode) > > { > > // vice-versa. > > - _M_dfs(__match_mode, __state._M_next); > > - if (!_M_has_sol) > > - _M_rep_once_more(__match_mode, __i); > > + > _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > else > > { > > @@ -233,12 +299,11 @@ namespace __detail > > // be better by attempting its next node. > > if (!_M_has_sol) > > { > > - _M_dfs(__match_mode, __state._M_next); > > // DON'T attempt anything if it's already > accepted. An > > // accepted state *must* be better than a > solution that > > // matches a non-greedy quantifier one > more time. > > - if (!_M_has_sol) > > - _M_rep_once_more(__match_mode, __i); > > + > _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > } > > } > > @@ -250,12 +315,12 @@ namespace __detail > > _M_handle_subexpr_begin(_Match_mode __match_mode, > _StateIdT __i) > > { > > const auto& __state = _M_nfa[__i]; > > - > > auto& __res = _M_cur_results[__state._M_subexpr]; > > - auto __back = __res.first; > > + _M_frames.emplace_back(_S_fopcode_restore_cur_results, > > + __state._M_subexpr, > __res.first); > > + _M_frames.back()._M_bytes[0] = 0xff; > > __res.first = _M_current; > > - _M_dfs(__match_mode, __state._M_next); > > - __res.first = __back; > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > > > template<typename _BiIter, typename _Alloc, typename > _TraitsT, > > @@ -264,13 +329,13 @@ namespace __detail > > _M_handle_subexpr_end(_Match_mode __match_mode, > _StateIdT __i) > > { > > const auto& __state = _M_nfa[__i]; > > - > > auto& __res = _M_cur_results[__state._M_subexpr]; > > - auto __back = __res; > > + _M_frames.emplace_back(_S_fopcode_restore_cur_results, > > + __state._M_subexpr, > __res.second); > > + _M_frames.back()._M_bytes[0] = __res.matched; > > __res.second = _M_current; > > __res.matched = true; > > - _M_dfs(__match_mode, __state._M_next); > > - __res = __back; > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > > > template<typename _BiIter, typename _Alloc, typename > _TraitsT, > > @@ -280,7 +345,7 @@ namespace __detail > > { > > const auto& __state = _M_nfa[__i]; > > if (_M_at_begin()) > > - _M_dfs(__match_mode, __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > > > template<typename _BiIter, typename _Alloc, typename > _TraitsT, > > @@ -290,7 +355,7 @@ namespace __detail > > { > > const auto& __state = _M_nfa[__i]; > > if (_M_at_end()) > > - _M_dfs(__match_mode, __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > > > template<typename _BiIter, typename _Alloc, typename > _TraitsT, > > @@ -300,7 +365,7 @@ namespace __detail > > { > > const auto& __state = _M_nfa[__i]; > > if (_M_word_boundary() == !__state._M_neg) > > - _M_dfs(__match_mode, __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > > > // Here __state._M_alt offers a single start node for a > sub-NFA. > > @@ -312,7 +377,7 @@ namespace __detail > > { > > const auto& __state = _M_nfa[__i]; > > if (_M_lookahead(__state._M_alt) == !__state._M_neg) > > - _M_dfs(__match_mode, __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > > > template<typename _BiIter, typename _Alloc, typename > _TraitsT, > > @@ -321,16 +386,15 @@ namespace __detail > > _M_handle_match(_Match_mode __match_mode, _StateIdT __i) > > { > > const auto& __state = _M_nfa[__i]; > > - > > if (_M_current == _M_end) > > return; > > if constexpr (__dfs_mode) > > { > > if (__state._M_matches(*_M_current)) > > { > > + > _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current); > > ++_M_current; > > - _M_dfs(__match_mode, __state._M_next); > > - --_M_current; > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > } > > else > > @@ -413,13 +477,12 @@ namespace __detail > > { > > if (__last != _M_current) > > { > > - auto __backup = _M_current; > > + > _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current); > > _M_current = __last; > > - _M_dfs(__match_mode, __state._M_next); > > - _M_current = __backup; > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > else > > - _M_dfs(__match_mode, __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_next); > > } > > } > > > > @@ -483,31 +546,26 @@ namespace __detail > > _M_handle_alternative(_Match_mode __match_mode, > _StateIdT __i) > > { > > const auto& __state = _M_nfa[__i]; > > - > > if (_M_nfa._M_flags & regex_constants::ECMAScript) > > { > > // TODO: Fix BFS support. It is wrong. > > - _M_dfs(__match_mode, __state._M_alt); > > // Pick lhs if it matches. Only try rhs if it > doesn't. > > - if (!_M_has_sol) > > - _M_dfs(__match_mode, __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_fallback_next, > __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_alt); > > } > > else > > { > > // Try both and compare the result. > > // See "case _S_opcode_accept:" handling above. > > - _M_dfs(__match_mode, __state._M_alt); > > - auto __has_sol = _M_has_sol; > > - _M_has_sol = false; > > - _M_dfs(__match_mode, __state._M_next); > > - _M_has_sol |= __has_sol; > > + > _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next); > > + _M_frames.emplace_back(_S_fopcode_next, > __state._M_alt); > > } > > } > > > > template<typename _BiIter, typename _Alloc, typename > _TraitsT, > > bool __dfs_mode> > > void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: > > - _M_dfs(_Match_mode __match_mode, _StateIdT __i) > > + _M_node(_Match_mode __match_mode, _StateIdT __i) > > { > > if (_M_states._M_visited(__i)) > > return; > > @@ -545,6 +603,75 @@ namespace __detail > > } > > } > > > > + template<typename _BiIter, typename _Alloc, typename > _TraitsT, > > + bool __dfs_mode> > > + void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: > > + _M_dfs(_Match_mode __match_mode, _StateIdT __start) > > + { > > + _M_frames.emplace_back(_S_fopcode_next, __start); > > + > > + while (!_M_frames.empty()) > > + { > > + _ExecutorFrame<_BiIter> __frame = > std::move(_M_frames.back()); > > + _M_frames.pop_back(); > > + > > + switch (__frame._M_op) > > + { > > + case _S_fopcode_next: > > + _M_node(__match_mode, __frame._M_state_id); > > + break; > > + > > + case _S_fopcode_fallback_next: > > + if (!_M_has_sol) > > + _M_frames.emplace_back(_S_fopcode_next, > __frame._M_state_id); > > > > We are inserting frame here, potentially expanding the vector, > while I think > > this could be expressed equivalently as: > > case _S_fopcode_fallback_next: > > if (!_M_has_sol) > > _M_node(__match_mode, __frame._M_state_id); > > break; > > Yes I think in theory we could do this shortcut wherever there's an > emplace_back call in the tail position (even in the subroutines e.g. > _M_rep_once_more) since we know we're going to immediately pop that > frame in the next iteration of the loop. It does hurt readability a > little bit and makes the implementation less obviously non-recursive. > Maybe it's worthwhile doing it selectively where it makes an observable > performance difference (particularly with -O0). > > Another option is to wrap the _M_frames stack in a class that contains > an additional member holding the top of the stack: > > struct { > std::vector<_ExecutorFrame> _M_frames; > _ExecutorFrame _M_top; > // emplace_back, pop_back, back, empty > }; > > That way consecutive push+pops won't touch the vector at all, I > think? I'll try these ideas out. > As explained below, I will find the implementation of fallback opcodes easier, if they would literally fallback/through in implementation. Using a separate _M_top haven't crossed my mind at that time. > > > Or to make sure that fallback_next is same as next. > > case _S_fopcode_fallback_next: > > if (_M_has_sol) > > break; > > // fallthrough > > case _S_fopcode_next: > > _M_node(__match_mode, __frame._M_state_id); > > break; > > IIUC this isn't true after the 2nd patch which makes > _S_fopcode_fallback_next also restore _M_current. > We can add a restore of _M_current before fallthrough. I personally will find it cleaner, iff _S_fopcode_fallback_X will just do some steps, and fall through for _S_fopcode_X. But this is very subjective, so I am fine either way. > > > WDYT? > > + break; > > + > > + case _S_fopcode_fallback_rep_once_more: > > + if (!_M_has_sol) > > + _M_frames.emplace_back(_S_fopcode_rep_once_more, > > + __frame._M_state_id); > > > > > > + break; > > + > > + case _S_fopcode_rep_once_more: > > + _M_rep_once_more(__match_mode, __frame._M_state_id); > > + break; > > + > > + case _S_fopcode_posix_alternative: > > + _M_frames.emplace_back(_S_fopcode_merge_sol, 0, > _M_has_sol); > > + _M_frames.emplace_back(_S_fopcode_next, > __frame._M_state_id); > > + _M_has_sol = false; > > + break; > > + > > + case _S_fopcode_merge_sol: > > + _M_has_sol |= __frame._M_val; > > + break; > > + > > + case _S_fopcode_restore_current: > > + _M_current = __frame._M_pos; > > + break; > > + > > + case _S_fopcode_restore_cur_results: > > + if (__frame._M_bytes[0] == 0xff) > > + _M_cur_results[__frame._M_state_id].first = > __frame._M_pos; > > + else > > + { > > + _M_cur_results[__frame._M_state_id].second = > __frame._M_pos; > > + _M_cur_results[__frame._M_state_id].matched = > __frame._M_bytes[0]; > > + } > > + break; > > + > > + case _S_fopcode_restore_rep_count: > > + _M_rep_count[__frame._M_state_id].first = > __frame._M_pos; > > + _M_rep_count[__frame._M_state_id].second = > __frame._M_bytes[0]; > > + break; > > + > > + case _S_fopcode_decrement_rep_count: > > + _M_rep_count[__frame._M_state_id].second--; > > + break; > > + } > > + } > > + } > > + > > // Return whether now is at some word boundary. > > template<typename _BiIter, typename _Alloc, typename _TraitsT, > > bool __dfs_mode> > > diff --git a/libstdc++-v3/include/std/regex > b/libstdc++-v3/include/std/regex > > index c8fecfb2ad7a..003499c1bab5 100644 > > --- a/libstdc++-v3/include/std/regex > > +++ b/libstdc++-v3/include/std/regex > > @@ -40,6 +40,7 @@ > > #else > > > > #include <bitset> > > +#include <cstdint> > > #include <locale> > > #include <stack> > > #include <stdexcept> > > -- > > 2.53.0.rc1.65.gea24e2c554 > > > > > >
