On Wed, Jan 28, 2026 at 11:47 AM Tomasz Kaminski <[email protected]> wrote:
> > > On Sat, Jan 24, 2026 at 6:21 PM Patrick Palka <[email protected]> wrote: > >> Changes in v5: >> - Replace char _ExecutorFrame::_M_char with >> uint8_t _ExecutorFrame::_M_bytes[7] to fully account for >> the implicit struct padding and make it char size/signedness >> agnostic. >> >> Changes in v4: >> - Do not remove the BFS executor. I convinced myself that it's >> worthwhile to keep it around for backwards compatibility and its >> better time complexity (e.g. for PR78276). >> - However, this means the mangling of _Executor is now the same as >> before (since the __dfs_mode template parameter is no longer gone). >> I believe we need to give this class a different mangling so >> programs that mix old/new std::regex impls continue to work. >> >> Changes in v3: >> >> - Add char _M_char member to _ExecutorFrame (without increasing the >> size of struct thanks to padding) >> - Use it to fix bug in restore_subexpr_end frame code whereby we >> weren't restoring _M_cur_results.matched properly >> - Combine restore_subexpr_begin/end frame codes into one >> restore_cur_results frame code >> - Combine restore_rep_count_val/posn frame codes into single >> restore_rep_count frame_code (this makes the patch 3/4 from v2 >> obsolete) >> - Store the frame stack in the data member _ExecutorFrame::_M_frames >> alongside the rest of the executor state instead of using a local. >> Using a local variable is just more code with no real benefit. >> >> -- >8 -- >> >> libstdc++/regex: Convert DFS executor to use iteration [PR86164] >> >> This replaces the recursion and implicit system stack usage of the DFS >> executor with iteration and an explicit heap-based stack. After this >> patch, we no longer stack overflow on the PR86164 testcases since system >> stack usage is no longer linear with respect to input size. This should >> otherwise be a non-functional change. >> > Except the one suggestion below, this LGTM. > >> PR libstdc++/86164 >> >> libstdc++-v3/ChangeLog: >> >> * include/bits/regex_executor.h (__detail::_ExecutorFrame): >> Declare. >> (__detail::_Executor::_M_node): Declare. >> (__detail::_Executor::_M_frames): New data member. >> * include/bits/regex_executor.tcc >> (__detail::_ExecutorFrameOpcode): >> New. >> (__detail::_ExecutorFrame): New. >> (__detail::_Executor::_M_rep_once_more): Replace recursive >> _M_dfs calls with an _S_opcode_next frame push, and any tail work >> after such calls with an appropriate frame push. >> (__detail::_M_handle_repeat): Likewise. >> (__detail::_M_handle_subexpr_begin): Likewise. >> (__detail::_M_handle_subexpr_end): Likewise. >> (__detail::_M_handle_line_begin_assertion): Likewise. >> (__detail::_M_handle_line_end_assertion): Likewise. >> (__detail::_M_handle_word_boundary): Likewise. >> (__detail::_M_handle_subexpr_lookahead): Likewise. >> (__detail::_M_handle_match): Likewise. >> (__detail::_M_handle_backref): Likewise. >> (__detail::_M_handle_accept): Likewise. >> (__detail::_M_handle_alternative): Likewise. >> (__detail::_M_node): Factored out from _M_dfs. >> (__detail::_M_dfs): Push an initial frame to _M_frames that >> visits the starting node and pass this stack each subroutine. >> Pop the latest _ExecutorFrame from _M_frames and handle >> appropriately according to its _ExecutorFrameOpcode. Loop until >> _M_frames is empty. >> * include/std/regex: Include <cstdint>. >> --- >> libstdc++-v3/include/bits/regex_executor.h | 7 + >> libstdc++-v3/include/bits/regex_executor.tcc | 217 +++++++++++++++---- >> libstdc++-v3/include/std/regex | 1 + >> 3 files changed, 180 insertions(+), 45 deletions(-) >> >> diff --git a/libstdc++-v3/include/bits/regex_executor.h >> b/libstdc++-v3/include/bits/regex_executor.h >> index f9857bfc4c1d..e9af007840bb 100644 >> --- a/libstdc++-v3/include/bits/regex_executor.h >> +++ b/libstdc++-v3/include/bits/regex_executor.h >> @@ -41,6 +41,9 @@ namespace __detail >> * @{ >> */ >> >> + template<typename _BiIter, bool _Trivial = >> is_trivially_copyable<_BiIter>::value> >> + struct _ExecutorFrame; >> + >> /** >> * @brief Takes a regex and an input string and does the matching. >> * >> @@ -142,6 +145,9 @@ namespace __detail >> void >> _M_handle_alternative(_Match_mode, _StateIdT); >> >> + void >> + _M_node(_Match_mode, _StateIdT); >> + >> void >> _M_dfs(_Match_mode __match_mode, _StateIdT __start); >> >> @@ -290,6 +296,7 @@ namespace __detail >> }; >> >> public: >> + _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames; >> _ResultsVec >> _M_cur_results; >> _BiIter _M_current; >> _BiIter _M_begin; >> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc >> b/libstdc++-v3/include/bits/regex_executor.tcc >> index 421e585f39d9..bc7731953b0e 100644 >> --- a/libstdc++-v3/include/bits/regex_executor.tcc >> +++ b/libstdc++-v3/include/bits/regex_executor.tcc >> @@ -55,6 +55,71 @@ namespace __detail >> return false; >> } >> >> + enum _ExecutorFrameOpcode : uint8_t >> + { >> + _S_fopcode_next, >> + _S_fopcode_fallback_next, >> + _S_fopcode_rep_once_more, >> + _S_fopcode_fallback_rep_once_more, >> + _S_fopcode_posix_alternative, >> + _S_fopcode_merge_sol, >> + _S_fopcode_restore_current, >> + _S_fopcode_restore_cur_results, >> + _S_fopcode_restore_rep_count, >> + _S_fopcode_decrement_rep_count, >> + }; >> + >> + template<typename _BiIter, bool _Trivial /* = >> is_trivially_copyable<_BiIter>::value */> >> + struct _ExecutorFrame >> + { >> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i) >> + : _M_op(__op), _M_state_id(__i) >> + { } >> + >> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, _BiIter >> __p) >> + : _M_op(__op), _M_state_id(__i), _M_pos(__p) >> + { } >> + >> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, long __v) >> + : _M_op(__op), _M_state_id(__i), _M_val(__v) >> + { } >> + >> + _ExecutorFrameOpcode _M_op; >> + uint8_t _M_bytes[7]; >> + _StateIdT _M_state_id; >> + // _M_pos and _M_val are mutually exclusive, which the optimized >> + // partial specialization below depends on. >> + _BiIter _M_pos = _BiIter(); >> + long _M_val = 0; >> + }; >> + >> + // Space-optimized partial specialization for when the input iterator >> is >> + // trivially copyable. >> + template<typename _BiIter> >> + struct _ExecutorFrame<_BiIter, true> >> + { >> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i) >> + : _M_op(__op), _M_state_id(__i) >> + { } >> + >> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, _BiIter >> __p) >> + : _M_op(__op), _M_state_id(__i), _M_pos(__p) >> + { } >> + >> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, long __v) >> + : _M_op(__op), _M_state_id(__i), _M_val(__v) >> + { } >> + >> + _ExecutorFrameOpcode _M_op; >> + uint8_t _M_bytes[7]; >> + _StateIdT _M_state_id; >> + union >> + { >> + _BiIter _M_pos; >> + long _M_val; >> + }; >> + }; >> + >> // The _M_main function operates in different modes, DFS mode or BFS >> mode, >> // indicated by template parameter __dfs_mode, and dispatches to one >> of the >> // _M_main_dispatch overloads. >> @@ -181,19 +246,20 @@ namespace __detail >> auto& __rep_count = _M_rep_count[__i]; >> if (__rep_count.second == 0 || __rep_count.first != _M_current) >> { >> - auto __back = __rep_count; >> + _M_frames.emplace_back(_S_fopcode_restore_rep_count, >> + __i, __rep_count.first); >> + _M_frames.back()._M_bytes[0] = __rep_count.second; >> __rep_count.first = _M_current; >> __rep_count.second = 1; >> - _M_dfs(__match_mode, __state._M_alt); >> - __rep_count = __back; >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt); >> } >> else >> { >> if (__rep_count.second < 2) >> { >> __rep_count.second++; >> - _M_dfs(__match_mode, __state._M_alt); >> - __rep_count.second--; >> + _M_frames.emplace_back(_S_fopcode_decrement_rep_count, __i); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt); >> } >> } >> } >> @@ -208,23 +274,23 @@ namespace __detail >> _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i) >> { >> const auto& __state = _M_nfa[__i]; >> - >> // Greedy. >> if (!__state._M_neg) >> { >> - _M_rep_once_more(__match_mode, __i); >> - // If it's DFS executor and already accepted, we're done. >> - if (!__dfs_mode || !_M_has_sol) >> - _M_dfs(__match_mode, __state._M_next); >> + if constexpr (__dfs_mode) >> + // If it's DFS executor and already accepted, we're done. >> + _M_frames.emplace_back(_S_fopcode_fallback_next, >> __state._M_next); >> + else >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_rep_once_more, __i); >> } >> else // Non-greedy mode >> { >> if constexpr (__dfs_mode) >> { >> // vice-versa. >> - _M_dfs(__match_mode, __state._M_next); >> - if (!_M_has_sol) >> - _M_rep_once_more(__match_mode, __i); >> + _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, >> __i); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> else >> { >> @@ -233,12 +299,11 @@ namespace __detail >> // be better by attempting its next node. >> if (!_M_has_sol) >> { >> - _M_dfs(__match_mode, __state._M_next); >> // DON'T attempt anything if it's already accepted. An >> // accepted state *must* be better than a solution that >> // matches a non-greedy quantifier one more time. >> - if (!_M_has_sol) >> - _M_rep_once_more(__match_mode, __i); >> + >> _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i); >> + _M_frames.emplace_back(_S_fopcode_next, >> __state._M_next); >> } >> } >> } >> @@ -250,12 +315,12 @@ namespace __detail >> _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i) >> { >> const auto& __state = _M_nfa[__i]; >> - >> auto& __res = _M_cur_results[__state._M_subexpr]; >> - auto __back = __res.first; >> + _M_frames.emplace_back(_S_fopcode_restore_cur_results, >> + __state._M_subexpr, __res.first); >> + _M_frames.back()._M_bytes[0] = 0xff; >> __res.first = _M_current; >> - _M_dfs(__match_mode, __state._M_next); >> - __res.first = __back; >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> @@ -264,13 +329,13 @@ namespace __detail >> _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i) >> { >> const auto& __state = _M_nfa[__i]; >> - >> auto& __res = _M_cur_results[__state._M_subexpr]; >> - auto __back = __res; >> + _M_frames.emplace_back(_S_fopcode_restore_cur_results, >> + __state._M_subexpr, __res.second); >> + _M_frames.back()._M_bytes[0] = __res.matched; >> __res.second = _M_current; >> __res.matched = true; >> - _M_dfs(__match_mode, __state._M_next); >> - __res = __back; >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> @@ -280,7 +345,7 @@ namespace __detail >> { >> const auto& __state = _M_nfa[__i]; >> if (_M_at_begin()) >> - _M_dfs(__match_mode, __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> @@ -290,7 +355,7 @@ namespace __detail >> { >> const auto& __state = _M_nfa[__i]; >> if (_M_at_end()) >> - _M_dfs(__match_mode, __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> @@ -300,7 +365,7 @@ namespace __detail >> { >> const auto& __state = _M_nfa[__i]; >> if (_M_word_boundary() == !__state._M_neg) >> - _M_dfs(__match_mode, __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> >> // Here __state._M_alt offers a single start node for a sub-NFA. >> @@ -312,7 +377,7 @@ namespace __detail >> { >> const auto& __state = _M_nfa[__i]; >> if (_M_lookahead(__state._M_alt) == !__state._M_neg) >> - _M_dfs(__match_mode, __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> @@ -321,16 +386,15 @@ namespace __detail >> _M_handle_match(_Match_mode __match_mode, _StateIdT __i) >> { >> const auto& __state = _M_nfa[__i]; >> - >> if (_M_current == _M_end) >> return; >> if constexpr (__dfs_mode) >> { >> if (__state._M_matches(*_M_current)) >> { >> + _M_frames.emplace_back(_S_fopcode_restore_current, 0, >> _M_current); >> ++_M_current; >> - _M_dfs(__match_mode, __state._M_next); >> - --_M_current; >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> } >> else >> @@ -413,13 +477,12 @@ namespace __detail >> { >> if (__last != _M_current) >> { >> - auto __backup = _M_current; >> + _M_frames.emplace_back(_S_fopcode_restore_current, 0, >> _M_current); >> _M_current = __last; >> - _M_dfs(__match_mode, __state._M_next); >> - _M_current = __backup; >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> else >> - _M_dfs(__match_mode, __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); >> } >> } >> >> @@ -483,31 +546,26 @@ namespace __detail >> _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i) >> { >> const auto& __state = _M_nfa[__i]; >> - >> if (_M_nfa._M_flags & regex_constants::ECMAScript) >> { >> // TODO: Fix BFS support. It is wrong. >> - _M_dfs(__match_mode, __state._M_alt); >> // Pick lhs if it matches. Only try rhs if it doesn't. >> - if (!_M_has_sol) >> - _M_dfs(__match_mode, __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_fallback_next, >> __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt); >> } >> else >> { >> // Try both and compare the result. >> // See "case _S_opcode_accept:" handling above. >> - _M_dfs(__match_mode, __state._M_alt); >> - auto __has_sol = _M_has_sol; >> - _M_has_sol = false; >> - _M_dfs(__match_mode, __state._M_next); >> - _M_has_sol |= __has_sol; >> + _M_frames.emplace_back(_S_fopcode_posix_alternative, >> __state._M_next); >> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt); >> } >> } >> >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> bool __dfs_mode> >> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: >> - _M_dfs(_Match_mode __match_mode, _StateIdT __i) >> + _M_node(_Match_mode __match_mode, _StateIdT __i) >> { >> if (_M_states._M_visited(__i)) >> return; >> @@ -545,6 +603,75 @@ namespace __detail >> } >> } >> >> + template<typename _BiIter, typename _Alloc, typename _TraitsT, >> + bool __dfs_mode> >> + void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: >> + _M_dfs(_Match_mode __match_mode, _StateIdT __start) >> + { >> + _M_frames.emplace_back(_S_fopcode_next, __start); >> + >> + while (!_M_frames.empty()) >> + { >> + _ExecutorFrame<_BiIter> __frame = std::move(_M_frames.back()); >> + _M_frames.pop_back(); >> + >> + switch (__frame._M_op) >> + { >> + case _S_fopcode_next: >> + _M_node(__match_mode, __frame._M_state_id); >> + break; >> + >> + case _S_fopcode_fallback_next: >> + if (!_M_has_sol) >> + _M_frames.emplace_back(_S_fopcode_next, >> __frame._M_state_id); >> > We are inserting frame here, potentially expanding the vector, while I > think > this could be expressed equivalently as: > case _S_fopcode_fallback_next: > if (!_M_has_sol) > _M_node(__match_mode, __frame._M_state_id); > break; > Or to make sure that fallback_next is same as next. > case _S_fopcode_fallback_next: > if (_M_has_sol) > break; > // fallthrough > case _S_fopcode_next: > _M_node(__match_mode, __frame._M_state_id); > break; > WDYT? > >> + break; >> + >> + case _S_fopcode_fallback_rep_once_more: >> + if (!_M_has_sol) >> + _M_frames.emplace_back(_S_fopcode_rep_once_more, >> + __frame._M_state_id); >> > > >> + break; >> + >> + case _S_fopcode_rep_once_more: >> + _M_rep_once_more(__match_mode, __frame._M_state_id); >> + break; >> + >> + case _S_fopcode_posix_alternative: >> + _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol); >> + _M_frames.emplace_back(_S_fopcode_next, >> __frame._M_state_id); >> + _M_has_sol = false; >> + break; >> + >> + case _S_fopcode_merge_sol: >> + _M_has_sol |= __frame._M_val; >> + break; >> + >> + case _S_fopcode_restore_current: >> + _M_current = __frame._M_pos; >> + break; >> + >> + case _S_fopcode_restore_cur_results: >> + if (__frame._M_bytes[0] == 0xff) >> + _M_cur_results[__frame._M_state_id].first = >> __frame._M_pos; >> + else >> + { >> + _M_cur_results[__frame._M_state_id].second = >> __frame._M_pos; >> + _M_cur_results[__frame._M_state_id].matched = >> __frame._M_bytes[0]; >> + } >> + break; >> + >> + case _S_fopcode_restore_rep_count: >> + _M_rep_count[__frame._M_state_id].first = __frame._M_pos; >> + _M_rep_count[__frame._M_state_id].second = >> __frame._M_bytes[0]; >> + break; >> + >> + case _S_fopcode_decrement_rep_count: >> + _M_rep_count[__frame._M_state_id].second--; >> + break; >> + } >> + } >> + } >> + >> // Return whether now is at some word boundary. >> template<typename _BiIter, typename _Alloc, typename _TraitsT, >> bool __dfs_mode> >> diff --git a/libstdc++-v3/include/std/regex >> b/libstdc++-v3/include/std/regex >> index c8fecfb2ad7a..003499c1bab5 100644 >> --- a/libstdc++-v3/include/std/regex >> +++ b/libstdc++-v3/include/std/regex >> @@ -40,6 +40,7 @@ >> #else >> >> #include <bitset> >> +#include <cstdint> >> #include <locale> >> #include <stack> >> #include <stdexcept> >> -- >> 2.53.0.rc1.65.gea24e2c554 >> >>
