On Fri, 30 Jan 2026 at 15:28, Patrick Palka <[email protected]> wrote:
>
> Changes in v8:
> - Rebased
>
> Changes in v7:
> - Rebased
>
> Changes in v6:
> - Rebased
>
> Changes in v5:
> - Restrict _M_current restoration to DFS mode, it's unnecessary
> (and a harmless no-op) in BFS mode
>
> Changes in v4:
> - Rebased
>
> Changes in v3:
> - Rebased against latest version of previous patch
>
> -- >8 --
>
> The actual incrementing of the current input string position is done by
> _M_handle_match which also makes sure to restore it afterwards, via a
> restore_current frame. But restoring _M_current is naturally only
> necessary when backtracking is involved, not after every single match.
>
> So this patch moves the responsibility of saving/restoring _M_current
> from _M_handle_match to the branching nodes _M_handle_alternative and
> _M_handle_repeat. This is done by storing _M_current within the
> fallback_next, fallback_rep_once_more and posix_alternative frames.
> And we can get rid of the restore_current frame.
>
> This reduces the maximum size of the _M_frames stack by 15% for
>
> regex_match(string(200000, 'a'), "(a|b|c)*")
OK, thanks
> PR libstdc++/86164
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/regex_executor.tcc (__detail::_ExecutorFrameOpcode):
> Remove _S_fopcode_restore_current.
> (__detail::_Executor::_M_handle_repeat): Pass _M_current when
> pushing a fallback_next or fallback_rep_once_more frame.
> (__detail::_Executor::_M_handle_match): Don't push a
> restore_current frame.
> (__detail::_Executor::_M_handle_backref): Likewise and simplify
> accordingly.
> (__detail::_Executor::_M_handle_alternative): Pass _M_current when
> pushing a fallback_next or posix_alternative frame.
> (__detail::_Executor::_M_dfs) <case _S_fopcode_fallback_next>:
> Restore _M_current.
> <case _S_fopcode_fallback_rep_once_more>: Likewise.
> <case _S_fopcode_posix_alternative>: Likewise.
> <case _S_fopcode_restore_current>: Remove.
> ---
> libstdc++-v3/include/bits/regex_executor.tcc | 34 +++++++++-----------
> 1 file changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
> b/libstdc++-v3/include/bits/regex_executor.tcc
> index 36c7e05dcdfb..5d8ce24d47f2 100644
> --- a/libstdc++-v3/include/bits/regex_executor.tcc
> +++ b/libstdc++-v3/include/bits/regex_executor.tcc
> @@ -64,7 +64,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> _S_fopcode_fallback_rep_once_more,
> _S_fopcode_posix_alternative,
> _S_fopcode_merge_sol,
> - _S_fopcode_restore_current,
> _S_fopcode_restore_cur_results,
> _S_fopcode_restore_rep_count,
> _S_fopcode_decrement_rep_count,
> @@ -294,7 +293,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> {
> if constexpr (__dfs_mode)
> // If it's DFS executor and already accepted, we're done.
> - _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
> + _M_current);
> else
> _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> _M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
> @@ -304,7 +304,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> if constexpr (__dfs_mode)
> {
> // vice-versa.
> - _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
> + _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
> + _M_current);
> _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> else
> @@ -410,7 +411,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> {
> if (__state._M_matches(*_M_current))
> {
> - _M_frames.emplace_back(_S_fopcode_restore_current, 0,
> _M_current);
> ++_M_current;
> _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> @@ -493,14 +493,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> _M_re._M_automaton->_M_traits)._M_apply(
> __submatch.first, __submatch.second, _M_current, __last))
> {
> - if (__last != _M_current)
> - {
> - _M_frames.emplace_back(_S_fopcode_restore_current, 0,
> _M_current);
> - _M_current = __last;
> - _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> - }
> - else
> - _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> + _M_current = __last;
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> }
>
> @@ -568,14 +562,16 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> {
> // TODO: Fix BFS support. It is wrong.
> // Pick lhs if it matches. Only try rhs if it doesn't.
> - _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
> + _M_current);
> _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
> }
> else
> {
> // Try both and compare the result.
> // See "case _S_opcode_accept:" handling above.
> - _M_frames.emplace_back(_S_fopcode_posix_alternative,
> __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_posix_alternative,
> __state._M_next,
> + _M_current);
> _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
> }
> }
> @@ -638,6 +634,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> case _S_fopcode_fallback_next:
> if (_M_has_sol)
> break;
> + if constexpr (__dfs_mode)
> + _M_current = __frame._M_pos;
> [[__fallthrough__]];
> case _S_fopcode_next:
> _M_node(__match_mode, __frame._M_state_id);
> @@ -646,6 +644,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> case _S_fopcode_fallback_rep_once_more:
> if (_M_has_sol)
> break;
> + if constexpr (__dfs_mode)
> + _M_current = __frame._M_pos;
> [[__fallthrough__]];
> case _S_fopcode_rep_once_more:
> _M_rep_once_more(__match_mode, __frame._M_state_id);
> @@ -654,6 +654,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> case _S_fopcode_posix_alternative:
> _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
> _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
> + if constexpr (__dfs_mode)
> + _M_current = __frame._M_pos;
> _M_has_sol = false;
> break;
>
> @@ -661,10 +663,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> _M_has_sol |= __frame._M_val;
> break;
>
> - case _S_fopcode_restore_current:
> - _M_current = __frame._M_pos;
> - break;
> -
> case _S_fopcode_restore_cur_results:
> if (!__frame._M_end)
> _M_cur_results[__frame._M_state_id].first = __frame._M_pos;
> --
> 2.53.0.rc1.65.gea24e2c554
>