On Fri, Feb 6, 2026 at 6:09 PM Patrick Palka <[email protected]> wrote:
> That _Executor's __dfs_mode flag is a template parameter means
> __regex_algo_impl has to instantiate two different (but largely the
> same) versions of _Executor even though the BFS version is rarely used.
> And in practice it seems the compiler has trouble DCEing the unused
> version of _Executor, which needlessly increases code size and burdens
> the optimizer.
>
> This patch replaces the template parameter with a run-time data member
> _M_search_mode. We also need to inline the _State_info member functions
> and data members of both partial specializations into _Executor so that
> they depend on the run-time instead of compile-time flag. This means
> _Executor is two pointers bigger when using DFS mode (due to the unused
> _M_match_queue and _M_visited_states members), but that's negligible
> compared to the other benefits.
>
> The __dfs_mode template parameter is now unused but not yet removed,
> that'll be done in the next patch.
>
> After this patch, both code size and run time for the microbenchmark
>
> for (int i = 0; i < 10000; i++)
> regex_match(string(200, 'a'), regex("(a|b|c)*"));
>
> decreases by about 15% each (with -O2). Compile time and maximum memory
> use decreases by 5-10%.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/regex.tcc (__regex_algo_impl): Pass __search_mode
> parameter indicating whether to use DFS or BFS to _Executor's
> constructor.
> * include/bits/regex_executor.h (_Executor::_Search_mode): New.
> (_Executor::_Executor): Add __search_mode parameter and
> initialize _M_search_mode. Adjust after inlining _State_info
> members into _Executor.
> (_Executor::~_Executor): Free _M_visted_states.
> (_Executor::_M_main): Adjust after renaming _M_main_dispatch
> overloads to _M_main_dfs and _M_main_bfs.
> (_Executor::_State_info): Remove.
> (_Executor::_M_visited): Inlined from _State_info.
> (_Executor::_M_get_sol_pos): Likewise.
> (_Executor::_M_states): Remove.
> (_Executor::_M_start): Inlined from _State_info.
> (_Executor::_M_sol_pos): Likewise.
> (_Executor::_M_match_queue): Likewise.
> (_Executor::_M_search_mode): New.
> * include/bits/regex_executor.tcc (_Executor::_M_main_dispatch):
> Renamed to...
> (_Executor::_M_main_dfs, _Executor::_M_main_bfs): ... these.
> (_Executor::_M_*): Adjust after _M_states removal.
> (_Executor::_M_lookhead): Also adjust _Executor constructor
> call.
> ---
>
Only small suggestions regarding the naming.
> libstdc++-v3/include/bits/regex.tcc | 25 ++---
> libstdc++-v3/include/bits/regex_executor.h | 97 ++++++++------------
> libstdc++-v3/include/bits/regex_executor.tcc | 67 +++++++-------
> 3 files changed, 79 insertions(+), 110 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/regex.tcc
> b/libstdc++-v3/include/bits/regex.tcc
> index 6ec259c25019..6ffe1beca08a 100644
> --- a/libstdc++-v3/include/bits/regex.tcc
> +++ b/libstdc++-v3/include/bits/regex.tcc
> @@ -61,26 +61,19 @@ namespace __detail
> __m._M_resize(__re._M_automaton->_M_sub_count());
>
> bool __ret;
> + bool __search_mode = true; // by default, use DFS
>
I would name this bool __dfs_mode.
> if ((__re.flags() & regex_constants::__polynomial)
> || (__policy == _RegexExecutorPolicy::_S_alternate
> && !__re._M_automaton->_M_has_backref))
> - {
> - _Executor<_BiIter, _Alloc, _TraitsT, false>
> - __executor(__s, __e, __res, __re, __flags);
> - if (__match_mode)
> - __ret = __executor._M_match();
> - else
> - __ret = __executor._M_search();
> - }
> + __search_mode = false; // use BFS when requested
> +
> + _Executor<_BiIter, _Alloc, _TraitsT, true>
> + __executor(__s, __e, __res, __re, __flags, __search_mode);
> + if (__match_mode)
> + __ret = __executor._M_match();
> else
> - {
> - _Executor<_BiIter, _Alloc, _TraitsT, true>
> - __executor(__s, __e, __res, __re, __flags);
> - if (__match_mode)
> - __ret = __executor._M_match();
> - else
> - __ret = __executor._M_search();
> - }
> + __ret = __executor._M_search();
> +
> if (__ret)
> {
> for (auto& __it : __res)
> diff --git a/libstdc++-v3/include/bits/regex_executor.h
> b/libstdc++-v3/include/bits/regex_executor.h
> index 94048e1f632c..550fa751c002 100644
> --- a/libstdc++-v3/include/bits/regex_executor.h
> +++ b/libstdc++-v3/include/bits/regex_executor.h
> @@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> bool __dfs_mode>
> class _Executor
> {
> - using __search_mode = integral_constant<bool, __dfs_mode>;
> - using __dfs = true_type;
> - using __bfs = false_type;
> -
> + enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
> enum class _Match_mode : unsigned char { _Exact, _Prefix };
>
> public:
> @@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> _BiIter __end,
> _ResultsVec& __results,
> const _RegexT& __re,
> - _FlagT __flags)
> + _FlagT __flags,
> + bool __search_mode)
>
Same for the name of the parameter.
> : _M_cur_results(__results.get_allocator()),
> _M_begin(__begin),
> _M_end(__end),
> @@ -82,14 +80,22 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> _M_nfa(*__re._M_automaton),
> _M_results(__results),
> _M_rep_count(_M_nfa.size()),
> - _M_states(_M_nfa._M_start(), _M_nfa.size()),
> - _M_flags(__flags)
> + _M_start(_M_nfa._M_start()),
> + _M_flags(__flags),
> + _M_search_mode(_Search_mode(__search_mode))
>
I think __dfs_mode ? _Search_mode::_DFS : _Search_mode::_BFS would be
cleaner.
{
> using namespace regex_constants;
> if (__flags & match_prev_avail) // ignore not_bol and not_bow
> _M_flags &= ~(match_not_bol | match_not_bow);
> + if (_M_search_mode == _Search_mode::_BFS)
> + _M_visited_states = new bool[_M_nfa.size()];
> + else
> + _M_visited_states = nullptr;
> }
>
> + ~_Executor()
> + { delete[] _M_visited_states; }
> +
> // Set matched when string exactly matches the pattern.
> bool
> _M_match()
> @@ -154,13 +160,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
>
> bool
> _M_main(_Match_mode __match_mode)
> - { return _M_main_dispatch(__match_mode, __search_mode{}); }
> + {
> + if (_M_search_mode == _Search_mode::_DFS)
> + return _M_main_dfs(__match_mode);
> + else
> + return _M_main_bfs(__match_mode);
> + }
>
> bool
> - _M_main_dispatch(_Match_mode __match_mode, __dfs);
> + _M_main_dfs(_Match_mode __match_mode);
>
> bool
> - _M_main_dispatch(_Match_mode __match_mode, __bfs);
> + _M_main_bfs(_Match_mode __match_mode);
>
> bool
> _M_is_word(_CharT __ch) const
> @@ -239,62 +250,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> return (_M_re._M_automaton->_M_options() & __m) == __m;
> }
>
> - // Holds additional information used in BFS-mode.
> - template<typename _SearchMode, typename _ResultsVec>
> - struct _State_info;
> -
> - template<typename _ResultsVec>
> - struct _State_info<__bfs, _ResultsVec>
> - {
> - explicit
> - _State_info(_StateIdT __start, size_t __n)
> - : _M_visited_states(new bool[__n]()), _M_start(__start)
> - { }
> -
> - ~_State_info() { delete[] _M_visited_states; }
> -
> - _State_info(const _State_info&) = delete;
> - _State_info& operator=(const _State_info&) = delete;
> -
> - bool _M_visited(_StateIdT __i)
> + bool
> + _M_visited(_StateIdT __i)
> + {
> + if (_M_visited_states)
> {
> if (_M_visited_states[__i])
> return true;
> _M_visited_states[__i] = true;
> - return false;
> }
> + return false;
> + }
>
> - void _M_queue(_StateIdT __i, const _ResultsVec& __res)
> - { _M_match_queue.emplace_back(__i, __res); }
> -
> - // Dummy implementations for BFS mode.
> - _BiIter* _M_get_sol_pos() { return nullptr; }
> -
> - // Saves states that need to be considered for the next
> character.
> - _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>>
> _M_match_queue;
> - // Indicates which states are already visited.
> - bool* _M_visited_states;
> - // To record current solution.
> - _StateIdT _M_start;
> - };
> -
> - template<typename _ResultsVec>
> - struct _State_info<__dfs, _ResultsVec>
> - {
> - explicit
> - _State_info(_StateIdT __start, size_t) : _M_start(__start)
> - { }
> -
> - // Dummy implementations for DFS mode.
> - bool _M_visited(_StateIdT) const { return false; }
> - void _M_queue(_StateIdT, const _ResultsVec&) { }
> -
> - _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
> -
> - // To record current solution.
> - _StateIdT _M_start;
> - _BiIter _M_sol_pos;
> - };
> + _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
>
> public:
> _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames;
> @@ -306,8 +274,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> const _NFAT& _M_nfa;
> _ResultsVec& _M_results;
> _GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
> - _State_info<__search_mode, _ResultsVec> _M_states;
> + // To record current solution.
> + _StateIdT _M_start;
> + _BiIter _M_sol_pos;
> + // Saves states that need to be considered for the next character.
> (BFS mode)
> + _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>>
> _M_match_queue;
> + // Indicates which states are already visited. (BFS mode)
> + bool*
> _M_visited_states;
> _FlagT _M_flags;
> + const _Search_mode
> _M_search_mode;
> // Do we have a solution so far?
> bool _M_has_sol;
> };
> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
> b/libstdc++-v3/include/bits/regex_executor.tcc
> index 19b5ad27df40..0ae7bdf6839a 100644
> --- a/libstdc++-v3/include/bits/regex_executor.tcc
> +++ b/libstdc++-v3/include/bits/regex_executor.tcc
> @@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> };
>
> // The _M_main function operates in different modes, DFS mode or BFS
> mode,
> - // indicated by template parameter __dfs_mode, and dispatches to one of
> the
> - // _M_main_dispatch overloads.
> + // indicated by _M_search_mode, and dispatches to either _M_main_dfs or
> + // _M_main_bfs.
> //
> // ------------------------------------------------------------
> //
> @@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> bool __dfs_mode>
> bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> - _M_main_dispatch(_Match_mode __match_mode, __dfs)
> + _M_main_dfs(_Match_mode __match_mode)
> {
> _M_has_sol = false;
> - *_M_states._M_get_sol_pos() = _BiIter();
> + *_M_get_sol_pos() = _BiIter();
> _M_cur_results = _M_results;
> - _M_dfs(__match_mode, _M_states._M_start);
> + _M_dfs(__match_mode, _M_start);
> return _M_has_sol;
> }
>
> @@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> // It first computes epsilon closure (states that can be achieved
> without
> // consuming characters) for every state that's still matching,
> // using the same DFS algorithm, but doesn't re-enter states (using
> - // _M_states._M_visited to check), nor follow _S_opcode_match.
> + // _M_visited to check), nor follow _S_opcode_match.
> //
> - // Then apply DFS using every _S_opcode_match (in
> _M_states._M_match_queue)
> + // Then apply DFS using every _S_opcode_match (in _M_match_queue)
> // as the start state.
> //
> // It significantly reduces potential duplicate states, so has a better
> @@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> bool __dfs_mode>
> bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> - _M_main_dispatch(_Match_mode __match_mode, __bfs)
> + _M_main_bfs(_Match_mode __match_mode)
> {
> - _M_states._M_queue(_M_states._M_start, _M_results);
> + _M_match_queue.emplace_back(_M_start, _M_results);
> bool __ret = false;
> while (1)
> {
> _M_has_sol = false;
> - if (_M_states._M_match_queue.empty())
> + if (_M_match_queue.empty())
> break;
> - std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
> - auto __old_queue = std::move(_M_states._M_match_queue);
> + std::fill_n(_M_visited_states, _M_nfa.size(), false);
> + auto __old_queue = std::move(_M_match_queue);
> auto __alloc = _M_cur_results.get_allocator();
> for (auto& __task : __old_queue)
> {
> @@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> }
> if (__match_mode == _Match_mode::_Exact)
> __ret = _M_has_sol;
> - _M_states._M_match_queue.clear();
> + _M_match_queue.clear();
> return __ret;
> }
>
> @@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> // We may want to make this faster by not copying,
> // but let's not be clever prematurely.
> _ResultsVec __what(_M_cur_results);
> - _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
> - __sub._M_states._M_start = __next;
> + _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags,
> + bool(_M_search_mode));
> + __sub._M_start = __next;
> if (__sub._M_search_from_first())
> {
> for (size_t __i = 0; __i < __what.size(); __i++)
> @@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> // Greedy.
> if (!__state._M_neg)
> {
> - if constexpr (__dfs_mode)
> + if (_M_search_mode == _Search_mode::_DFS)
> // If it's DFS executor and already accepted, we're done.
> _M_frames.emplace_back(_S_fopcode_fallback_next,
> __state._M_next,
> _M_current);
> @@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> }
> else // Non-greedy mode
> {
> - if constexpr (__dfs_mode)
> + if (_M_search_mode == _Search_mode::_DFS)
> {
> // vice-versa.
> _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more,
> __i,
> @@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> const auto& __state = _M_nfa[__i];
> if (_M_current == _M_end)
> return;
> - if constexpr (__dfs_mode)
> + if (_M_search_mode == _Search_mode::_DFS)
> {
> if (__state._M_matches(*_M_current))
> {
> @@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> }
> else
> if (__state._M_matches(*_M_current))
> - _M_states._M_queue(__state._M_next, _M_cur_results);
> + _M_match_queue.emplace_back(__state._M_next, _M_cur_results);
> }
>
> template<typename _BiIter, typename _TraitsT>
> @@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
> {
> - static_assert(__dfs_mode, "this should never be instantiated");
> + __glibcxx_assert(_M_search_mode == _Search_mode::_DFS);
>
> const auto& __state = _M_nfa[__i];
> auto& __submatch = _M_cur_results[__state._M_backref_index];
> @@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> _M_handle_accept(_Match_mode __match_mode, _StateIdT)
> {
> - if constexpr (__dfs_mode)
> + if (_M_search_mode == _Search_mode::_DFS)
> {
> __glibcxx_assert(!_M_has_sol);
> if (__match_mode == _Match_mode::_Exact)
> @@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> _M_results = _M_cur_results;
> else // POSIX
> {
> - __glibcxx_assert(_M_states._M_get_sol_pos());
> + __glibcxx_assert(_M_get_sol_pos());
> // Here's POSIX's logic: match the longest one. However
> // we never know which one (lhs or rhs of "|") is longer
> // unless we try both of them and compare the results.
> @@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> // position of the last successful match. It's better
> // to be larger, because POSIX regex is always greedy.
> // TODO: This could be slow.
> - if (*_M_states._M_get_sol_pos() == _BiIter()
> - || std::distance(_M_begin,
> - *_M_states._M_get_sol_pos())
> + if (*_M_get_sol_pos() == _BiIter()
> + || std::distance(_M_begin, *_M_get_sol_pos())
> < std::distance(_M_begin, _M_current))
> {
> - *_M_states._M_get_sol_pos() = _M_current;
> + *_M_get_sol_pos() = _M_current;
> _M_results = _M_cur_results;
> }
> }
> @@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> _M_node(_Match_mode __match_mode, _StateIdT __i)
> {
> - if (_M_states._M_visited(__i))
> + if (_M_visited(__i))
> return;
>
> switch (_M_nfa[__i]._M_opcode())
> @@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> case _S_opcode_match:
> _M_handle_match(__match_mode, __i); break;
> case _S_opcode_backref:
> - if constexpr (__dfs_mode)
> + if (_M_search_mode == _Search_mode::_DFS)
> _M_handle_backref(__match_mode, __i);
> else
> __builtin_unreachable();
> @@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> - bool __dfs_mode>
> - void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> + bool __dummy>
> + void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>::
> _M_dfs(_Match_mode __match_mode, _StateIdT __start)
> {
> + const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS);
> _M_frames.emplace_back(_S_fopcode_next, __start);
>
> while (!_M_frames.empty())
> @@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> case _S_fopcode_fallback_next:
> if (_M_has_sol)
> break;
> - if constexpr (__dfs_mode)
> + if (__dfs_mode)
> _M_current = __frame._M_pos;
> [[__fallthrough__]];
> case _S_fopcode_next:
> @@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> case _S_fopcode_fallback_rep_once_more:
> if (_M_has_sol)
> break;
> - if constexpr (__dfs_mode)
> + if (__dfs_mode)
>
As this is now runtime, do we need it? Can we always restore _M_current in
this
cases?
> _M_current = __frame._M_pos;
> [[__fallthrough__]];
> case _S_fopcode_rep_once_more:
> @@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> case _S_fopcode_posix_alternative:
> _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
> _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
> - if constexpr (__dfs_mode)
> + if (__dfs_mode)
> _M_current = __frame._M_pos;
> _M_has_sol = false;
> break;
> --
> 2.53.0.rc1.65.gea24e2c554
>
>