That _Executor's __dfs_mode flag is a template parameter means
__regex_algo_impl has to instantiate two different (but largely the
same) versions of _Executor even though the BFS version is rarely used.
And in practice it seems the compiler has trouble DCEing the unused
version of _Executor, which needlessly increases code size and burdens
the optimizer.
This patch replaces the template parameter with a run-time data member
_M_search_mode. We also need to inline the _State_info member functions
and data members of both partial specializations into _Executor so that
they depend on the run-time instead of compile-time flag. This means
_Executor is two pointers bigger when using DFS mode (due to the unused
_M_match_queue and _M_visited_states members), but that's negligible
compared to the other benefits.
The __dfs_mode template parameter is now unused but not yet removed,
that'll be done in the next patch.
After this patch, both code size and run time for the microbenchmark
for (int i = 0; i < 10000; i++)
regex_match(string(200, 'a'), regex("(a|b|c)*"));
decreases by about 15% each (with -O2). Compile time and maximum memory
use decreases by 5-10%.
libstdc++-v3/ChangeLog:
* include/bits/regex.tcc (__regex_algo_impl): Pass __search_mode
parameter indicating whether to use DFS or BFS to _Executor's
constructor.
* include/bits/regex_executor.h (_Executor::_Search_mode): New.
(_Executor::_Executor): Add __search_mode parameter and
initialize _M_search_mode. Adjust after inlining _State_info
members into _Executor.
(_Executor::~_Executor): Free _M_visted_states.
(_Executor::_M_main): Adjust after renaming _M_main_dispatch
overloads to _M_main_dfs and _M_main_bfs.
(_Executor::_State_info): Remove.
(_Executor::_M_visited): Inlined from _State_info.
(_Executor::_M_get_sol_pos): Likewise.
(_Executor::_M_states): Remove.
(_Executor::_M_start): Inlined from _State_info.
(_Executor::_M_sol_pos): Likewise.
(_Executor::_M_match_queue): Likewise.
(_Executor::_M_search_mode): New.
* include/bits/regex_executor.tcc (_Executor::_M_main_dispatch):
Renamed to...
(_Executor::_M_main_dfs, _Executor::_M_main_bfs): ... these.
(_Executor::_M_*): Adjust after _M_states removal.
(_Executor::_M_lookhead): Also adjust _Executor constructor
call.
---
libstdc++-v3/include/bits/regex.tcc | 25 ++---
libstdc++-v3/include/bits/regex_executor.h | 97 ++++++++------------
libstdc++-v3/include/bits/regex_executor.tcc | 67 +++++++-------
3 files changed, 79 insertions(+), 110 deletions(-)
diff --git a/libstdc++-v3/include/bits/regex.tcc
b/libstdc++-v3/include/bits/regex.tcc
index 6ec259c25019..6ffe1beca08a 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -61,26 +61,19 @@ namespace __detail
__m._M_resize(__re._M_automaton->_M_sub_count());
bool __ret;
+ bool __search_mode = true; // by default, use DFS
if ((__re.flags() & regex_constants::__polynomial)
|| (__policy == _RegexExecutorPolicy::_S_alternate
&& !__re._M_automaton->_M_has_backref))
- {
- _Executor<_BiIter, _Alloc, _TraitsT, false>
- __executor(__s, __e, __res, __re, __flags);
- if (__match_mode)
- __ret = __executor._M_match();
- else
- __ret = __executor._M_search();
- }
+ __search_mode = false; // use BFS when requested
+
+ _Executor<_BiIter, _Alloc, _TraitsT, true>
+ __executor(__s, __e, __res, __re, __flags, __search_mode);
+ if (__match_mode)
+ __ret = __executor._M_match();
else
- {
- _Executor<_BiIter, _Alloc, _TraitsT, true>
- __executor(__s, __e, __res, __re, __flags);
- if (__match_mode)
- __ret = __executor._M_match();
- else
- __ret = __executor._M_search();
- }
+ __ret = __executor._M_search();
+
if (__ret)
{
for (auto& __it : __res)
diff --git a/libstdc++-v3/include/bits/regex_executor.h
b/libstdc++-v3/include/bits/regex_executor.h
index 94048e1f632c..550fa751c002 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
bool __dfs_mode>
class _Executor
{
- using __search_mode = integral_constant<bool, __dfs_mode>;
- using __dfs = true_type;
- using __bfs = false_type;
-
+ enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
enum class _Match_mode : unsigned char { _Exact, _Prefix };
public:
@@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
_BiIter __end,
_ResultsVec& __results,
const _RegexT& __re,
- _FlagT __flags)
+ _FlagT __flags,
+ bool __search_mode)
: _M_cur_results(__results.get_allocator()),
_M_begin(__begin),
_M_end(__end),
@@ -82,14 +80,22 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
_M_nfa(*__re._M_automaton),
_M_results(__results),
_M_rep_count(_M_nfa.size()),
- _M_states(_M_nfa._M_start(), _M_nfa.size()),
- _M_flags(__flags)
+ _M_start(_M_nfa._M_start()),
+ _M_flags(__flags),
+ _M_search_mode(_Search_mode(__search_mode))
{
using namespace regex_constants;
if (__flags & match_prev_avail) // ignore not_bol and not_bow
_M_flags &= ~(match_not_bol | match_not_bow);
+ if (_M_search_mode == _Search_mode::_BFS)
+ _M_visited_states = new bool[_M_nfa.size()];
+ else
+ _M_visited_states = nullptr;
}
+ ~_Executor()
+ { delete[] _M_visited_states; }
+
// Set matched when string exactly matches the pattern.
bool
_M_match()
@@ -154,13 +160,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
bool
_M_main(_Match_mode __match_mode)
- { return _M_main_dispatch(__match_mode, __search_mode{}); }
+ {
+ if (_M_search_mode == _Search_mode::_DFS)
+ return _M_main_dfs(__match_mode);
+ else
+ return _M_main_bfs(__match_mode);
+ }
bool
- _M_main_dispatch(_Match_mode __match_mode, __dfs);
+ _M_main_dfs(_Match_mode __match_mode);
bool
- _M_main_dispatch(_Match_mode __match_mode, __bfs);
+ _M_main_bfs(_Match_mode __match_mode);
bool
_M_is_word(_CharT __ch) const
@@ -239,62 +250,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
return (_M_re._M_automaton->_M_options() & __m) == __m;
}
- // Holds additional information used in BFS-mode.
- template<typename _SearchMode, typename _ResultsVec>
- struct _State_info;
-
- template<typename _ResultsVec>
- struct _State_info<__bfs, _ResultsVec>
- {
- explicit
- _State_info(_StateIdT __start, size_t __n)
- : _M_visited_states(new bool[__n]()), _M_start(__start)
- { }
-
- ~_State_info() { delete[] _M_visited_states; }
-
- _State_info(const _State_info&) = delete;
- _State_info& operator=(const _State_info&) = delete;
-
- bool _M_visited(_StateIdT __i)
+ bool
+ _M_visited(_StateIdT __i)
+ {
+ if (_M_visited_states)
{
if (_M_visited_states[__i])
return true;
_M_visited_states[__i] = true;
- return false;
}
+ return false;
+ }
- void _M_queue(_StateIdT __i, const _ResultsVec& __res)
- { _M_match_queue.emplace_back(__i, __res); }
-
- // Dummy implementations for BFS mode.
- _BiIter* _M_get_sol_pos() { return nullptr; }
-
- // Saves states that need to be considered for the next character.
- _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
- // Indicates which states are already visited.
- bool* _M_visited_states;
- // To record current solution.
- _StateIdT _M_start;
- };
-
- template<typename _ResultsVec>
- struct _State_info<__dfs, _ResultsVec>
- {
- explicit
- _State_info(_StateIdT __start, size_t) : _M_start(__start)
- { }
-
- // Dummy implementations for DFS mode.
- bool _M_visited(_StateIdT) const { return false; }
- void _M_queue(_StateIdT, const _ResultsVec&) { }
-
- _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
-
- // To record current solution.
- _StateIdT _M_start;
- _BiIter _M_sol_pos;
- };
+ _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
public:
_GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames;
@@ -306,8 +274,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
const _NFAT& _M_nfa;
_ResultsVec& _M_results;
_GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
- _State_info<__search_mode, _ResultsVec> _M_states;
+ // To record current solution.
+ _StateIdT _M_start;
+ _BiIter _M_sol_pos;
+ // Saves states that need to be considered for the next character. (BFS
mode)
+ _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
+ // Indicates which states are already visited. (BFS mode)
+ bool* _M_visited_states;
_FlagT _M_flags;
+ const _Search_mode _M_search_mode;
// Do we have a solution so far?
bool _M_has_sol;
};
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
b/libstdc++-v3/include/bits/regex_executor.tcc
index 19b5ad27df40..0ae7bdf6839a 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
};
// The _M_main function operates in different modes, DFS mode or BFS mode,
- // indicated by template parameter __dfs_mode, and dispatches to one of the
- // _M_main_dispatch overloads.
+ // indicated by _M_search_mode, and dispatches to either _M_main_dfs or
+ // _M_main_bfs.
//
// ------------------------------------------------------------
//
@@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
- _M_main_dispatch(_Match_mode __match_mode, __dfs)
+ _M_main_dfs(_Match_mode __match_mode)
{
_M_has_sol = false;
- *_M_states._M_get_sol_pos() = _BiIter();
+ *_M_get_sol_pos() = _BiIter();
_M_cur_results = _M_results;
- _M_dfs(__match_mode, _M_states._M_start);
+ _M_dfs(__match_mode, _M_start);
return _M_has_sol;
}
@@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// It first computes epsilon closure (states that can be achieved without
// consuming characters) for every state that's still matching,
// using the same DFS algorithm, but doesn't re-enter states (using
- // _M_states._M_visited to check), nor follow _S_opcode_match.
+ // _M_visited to check), nor follow _S_opcode_match.
//
- // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
+ // Then apply DFS using every _S_opcode_match (in _M_match_queue)
// as the start state.
//
// It significantly reduces potential duplicate states, so has a better
@@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
- _M_main_dispatch(_Match_mode __match_mode, __bfs)
+ _M_main_bfs(_Match_mode __match_mode)
{
- _M_states._M_queue(_M_states._M_start, _M_results);
+ _M_match_queue.emplace_back(_M_start, _M_results);
bool __ret = false;
while (1)
{
_M_has_sol = false;
- if (_M_states._M_match_queue.empty())
+ if (_M_match_queue.empty())
break;
- std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
- auto __old_queue = std::move(_M_states._M_match_queue);
+ std::fill_n(_M_visited_states, _M_nfa.size(), false);
+ auto __old_queue = std::move(_M_match_queue);
auto __alloc = _M_cur_results.get_allocator();
for (auto& __task : __old_queue)
{
@@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
if (__match_mode == _Match_mode::_Exact)
__ret = _M_has_sol;
- _M_states._M_match_queue.clear();
+ _M_match_queue.clear();
return __ret;
}
@@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// We may want to make this faster by not copying,
// but let's not be clever prematurely.
_ResultsVec __what(_M_cur_results);
- _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
- __sub._M_states._M_start = __next;
+ _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags,
+ bool(_M_search_mode));
+ __sub._M_start = __next;
if (__sub._M_search_from_first())
{
for (size_t __i = 0; __i < __what.size(); __i++)
@@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// Greedy.
if (!__state._M_neg)
{
- if constexpr (__dfs_mode)
+ if (_M_search_mode == _Search_mode::_DFS)
// If it's DFS executor and already accepted, we're done.
_M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
_M_current);
@@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
else // Non-greedy mode
{
- if constexpr (__dfs_mode)
+ if (_M_search_mode == _Search_mode::_DFS)
{
// vice-versa.
_M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
@@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
const auto& __state = _M_nfa[__i];
if (_M_current == _M_end)
return;
- if constexpr (__dfs_mode)
+ if (_M_search_mode == _Search_mode::_DFS)
{
if (__state._M_matches(*_M_current))
{
@@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
else
if (__state._M_matches(*_M_current))
- _M_states._M_queue(__state._M_next, _M_cur_results);
+ _M_match_queue.emplace_back(__state._M_next, _M_cur_results);
}
template<typename _BiIter, typename _TraitsT>
@@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
{
- static_assert(__dfs_mode, "this should never be instantiated");
+ __glibcxx_assert(_M_search_mode == _Search_mode::_DFS);
const auto& __state = _M_nfa[__i];
auto& __submatch = _M_cur_results[__state._M_backref_index];
@@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_accept(_Match_mode __match_mode, _StateIdT)
{
- if constexpr (__dfs_mode)
+ if (_M_search_mode == _Search_mode::_DFS)
{
__glibcxx_assert(!_M_has_sol);
if (__match_mode == _Match_mode::_Exact)
@@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
_M_results = _M_cur_results;
else // POSIX
{
- __glibcxx_assert(_M_states._M_get_sol_pos());
+ __glibcxx_assert(_M_get_sol_pos());
// Here's POSIX's logic: match the longest one. However
// we never know which one (lhs or rhs of "|") is longer
// unless we try both of them and compare the results.
@@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// position of the last successful match. It's better
// to be larger, because POSIX regex is always greedy.
// TODO: This could be slow.
- if (*_M_states._M_get_sol_pos() == _BiIter()
- || std::distance(_M_begin,
- *_M_states._M_get_sol_pos())
+ if (*_M_get_sol_pos() == _BiIter()
+ || std::distance(_M_begin, *_M_get_sol_pos())
< std::distance(_M_begin, _M_current))
{
- *_M_states._M_get_sol_pos() = _M_current;
+ *_M_get_sol_pos() = _M_current;
_M_results = _M_cur_results;
}
}
@@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_node(_Match_mode __match_mode, _StateIdT __i)
{
- if (_M_states._M_visited(__i))
+ if (_M_visited(__i))
return;
switch (_M_nfa[__i]._M_opcode())
@@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_opcode_match:
_M_handle_match(__match_mode, __i); break;
case _S_opcode_backref:
- if constexpr (__dfs_mode)
+ if (_M_search_mode == _Search_mode::_DFS)
_M_handle_backref(__match_mode, __i);
else
__builtin_unreachable();
@@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ bool __dummy>
+ void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>::
_M_dfs(_Match_mode __match_mode, _StateIdT __start)
{
+ const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS);
_M_frames.emplace_back(_S_fopcode_next, __start);
while (!_M_frames.empty())
@@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_fopcode_fallback_next:
if (_M_has_sol)
break;
- if constexpr (__dfs_mode)
+ if (__dfs_mode)
_M_current = __frame._M_pos;
[[__fallthrough__]];
case _S_fopcode_next:
@@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_fopcode_fallback_rep_once_more:
if (_M_has_sol)
break;
- if constexpr (__dfs_mode)
+ if (__dfs_mode)
_M_current = __frame._M_pos;
[[__fallthrough__]];
case _S_fopcode_rep_once_more:
@@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_fopcode_posix_alternative:
_M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
_M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
- if constexpr (__dfs_mode)
+ if (__dfs_mode)
_M_current = __frame._M_pos;
_M_has_sol = false;
break;
--
2.53.0.rc1.65.gea24e2c554