That _Executor's __dfs_mode flag is a template parameter means
__regex_algo_impl has to instantiate two different (but largely the
same) versions of _Executor even though the BFS version is rarely used.
And in practice it seems the compiler has trouble DCEing the unused
version of _Executor, which needlessly increases code size and burdens
the optimizer.

This patch replaces the template parameter with a run-time data member
_M_search_mode.  We also need to inline the _State_info member functions
and data members of both partial specializations into _Executor so that
they depend on the run-time instead of compile-time flag.  This means
_Executor is two pointers bigger when using DFS mode (due to the unused
_M_match_queue and _M_visited_states members), but that's negligible
compared to the other benefits.

The __dfs_mode template parameter is now unused but not yet removed,
that'll be done in the next patch.

After this patch, both code size and run time for the microbenchmark

    for (int i = 0; i < 10000; i++)
      regex_match(string(200, 'a'), regex("(a|b|c)*"));

decreases by about 15% each (with -O2).  Compile time and maximum memory
use decreases by 5-10%.

libstdc++-v3/ChangeLog:

        * include/bits/regex.tcc (__regex_algo_impl): Pass __search_mode
        parameter indicating whether to use DFS or BFS to _Executor's
        constructor.
        * include/bits/regex_executor.h (_Executor::_Search_mode): New.
        (_Executor::_Executor): Add __search_mode parameter and
        initialize _M_search_mode.  Adjust after inlining _State_info
        members into _Executor.
        (_Executor::~_Executor): Free _M_visted_states.
        (_Executor::_M_main): Adjust after renaming _M_main_dispatch
        overloads to _M_main_dfs and _M_main_bfs.
        (_Executor::_State_info): Remove.
        (_Executor::_M_visited): Inlined from _State_info.
        (_Executor::_M_get_sol_pos): Likewise.
        (_Executor::_M_states): Remove.
        (_Executor::_M_start): Inlined from _State_info.
        (_Executor::_M_sol_pos): Likewise.
        (_Executor::_M_match_queue): Likewise.
        (_Executor::_M_search_mode): New.
        * include/bits/regex_executor.tcc (_Executor::_M_main_dispatch):
        Renamed to...
        (_Executor::_M_main_dfs, _Executor::_M_main_bfs): ... these.
        (_Executor::_M_*): Adjust after _M_states removal.
        (_Executor::_M_lookhead): Also adjust _Executor constructor
        call.
---
 libstdc++-v3/include/bits/regex.tcc          | 25 ++---
 libstdc++-v3/include/bits/regex_executor.h   | 97 ++++++++------------
 libstdc++-v3/include/bits/regex_executor.tcc | 67 +++++++-------
 3 files changed, 79 insertions(+), 110 deletions(-)

diff --git a/libstdc++-v3/include/bits/regex.tcc 
b/libstdc++-v3/include/bits/regex.tcc
index 6ec259c25019..6ffe1beca08a 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -61,26 +61,19 @@ namespace __detail
       __m._M_resize(__re._M_automaton->_M_sub_count());
 
       bool __ret;
+      bool __search_mode = true; // by default, use DFS
       if ((__re.flags() & regex_constants::__polynomial)
          || (__policy == _RegexExecutorPolicy::_S_alternate
              && !__re._M_automaton->_M_has_backref))
-       {
-         _Executor<_BiIter, _Alloc, _TraitsT, false>
-           __executor(__s, __e, __res, __re, __flags);
-         if (__match_mode)
-           __ret = __executor._M_match();
-         else
-           __ret = __executor._M_search();
-       }
+       __search_mode = false; // use BFS when requested
+
+      _Executor<_BiIter, _Alloc, _TraitsT, true>
+       __executor(__s, __e, __res, __re, __flags, __search_mode);
+      if (__match_mode)
+       __ret = __executor._M_match();
       else
-       {
-         _Executor<_BiIter, _Alloc, _TraitsT, true>
-           __executor(__s, __e, __res, __re, __flags);
-         if (__match_mode)
-           __ret = __executor._M_match();
-         else
-           __ret = __executor._M_search();
-       }
+       __ret = __executor._M_search();
+
       if (__ret)
        {
          for (auto& __it : __res)
diff --git a/libstdc++-v3/include/bits/regex_executor.h 
b/libstdc++-v3/include/bits/regex_executor.h
index 94048e1f632c..550fa751c002 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
           bool __dfs_mode>
     class _Executor
     {
-      using __search_mode = integral_constant<bool, __dfs_mode>;
-      using __dfs = true_type;
-      using __bfs = false_type;
-
+      enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
       enum class _Match_mode : unsigned char { _Exact, _Prefix };
 
     public:
@@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
                _BiIter         __end,
                _ResultsVec&    __results,
                const _RegexT&  __re,
-               _FlagT          __flags)
+               _FlagT          __flags,
+               bool            __search_mode)
       : _M_cur_results(__results.get_allocator()),
        _M_begin(__begin),
        _M_end(__end),
@@ -82,14 +80,22 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        _M_nfa(*__re._M_automaton),
        _M_results(__results),
        _M_rep_count(_M_nfa.size()),
-       _M_states(_M_nfa._M_start(), _M_nfa.size()),
-       _M_flags(__flags)
+       _M_start(_M_nfa._M_start()),
+       _M_flags(__flags),
+       _M_search_mode(_Search_mode(__search_mode))
       {
        using namespace regex_constants;
        if (__flags & match_prev_avail) // ignore not_bol and not_bow
          _M_flags &= ~(match_not_bol | match_not_bow);
+       if (_M_search_mode == _Search_mode::_BFS)
+         _M_visited_states = new bool[_M_nfa.size()];
+       else
+         _M_visited_states = nullptr;
       }
 
+      ~_Executor()
+      { delete[] _M_visited_states; }
+
       // Set matched when string exactly matches the pattern.
       bool
       _M_match()
@@ -154,13 +160,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
 
       bool
       _M_main(_Match_mode __match_mode)
-      { return _M_main_dispatch(__match_mode, __search_mode{}); }
+      {
+       if (_M_search_mode == _Search_mode::_DFS)
+         return _M_main_dfs(__match_mode);
+       else
+         return _M_main_bfs(__match_mode);
+      }
 
       bool
-      _M_main_dispatch(_Match_mode __match_mode, __dfs);
+      _M_main_dfs(_Match_mode __match_mode);
 
       bool
-      _M_main_dispatch(_Match_mode __match_mode, __bfs);
+      _M_main_bfs(_Match_mode __match_mode);
 
       bool
       _M_is_word(_CharT __ch) const
@@ -239,62 +250,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        return (_M_re._M_automaton->_M_options() & __m) == __m;
       }
 
-       // Holds additional information used in BFS-mode.
-      template<typename _SearchMode, typename _ResultsVec>
-       struct _State_info;
-
-      template<typename _ResultsVec>
-       struct _State_info<__bfs, _ResultsVec>
-       {
-         explicit
-         _State_info(_StateIdT __start, size_t __n)
-         : _M_visited_states(new bool[__n]()), _M_start(__start)
-         { }
-
-         ~_State_info() { delete[] _M_visited_states; }
-
-         _State_info(const _State_info&) = delete;
-         _State_info& operator=(const _State_info&) = delete;
-
-         bool _M_visited(_StateIdT __i)
+      bool
+      _M_visited(_StateIdT __i)
+      {
+       if (_M_visited_states)
          {
            if (_M_visited_states[__i])
              return true;
            _M_visited_states[__i] = true;
-           return false;
          }
+       return false;
+      }
 
-         void _M_queue(_StateIdT __i, const _ResultsVec& __res)
-         { _M_match_queue.emplace_back(__i, __res); }
-
-         // Dummy implementations for BFS mode.
-         _BiIter* _M_get_sol_pos() { return nullptr; }
-
-         // Saves states that need to be considered for the next character.
-         _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
-         // Indicates which states are already visited.
-         bool*     _M_visited_states;
-         // To record current solution.
-         _StateIdT _M_start;
-       };
-
-      template<typename _ResultsVec>
-       struct _State_info<__dfs, _ResultsVec>
-       {
-         explicit
-         _State_info(_StateIdT __start, size_t) : _M_start(__start)
-         { }
-
-         // Dummy implementations for DFS mode.
-         bool _M_visited(_StateIdT) const { return false; }
-         void _M_queue(_StateIdT, const _ResultsVec&) { }
-
-         _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
-
-         // To record current solution.
-         _StateIdT _M_start;
-         _BiIter   _M_sol_pos;
-       };
+      _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
 
     public:
       _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>>       _M_frames;
@@ -306,8 +274,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
       const _NFAT&                                          _M_nfa;
       _ResultsVec&                                          _M_results;
       _GLIBCXX_STD_C::vector<pair<_BiIter, int>>            _M_rep_count;
-      _State_info<__search_mode, _ResultsVec>              _M_states;
+      // To record current solution.
+      _StateIdT                                             _M_start;
+      _BiIter                                               _M_sol_pos;
+      // Saves states that need to be considered for the next character. (BFS 
mode)
+      _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>>  _M_match_queue;
+      // Indicates which states are already visited. (BFS mode)
+      bool*                                                 _M_visited_states;
       _FlagT                                                _M_flags;
+      const _Search_mode                                    _M_search_mode;
       // Do we have a solution so far?
       bool                                                  _M_has_sol;
     };
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
b/libstdc++-v3/include/bits/regex_executor.tcc
index 19b5ad27df40..0ae7bdf6839a 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
     };
 
   // The _M_main function operates in different modes, DFS mode or BFS mode,
-  // indicated by template parameter __dfs_mode, and dispatches to one of the
-  // _M_main_dispatch overloads.
+  // indicated by _M_search_mode, and dispatches to either _M_main_dfs or
+  // _M_main_bfs.
   //
   // ------------------------------------------------------------
   //
@@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
           bool __dfs_mode>
     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_main_dispatch(_Match_mode __match_mode, __dfs)
+    _M_main_dfs(_Match_mode __match_mode)
     {
       _M_has_sol = false;
-      *_M_states._M_get_sol_pos() = _BiIter();
+      *_M_get_sol_pos() = _BiIter();
       _M_cur_results = _M_results;
-      _M_dfs(__match_mode, _M_states._M_start);
+      _M_dfs(__match_mode, _M_start);
       return _M_has_sol;
     }
 
@@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
   // It first computes epsilon closure (states that can be achieved without
   // consuming characters) for every state that's still matching,
   // using the same DFS algorithm, but doesn't re-enter states (using
-  // _M_states._M_visited to check), nor follow _S_opcode_match.
+  // _M_visited to check), nor follow _S_opcode_match.
   //
-  // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
+  // Then apply DFS using every _S_opcode_match (in _M_match_queue)
   // as the start state.
   //
   // It significantly reduces potential duplicate states, so has a better
@@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
           bool __dfs_mode>
     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_main_dispatch(_Match_mode __match_mode, __bfs)
+    _M_main_bfs(_Match_mode __match_mode)
     {
-      _M_states._M_queue(_M_states._M_start, _M_results);
+      _M_match_queue.emplace_back(_M_start, _M_results);
       bool __ret = false;
       while (1)
        {
          _M_has_sol = false;
-         if (_M_states._M_match_queue.empty())
+         if (_M_match_queue.empty())
            break;
-         std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
-         auto __old_queue = std::move(_M_states._M_match_queue);
+         std::fill_n(_M_visited_states, _M_nfa.size(), false);
+         auto __old_queue = std::move(_M_match_queue);
          auto __alloc = _M_cur_results.get_allocator();
          for (auto& __task : __old_queue)
            {
@@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        }
       if (__match_mode == _Match_mode::_Exact)
        __ret = _M_has_sol;
-      _M_states._M_match_queue.clear();
+      _M_match_queue.clear();
       return __ret;
     }
 
@@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
       // We may want to make this faster by not copying,
       // but let's not be clever prematurely.
       _ResultsVec __what(_M_cur_results);
-      _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
-      __sub._M_states._M_start = __next;
+      _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags,
+                     bool(_M_search_mode));
+      __sub._M_start = __next;
       if (__sub._M_search_from_first())
        {
          for (size_t __i = 0; __i < __what.size(); __i++)
@@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
       // Greedy.
       if (!__state._M_neg)
        {
-         if constexpr (__dfs_mode)
+         if (_M_search_mode == _Search_mode::_DFS)
            // If it's DFS executor and already accepted, we're done.
            _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
                                   _M_current);
@@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        }
       else // Non-greedy mode
        {
-         if constexpr (__dfs_mode)
+         if (_M_search_mode == _Search_mode::_DFS)
            {
              // vice-versa.
              _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
@@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
       const auto& __state = _M_nfa[__i];
       if (_M_current == _M_end)
        return;
-      if constexpr (__dfs_mode)
+      if (_M_search_mode == _Search_mode::_DFS)
        {
          if (__state._M_matches(*_M_current))
            {
@@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        }
       else
        if (__state._M_matches(*_M_current))
-         _M_states._M_queue(__state._M_next, _M_cur_results);
+         _M_match_queue.emplace_back(__state._M_next, _M_cur_results);
     }
 
   template<typename _BiIter, typename _TraitsT>
@@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
     _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
     {
-      static_assert(__dfs_mode, "this should never be instantiated");
+      __glibcxx_assert(_M_search_mode == _Search_mode::_DFS);
 
       const auto& __state = _M_nfa[__i];
       auto& __submatch = _M_cur_results[__state._M_backref_index];
@@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
     _M_handle_accept(_Match_mode __match_mode, _StateIdT)
     {
-      if constexpr (__dfs_mode)
+      if (_M_search_mode == _Search_mode::_DFS)
        {
          __glibcxx_assert(!_M_has_sol);
          if (__match_mode == _Match_mode::_Exact)
@@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
                _M_results = _M_cur_results;
              else // POSIX
                {
-                 __glibcxx_assert(_M_states._M_get_sol_pos());
+                 __glibcxx_assert(_M_get_sol_pos());
                  // Here's POSIX's logic: match the longest one. However
                  // we never know which one (lhs or rhs of "|") is longer
                  // unless we try both of them and compare the results.
@@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
                  // position of the last successful match. It's better
                  // to be larger, because POSIX regex is always greedy.
                  // TODO: This could be slow.
-                 if (*_M_states._M_get_sol_pos() == _BiIter()
-                     || std::distance(_M_begin,
-                                      *_M_states._M_get_sol_pos())
+                 if (*_M_get_sol_pos() == _BiIter()
+                     || std::distance(_M_begin, *_M_get_sol_pos())
                         < std::distance(_M_begin, _M_current))
                    {
-                     *_M_states._M_get_sol_pos() = _M_current;
+                     *_M_get_sol_pos() = _M_current;
                      _M_results = _M_cur_results;
                    }
                }
@@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
     inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
     _M_node(_Match_mode __match_mode, _StateIdT __i)
     {
-      if (_M_states._M_visited(__i))
+      if (_M_visited(__i))
        return;
 
       switch (_M_nfa[__i]._M_opcode())
@@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        case _S_opcode_match:
          _M_handle_match(__match_mode, __i); break;
        case _S_opcode_backref:
-         if constexpr (__dfs_mode)
+         if (_M_search_mode == _Search_mode::_DFS)
            _M_handle_backref(__match_mode, __i);
          else
            __builtin_unreachable();
@@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
     }
 
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
-          bool __dfs_mode>
-    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+          bool __dummy>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>::
     _M_dfs(_Match_mode __match_mode, _StateIdT __start)
     {
+      const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS);
       _M_frames.emplace_back(_S_fopcode_next, __start);
 
       while (!_M_frames.empty())
@@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
            case _S_fopcode_fallback_next:
              if (_M_has_sol)
                break;
-             if constexpr (__dfs_mode)
+             if (__dfs_mode)
                _M_current = __frame._M_pos;
              [[__fallthrough__]];
            case _S_fopcode_next:
@@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
            case _S_fopcode_fallback_rep_once_more:
              if (_M_has_sol)
                break;
-             if constexpr (__dfs_mode)
+             if (__dfs_mode)
                _M_current = __frame._M_pos;
              [[__fallthrough__]];
            case _S_fopcode_rep_once_more:
@@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
            case _S_fopcode_posix_alternative:
              _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
              _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
-             if constexpr (__dfs_mode)
+             if (__dfs_mode)
                _M_current = __frame._M_pos;
              _M_has_sol = false;
              break;
-- 
2.53.0.rc1.65.gea24e2c554

Reply via email to