On Tue, Apr 29, 2025 at 10:58 AM Jonathan Wakely <[email protected]> wrote:
> This will hardly make a dent in the very slow compile times for <regex>
> but it seems worth doing anyway.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/regex_compiler.h: Replace _GLIBCXX17_CONSTEXPR
> with constexpr and disable diagnostics with pragmas.
> (_AnyMatcher::operator()): Use constexpr-if instead of tag
> dispatching. Postpone calls to _M_translate until after checking
> result of earlier calls.
> (_AnyMatcher::_M_apply): Remove both overloads.
> (_BracketMatcher::operator(), _BracketMatcher::_M_ready):
> Replace tag dispatching with 'if constexpr'.
> (_BracketMatcher::_M_apply(_CharT, true_type)): Remove.
> (_BracketMatcher::_M_apply(_CharT, false_type)): Remove second
> parameter.
> (_BracketMatcher::_M_make_cache): Remove both overloads.
> * include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
> Remove second parameter.
> * include/bits/regex_executor.tcc: Replace _GLIBCXX17_CONSTEXPR
> with constexpr and disable diagnostics with pragmas.
> (_Executor::_M_handle_backref): Replace __glibcxx_assert with
> constexpr-if and use __builtin_unreachable for non-DFS mode
> specialization.
> (_Executor::_M_handle_accept): Mark _S_opcode_backref case as
> unreachable for non-DFS mode.
> ---
>
> Tested x86_64-linux.
>
> The v2 patch uses constexpr-if in <bits/regex_executor.tcc> as well, and
> optimizes _AnyMatcher so it doesn't do all the _M_translate calls up
> front.
>
> libstdc++-v3/include/bits/regex_compiler.h | 67 ++++++++------------
> libstdc++-v3/include/bits/regex_compiler.tcc | 2 +-
> libstdc++-v3/include/bits/regex_executor.tcc | 62 ++++++++++--------
> 3 files changed, 64 insertions(+), 67 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/regex_compiler.h
> b/libstdc++-v3/include/bits/regex_compiler.h
> index f24c7e3baa6..21e7065e066 100644
> --- a/libstdc++-v3/include/bits/regex_compiler.h
> +++ b/libstdc++-v3/include/bits/regex_compiler.h
> @@ -38,6 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>
> _GLIBCXX_END_NAMESPACE_CXX11
>
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
> namespace __detail
> {
> /**
> @@ -221,9 +223,9 @@ namespace __detail
> _CharT
> _M_translate(_CharT __ch) const
> {
> - if _GLIBCXX17_CONSTEXPR (__icase)
> + if constexpr (__icase)
> return _M_traits.translate_nocase(__ch);
> - else if _GLIBCXX17_CONSTEXPR (__collate)
> + else if constexpr (__collate)
> return _M_traits.translate(__ch);
> else
> return __ch;
> @@ -285,7 +287,7 @@ namespace __detail
> bool
> _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
> {
> - if _GLIBCXX17_CONSTEXPR (!__icase)
> + if constexpr (!__icase)
> return __first <= __ch && __ch <= __last;
> else
> return this->_M_in_range_icase(__first, __last, __ch);
> @@ -376,26 +378,20 @@ namespace __detail
>
> bool
> operator()(_CharT __ch) const
> - { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
> -
> - bool
> - _M_apply(_CharT __ch, true_type) const
> {
> - auto __c = _M_translator._M_translate(__ch);
> - auto __n = _M_translator._M_translate('\n');
> - auto __r = _M_translator._M_translate('\r');
> - return __c != __n && __c != __r;
> - }
> -
> - bool
> - _M_apply(_CharT __ch, false_type) const
> - {
> - auto __c = _M_translator._M_translate(__ch);
> - auto __n = _M_translator._M_translate('\n');
> - auto __r = _M_translator._M_translate('\r');
> - auto __u2028 = _M_translator._M_translate(u'\u2028');
> - auto __u2029 = _M_translator._M_translate(u'\u2029');
> - return __c != __n && __c != __r && __c != __u2028 && __c !=
> __u2029;
> + const auto __c = _M_translator._M_translate(__ch);
> + if (__c == _M_translator._M_translate('\n'))
> + return false;
> + if (__c == _M_translator._M_translate('\r'))
> + return false;
> + if constexpr (!is_same<_CharT, char>::value)
> + {
> + if (__c == _M_translator._M_translate(u'\u2028')) // line sep
> + return false;
> + if (__c == _M_translator._M_translate(u'\u2029')) // para sep
> + return false;
> + }
> + return true;
> }
>
> _TransT _M_translator;
> @@ -441,7 +437,10 @@ namespace __detail
> operator()(_CharT __ch) const
> {
> _GLIBCXX_DEBUG_ASSERT(_M_is_ready);
> - return _M_apply(__ch, _UseCache());
> + if constexpr (_UseCache::value)
> + if (!(__ch & 0x80)) [[__likely__]]
> + return _M_cache[static_cast<_UnsignedCharT>(__ch)];
> + return _M_apply(__ch);
> }
>
> void
> @@ -512,7 +511,9 @@ namespace __detail
> std::sort(_M_char_set.begin(), _M_char_set.end());
> auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
> _M_char_set.erase(__end, _M_char_set.end());
> - _M_make_cache(_UseCache());
> + if constexpr (_UseCache::value)
> + for (unsigned __i = 0; __i < 128; __i++) // Only cache 7-bit
> chars
> + _M_cache[__i] = _M_apply(static_cast<_CharT>(__i));
> _GLIBCXX_DEBUG_ONLY(_M_is_ready = true);
> }
>
> @@ -531,22 +532,7 @@ namespace __detail
> using _UnsignedCharT = typename std::make_unsigned<_CharT>::type;
>
> bool
> - _M_apply(_CharT __ch, false_type) const;
> -
> - bool
> - _M_apply(_CharT __ch, true_type) const
> - { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
> -
> - void
> - _M_make_cache(true_type)
> - {
> - for (unsigned __i = 0; __i < _M_cache.size(); __i++)
> - _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
> - }
> -
> - void
> - _M_make_cache(false_type)
> - { }
> + _M_apply(_CharT __ch) const;
>
> private:
> _GLIBCXX_STD_C::vector<_CharT> _M_char_set;
> @@ -565,6 +551,7 @@ namespace __detail
>
> ///@} regex-detail
> } // namespace __detail
> +#pragma GCC diagnostic pop
> _GLIBCXX_END_NAMESPACE_VERSION
> } // namespace std
>
> diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc
> b/libstdc++-v3/include/bits/regex_compiler.tcc
> index cd0db2761b5..59b79fdd106 100644
> --- a/libstdc++-v3/include/bits/regex_compiler.tcc
> +++ b/libstdc++-v3/include/bits/regex_compiler.tcc
> @@ -598,7 +598,7 @@ namespace __detail
> template<typename _TraitsT, bool __icase, bool __collate>
> bool
> _BracketMatcher<_TraitsT, __icase, __collate>::
> - _M_apply(_CharT __ch, false_type) const
> + _M_apply(_CharT __ch) const
> {
> return [this, __ch]
> {
> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
> b/libstdc++-v3/include/bits/regex_executor.tcc
> index e887e28854f..f310291b4c4 100644
> --- a/libstdc++-v3/include/bits/regex_executor.tcc
> +++ b/libstdc++-v3/include/bits/regex_executor.tcc
> @@ -32,6 +32,8 @@ namespace std _GLIBCXX_VISIBILITY(default)
> {
> _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
> namespace __detail
> {
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> @@ -217,7 +219,7 @@ namespace __detail
> }
> else // Non-greedy mode
> {
> - if (__dfs_mode)
> + if constexpr (__dfs_mode)
> {
> // vice-versa.
> _M_dfs(__match_mode, __state._M_next);
> @@ -322,7 +324,7 @@ namespace __detail
>
> if (_M_current == _M_end)
> return;
> - if (__dfs_mode)
> + if constexpr (__dfs_mode)
> {
> if (__state._M_matches(*_M_current))
> {
> @@ -393,32 +395,35 @@ namespace __detail
> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
> {
> - __glibcxx_assert(__dfs_mode);
> -
> - const auto& __state = _M_nfa[__i];
> - auto& __submatch = _M_cur_results[__state._M_backref_index];
> - if (!__submatch.matched)
> - return;
> - auto __last = _M_current;
> - for (auto __tmp = __submatch.first;
> - __last != _M_end && __tmp != __submatch.second;
> - ++__tmp)
> - ++__last;
> - if (_Backref_matcher<_BiIter, _TraitsT>(
> - _M_re.flags() & regex_constants::icase,
> - _M_re._M_automaton->_M_traits)._M_apply(
> - __submatch.first, __submatch.second, _M_current, __last))
> + if constexpr (__dfs_mode)
>
With the changes to _M_dfs and if constexpr there, couldn't this be changed
to just static_assert(__dfs_mode)? Or we hit the problem that expr in
static_assert
is not dependent?
> {
> - if (__last != _M_current)
> + const auto& __state = _M_nfa[__i];
> + auto& __submatch = _M_cur_results[__state._M_backref_index];
> + if (!__submatch.matched)
> + return;
> + auto __last = _M_current;
> + for (auto __tmp = __submatch.first;
> + __last != _M_end && __tmp != __submatch.second;
> + ++__tmp)
> + ++__last;
> + if (_Backref_matcher<_BiIter, _TraitsT>(
> + _M_re.flags() & regex_constants::icase,
> + _M_re._M_automaton->_M_traits)._M_apply(
> + __submatch.first, __submatch.second, _M_current,
> __last))
> {
> - auto __backup = _M_current;
> - _M_current = __last;
> - _M_dfs(__match_mode, __state._M_next);
> - _M_current = __backup;
> + if (__last != _M_current)
> + {
> + auto __backup = _M_current;
> + _M_current = __last;
> + _M_dfs(__match_mode, __state._M_next);
> + _M_current = __backup;
> + }
> + else
> + _M_dfs(__match_mode, __state._M_next);
> }
> - else
> - _M_dfs(__match_mode, __state._M_next);
> }
> + else
> + __builtin_unreachable();
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> @@ -426,7 +431,7 @@ namespace __detail
> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> _M_handle_accept(_Match_mode __match_mode, _StateIdT)
> {
> - if _GLIBCXX17_CONSTEXPR (__dfs_mode)
> + if constexpr (__dfs_mode)
> {
> __glibcxx_assert(!_M_has_sol);
> if (__match_mode == _Match_mode::_Exact)
> @@ -529,7 +534,11 @@ namespace __detail
> case _S_opcode_match:
> _M_handle_match(__match_mode, __i); break;
> case _S_opcode_backref:
> - _M_handle_backref(__match_mode, __i); break;
> + if constexpr (__dfs_mode)
> + _M_handle_backref(__match_mode, __i);
> + else
> + __builtin_unreachable();
> + break;
> case _S_opcode_accept:
> _M_handle_accept(__match_mode, __i); break;
> case _S_opcode_alternative:
> @@ -564,6 +573,7 @@ namespace __detail
> return __left_is_word != __right_is_word;
> }
> } // namespace __detail
> +#pragma GCC diagnostic pop
>
> _GLIBCXX_END_NAMESPACE_VERSION
> } // namespace
> --
> 2.49.0
>
>