On Sat, Jun 11, 2016 at 5:01 AM, Jonathan Wakely wrote: > N.B. The "typename" and "::type" are redundant here, because it names > the same type as the integral_constant itself, and you could > use __bool_constant<__collate> instead: > > return _M_transform_impl(_M_translate(__ch), > __bool_constant<__collate>()); > > OK for trunk without the redundant typename ...::type, your choice > whether to use __bool_constant or not.
Thanks! I was looking at std::bool_constant but that's in C++17. __bool_constant is even better. :) > > Will this fix apply cleanly to the branches too? > For gcc6 yes; for gcc5 more work is needed. I guess it's OK to backport to gcc6? Updated the patch according to the discussion in the libstdc++/71500 bug. -- Regards, Tim Shen
commit 46d269bcfeebc497af4b3dc427d857f10f0ab931 Author: Tim Shen <tims...@google.com> Date: Sat Jun 11 00:41:09 2016 -0700 2016-06-11 Tim Shen <tims...@google.com> PR libstdc++/71500 * include/bits/regex_compiler.h (_RegexTranslator<>::_M_transform): Take case into consideration when looking at range expression. * include/bits/regex_compiler.tcc (_Compiler<>::_M_insert_bracket_matcher, _Compiler<>::_M_expression_term): Re-define __last_char to buffer the last character, which may be flushed to _M_add_char or be used as the beginning of a range. * testsuite/28_regex/regression.cc: Add new testcase. diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 410d61b..73c0af4 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -226,18 +226,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if (__icase) return _M_traits.translate_nocase(__ch); - else if (__collate) - return _M_traits.translate(__ch); else - return __ch; + return _M_traits.translate(__ch); } _StrTransT _M_transform(_CharT __ch) const - { - return _M_transform_impl(__ch, typename integral_constant<bool, - __collate>::type()); - } + { return _M_transform_impl(__ch, __bool_constant<__collate>()); } private: _StrTransT @@ -247,7 +242,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StrTransT _M_transform_impl(_CharT __ch, true_type) const { - _StrTransT __str = _StrTransT(1, _M_translate(__ch)); + _StrTransT __str = _StrTransT(1, __ch); return _M_traits.transform(__str.begin(), __str.end()); } @@ -433,6 +428,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_make_range(_CharT __l, _CharT __r) { + __l = _M_translator._M_translate(__l); + __r = _M_translator._M_translate(__r); if (__l > __r) __throw_regex_error(regex_constants::error_range, "Invalid range in bracket expression."); diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index ff69e16..050435d 100644 --- 
a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -428,11 +428,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (!(_M_flags & regex_constants::ECMAScript)) if (_M_try_char()) { - __matcher._M_add_char(_M_value[0]); __last_char.first = true; __last_char.second = _M_value[0]; } while (_M_expression_term(__last_char, __matcher)); + if (__last_char.first) + __matcher._M_add_char(__last_char.second); + __matcher._M_ready(); _M_stack.push(_StateSeqT( *_M_nfa, @@ -449,8 +451,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (_M_match_token(_ScannerT::_S_token_bracket_end)) return false; + const auto __flush = [&] + { + if (__last_char.first) + { + __matcher._M_add_char(__last_char.second); + __last_char.first = false; + } + }; if (_M_match_token(_ScannerT::_S_token_collsymbol)) { + __flush(); auto __symbol = __matcher._M_add_collate_element(_M_value); if (__symbol.size() == 1) { @@ -459,9 +470,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) - __matcher._M_add_equivalence_class(_M_value); + { + __flush(); + __matcher._M_add_equivalence_class(_M_value); + } else if (_M_match_token(_ScannerT::_S_token_char_class_name)) - __matcher._M_add_character_class(_M_value, false); + { + __flush(); + __matcher._M_add_character_class(_M_value, false); + } // POSIX doesn't allow '-' as a start-range char (say [a-z--0]), // except when the '-' is the first or last character in the bracket // expression ([--0]). 
ECMAScript treats all '-' after a range as a @@ -476,7 +493,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if (!__last_char.first) { - __matcher._M_add_char(_M_value[0]); + __last_char.first = true; + __last_char.second = _M_value[0]; if (_M_value[0] == '-' && !(_M_flags & regex_constants::ECMAScript)) { @@ -488,8 +506,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION "a dash is not treated literally only when it is at " "beginning or end."); } - __last_char.first = true; - __last_char.second = _M_value[0]; } else { @@ -499,22 +515,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __matcher._M_make_range(__last_char.second , _M_value[0]); __last_char.first = false; + return true; } - else - { - if (_M_scanner._M_get_token() - != _ScannerT::_S_token_bracket_end) - __throw_regex_error( - regex_constants::error_range, - "Unexpected end of bracket expression."); - __matcher._M_add_char(_M_value[0]); - } - } - else - { - __matcher._M_add_char(_M_value[0]); - __last_char.second = _M_value[0]; + if (_M_scanner._M_get_token() + != _ScannerT::_S_token_bracket_end) + __throw_regex_error( + regex_constants::error_range, + "Unexpected end of bracket expression."); } + __matcher._M_add_char(__last_char.second); + __last_char.second = _M_value[0]; } } else if (_M_match_token(_ScannerT::_S_token_quoted_class)) @@ -580,8 +590,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _BracketMatcher<_TraitsT, __icase, __collate>:: _M_apply(_CharT __ch, false_type) const { + __ch = _M_translator._M_translate(__ch); bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(), - _M_translator._M_translate(__ch)); + __ch); if (!__ret) { auto __s = _M_translator._M_transform(__ch); diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc index d367c8b..45b9f93 100644 --- a/libstdc++-v3/testsuite/28_regex/regression.cc +++ b/libstdc++-v3/testsuite/28_regex/regression.cc @@ -61,12 +61,40 @@ test03() VERIFY(!regex_search_debug("a", regex(R"(\b$)"), 
regex_constants::match_not_eow)); } +// PR libstdc++/71500 +void +test04() +{ + bool test __attribute__((unused)) = true; + + { + regex re1("[A-F]+", regex::ECMAScript | regex::icase); + VERIFY(regex_match_debug("aaa", re1)); + VERIFY(regex_match_debug("AAA", re1)); + VERIFY(regex_match_debug("fff", re1)); + VERIFY(regex_match_debug("FFF", re1)); + } + { + bool caught = false; + try + { + (void)regex("[T-f]+", regex::ECMAScript | regex::icase); + } + catch (...) + { + caught = true; + } + VERIFY(caught); + } +} + int main() { test01(); test02(); test03(); + test04(); return 0; }