[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex
miyuki added a comment. ping https://reviews.llvm.org/D42693 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex
miyuki added inline comments. Comment at: include/regex:3490 { switch (*__temp) { mclow.lists wrote: > Do we need any more cases here? Probably not, but we should throw an exception here as well. https://reviews.llvm.org/D42693 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex
miyuki updated this revision to Diff 132352. miyuki edited the summary of this revision. miyuki added a comment. Updated __parse_QUOTED_CHAR_ERE. Added more test cases. https://reviews.llvm.org/D42693 Files: include/regex test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp === --- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp +++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp @@ -19,11 +19,13 @@ #include #include "test_macros.h" -static bool error_escape_thrown(const char *pat) +static bool error_escape_thrown(const char *pat, +std::regex_constants::syntax_option_type +syntax = std::regex_constants::ECMAScript) { bool result = false; try { -std::regex re(pat); +std::regex re(pat, syntax); } catch (const std::regex_error ) { result = (ex.code() == std::regex_constants::error_escape); } @@ -45,4 +47,58 @@ assert(!error_escape_thrown("[\\cA]")); assert(!error_escape_thrown("\\cA")); +const std::regex_constants::syntax_option_type basic = +std::regex_constants::basic; + +assert(error_escape_thrown("\\a", basic)); +assert(error_escape_thrown("\\n", basic)); +assert(error_escape_thrown("\\t", basic)); +assert(error_escape_thrown("\\0", basic)); +assert(error_escape_thrown("\\/", basic)); +assert(error_escape_thrown("\\\n", basic)); +assert(error_escape_thrown("\\", basic)); + +assert(!error_escape_thrown("[\\a]", basic)); +assert(!error_escape_thrown("\\(a\\)", basic)); +assert(!error_escape_thrown("\\(ab\\)\\1", basic)); +assert(!error_escape_thrown("a\\{1,2\\}", basic)); +assert(!error_escape_thrown("\\.", basic)); +assert(!error_escape_thrown("\\*", basic)); + +const std::regex_constants::syntax_option_type extended = +std::regex_constants::extended; + +assert(error_escape_thrown("\\a", extended)); +assert(error_escape_thrown("\\n", extended)); +assert(error_escape_thrown("\\t", extended)); +assert(error_escape_thrown("\\0", extended)); +assert(error_escape_thrown("(ab)\\1", extended)); +assert(error_escape_thrown("\\/", extended)); +assert(error_escape_thrown("\\\n", extended)); +assert(error_escape_thrown("\\", extended)); + +assert(!error_escape_thrown("[\\a]", extended)); +assert(!error_escape_thrown("\\(a\\)", extended)); +assert(!error_escape_thrown("\\{\\}", extended)); +assert(!error_escape_thrown("\\.", extended)); +assert(!error_escape_thrown("\\*", extended)); + +const std::regex_constants::syntax_option_type awk = +std::regex_constants::awk; + +assert(error_escape_thrown("\\z", awk)); +assert(error_escape_thrown("[\\z]", awk)); +assert(error_escape_thrown("\\9", awk)); +assert(error_escape_thrown("\\\n", awk)); +assert(error_escape_thrown("\\", awk)); + +assert(!error_escape_thrown("\\n", awk)); +assert(!error_escape_thrown("\\t", awk)); +assert(!error_escape_thrown("\\/", awk)); +assert(!error_escape_thrown("\\(a\\)", awk)); +assert(!error_escape_thrown("\\{\\}", awk)); +assert(!error_escape_thrown("\\0", awk)); +assert(!error_escape_thrown("\\1", awk)); +assert(!error_escape_thrown("\\.", awk)); +assert(!error_escape_thrown("\\*", awk)); } Index: include/regex === --- include/regex +++ include/regex @@ -3442,23 +3442,32 @@ { if (__first != __last) { -_ForwardIterator __temp = _VSTD::next(__first); -if (__temp != __last) +if (*__first == '\\') { -if (*__first == '\\') +_ForwardIterator __temp = _VSTD::next(__first); +if (__temp == __last) +__throw_regex_error(); + +switch (*__temp) { -switch (*__temp) -{ -case '^': -case '.': -case '*': -case '[': -case '$': -case '\\': -__push_char(*__temp); -__first = ++__temp; +case '^': +case '.': +case '*': +case '[': +case '$': +case '\\': +__push_char(*__temp); +__first = ++__temp; +break; +case '(': +case ')': +case '{': +case '}': +break; +default: +if (*__temp >= '1' && *__temp <= '9') break; -} +__throw_regex_error(); } } } @@ -3473,34 +3482,36 @@ { if (__first != __last) { -_ForwardIterator __temp =
[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex
miyuki added inline comments. Comment at: include/regex:3465 +case '{': +case '}': +break; Quuxplusone wrote: > FWIW, I don't understand what's going on in this switch. > Is it intentional that `'('` and `'|'` now take different paths here? Yes. As far as I understand, only extended POSIX regular expressions support alternation: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html, so "\\|" should be treated as error_escape https://reviews.llvm.org/D42693 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex
Quuxplusone added inline comments. Comment at: include/regex:3465 +case '{': +case '}': +break; FWIW, I don't understand what's going on in this switch. Is it intentional that `'('` and `'|'` now take different paths here? Comment at: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp:60 +assert(!error_escape_thrown("\\.", basic)); +assert(!error_escape_thrown("\\*", basic)); } I would think about adding test cases here to document the intended behavior of - "\\n" and "\\t" which are valid of course; - "\\\n" which could be a common typo and should probably throw; - "\\/" which is common in Perl but maybe should throw anyway; - "\\1" in a regex mode that doesn't support backreferences; - "\\0". If these are already covered elsewhere in the suite, then never mind me. https://reviews.llvm.org/D42693 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex
mclow.lists added a comment. I like this. One nit and a question. Comment at: include/regex:3490 { switch (*__temp) { Do we need any more cases here? Comment at: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp:50 +std::regex_constants::syntax_option_type basic = +std::regex_constants::basic; should be `const`. (Yes, this is me being picky) https://reviews.llvm.org/D42693 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex
miyuki created this revision. miyuki added reviewers: EricWF, mclow.lists. Currently when parsing basic POSIX regular expressions libc++ silently skips invalid escaped characters and trailing escapes. This patch changes the behavior, so that a std::regex_error with code set to error_escape is thrown in these cases. https://reviews.llvm.org/D42693 Files: include/regex test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp === --- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp +++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp @@ -19,11 +19,13 @@ #include #include "test_macros.h" -static bool error_escape_thrown(const char *pat) +static bool error_escape_thrown(const char *pat, +std::regex_constants::syntax_option_type +syntax = std::regex_constants::ECMAScript) { bool result = false; try { -std::regex re(pat); +std::regex re(pat, syntax); } catch (const std::regex_error ) { result = (ex.code() == std::regex_constants::error_escape); } @@ -45,4 +47,15 @@ assert(!error_escape_thrown("[\\cA]")); assert(!error_escape_thrown("\\cA")); +std::regex_constants::syntax_option_type basic = +std::regex_constants::basic; + +assert(error_escape_thrown("\\a", basic)); +assert(error_escape_thrown("\\", basic)); + +assert(!error_escape_thrown("\\(a\\)", basic)); +assert(!error_escape_thrown("\\(a+\\)\\1", basic)); +assert(!error_escape_thrown("a\\{1,2\\}", basic)); +assert(!error_escape_thrown("\\.", basic)); +assert(!error_escape_thrown("\\*", basic)); } Index: include/regex === --- include/regex +++ include/regex @@ -3442,23 +3442,32 @@ { if (__first != __last) { -_ForwardIterator __temp = _VSTD::next(__first); -if (__temp != __last) +if (*__first == '\\') { -if (*__first == '\\') +_ForwardIterator __temp = _VSTD::next(__first); +if (__temp == __last) +__throw_regex_error(); + +switch (*__temp) { -switch (*__temp) -{ -case '^': -case '.': -case '*': -case '[': -case '$': -case '\\': -__push_char(*__temp); -__first = ++__temp; +case '^': +case '.': +case '*': +case '[': +case '$': +case '\\': +__push_char(*__temp); +__first = ++__temp; +break; +case '(': +case ')': +case '{': +case '}': +break; +default: +if (*__temp >= '1' && *__temp <= '9') break; -} +__throw_regex_error(); } } } Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp === --- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp +++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp @@ -19,11 +19,13 @@ #include #include "test_macros.h" -static bool error_escape_thrown(const char *pat) +static bool error_escape_thrown(const char *pat, +std::regex_constants::syntax_option_type +syntax = std::regex_constants::ECMAScript) { bool result = false; try { -std::regex re(pat); +std::regex re(pat, syntax); } catch (const std::regex_error ) { result = (ex.code() == std::regex_constants::error_escape); } @@ -45,4 +47,15 @@ assert(!error_escape_thrown("[\\cA]")); assert(!error_escape_thrown("\\cA")); +std::regex_constants::syntax_option_type basic = +std::regex_constants::basic; + +assert(error_escape_thrown("\\a", basic)); +assert(error_escape_thrown("\\", basic)); + +assert(!error_escape_thrown("\\(a\\)", basic)); +assert(!error_escape_thrown("\\(a+\\)\\1", basic)); +assert(!error_escape_thrown("a\\{1,2\\}", basic)); +assert(!error_escape_thrown("\\.", basic)); +assert(!error_escape_thrown("\\*", basic)); } Index: include/regex === --- include/regex +++ include/regex @@ -3442,23 +3442,32 @@ { if (__first != __last) { -_ForwardIterator __temp = _VSTD::next(__first); -if (__temp != __last) +if (*__first == '\\') { -if (*__first == '\\') +_ForwardIterator __temp = _VSTD::next(__first); +if (__temp