[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex

2018-02-14 Thread Mikhail Maltsev via Phabricator via cfe-commits
miyuki added a comment.

ping


https://reviews.llvm.org/D42693



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex

2018-02-01 Thread Mikhail Maltsev via Phabricator via cfe-commits
miyuki added inline comments.



Comment at: include/regex:3490
 {
 switch (*__temp)
 {

mclow.lists wrote:
> Do we need any more cases here?
Probably not, but we should throw an exception here as well.


https://reviews.llvm.org/D42693



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex

2018-02-01 Thread Mikhail Maltsev via Phabricator via cfe-commits
miyuki updated this revision to Diff 132352.
miyuki edited the summary of this revision.
miyuki added a comment.

Updated __parse_QUOTED_CHAR_ERE. Added more test cases.


https://reviews.llvm.org/D42693

Files:
  include/regex
  test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp

Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
===
--- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
+++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
@@ -19,11 +19,13 @@
 #include 
 #include "test_macros.h"
 
-static bool error_escape_thrown(const char *pat)
+static bool error_escape_thrown(const char *pat,
+std::regex_constants::syntax_option_type
+syntax = std::regex_constants::ECMAScript)
 {
 bool result = false;
 try {
-std::regex re(pat);
+std::regex re(pat, syntax);
 } catch (const std::regex_error ) {
 result = (ex.code() == std::regex_constants::error_escape);
 }
@@ -45,4 +47,58 @@
 assert(!error_escape_thrown("[\\cA]"));
 assert(!error_escape_thrown("\\cA"));
 
+const std::regex_constants::syntax_option_type basic =
+std::regex_constants::basic;
+
+assert(error_escape_thrown("\\a", basic));
+assert(error_escape_thrown("\\n", basic));
+assert(error_escape_thrown("\\t", basic));
+assert(error_escape_thrown("\\0", basic));
+assert(error_escape_thrown("\\/", basic));
+assert(error_escape_thrown("\\\n", basic));
+assert(error_escape_thrown("\\", basic));
+
+assert(!error_escape_thrown("[\\a]", basic));
+assert(!error_escape_thrown("\\(a\\)", basic));
+assert(!error_escape_thrown("\\(ab\\)\\1", basic));
+assert(!error_escape_thrown("a\\{1,2\\}", basic));
+assert(!error_escape_thrown("\\.", basic));
+assert(!error_escape_thrown("\\*", basic));
+
+const std::regex_constants::syntax_option_type extended =
+std::regex_constants::extended;
+
+assert(error_escape_thrown("\\a", extended));
+assert(error_escape_thrown("\\n", extended));
+assert(error_escape_thrown("\\t", extended));
+assert(error_escape_thrown("\\0", extended));
+assert(error_escape_thrown("(ab)\\1", extended));
+assert(error_escape_thrown("\\/", extended));
+assert(error_escape_thrown("\\\n", extended));
+assert(error_escape_thrown("\\", extended));
+
+assert(!error_escape_thrown("[\\a]", extended));
+assert(!error_escape_thrown("\\(a\\)", extended));
+assert(!error_escape_thrown("\\{\\}", extended));
+assert(!error_escape_thrown("\\.", extended));
+assert(!error_escape_thrown("\\*", extended));
+
+const std::regex_constants::syntax_option_type awk =
+std::regex_constants::awk;
+
+assert(error_escape_thrown("\\z", awk));
+assert(error_escape_thrown("[\\z]", awk));
+assert(error_escape_thrown("\\9", awk));
+assert(error_escape_thrown("\\\n", awk));
+assert(error_escape_thrown("\\", awk));
+
+assert(!error_escape_thrown("\\n", awk));
+assert(!error_escape_thrown("\\t", awk));
+assert(!error_escape_thrown("\\/", awk));
+assert(!error_escape_thrown("\\(a\\)", awk));
+assert(!error_escape_thrown("\\{\\}", awk));
+assert(!error_escape_thrown("\\0", awk));
+assert(!error_escape_thrown("\\1", awk));
+assert(!error_escape_thrown("\\.", awk));
+assert(!error_escape_thrown("\\*", awk));
 }
Index: include/regex
===
--- include/regex
+++ include/regex
@@ -3442,23 +3442,32 @@
 {
 if (__first != __last)
 {
-_ForwardIterator __temp = _VSTD::next(__first);
-if (__temp != __last)
+if (*__first == '\\')
 {
-if (*__first == '\\')
+_ForwardIterator __temp = _VSTD::next(__first);
+if (__temp == __last)
+__throw_regex_error();
+
+switch (*__temp)
 {
-switch (*__temp)
-{
-case '^':
-case '.':
-case '*':
-case '[':
-case '$':
-case '\\':
-__push_char(*__temp);
-__first = ++__temp;
+case '^':
+case '.':
+case '*':
+case '[':
+case '$':
+case '\\':
+__push_char(*__temp);
+__first = ++__temp;
+break;
+case '(':
+case ')':
+case '{':
+case '}':
+break;
+default:
+if (*__temp >= '1' && *__temp <= '9')
 break;
-}
+__throw_regex_error();
 }
 }
 }
@@ -3473,34 +3482,36 @@
 {
 if (__first != __last)
 {
-_ForwardIterator __temp = 

[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex

2018-01-31 Thread Mikhail Maltsev via Phabricator via cfe-commits
miyuki added inline comments.



Comment at: include/regex:3465
+case '{':
+case '}':
+break;

Quuxplusone wrote:
> FWIW, I don't understand what's going on in this switch.
> Is it intentional that `'('` and `'|'` now take different paths here?
Yes. As far as I understand, only extended POSIX regular expressions support 
alternation: 
http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html, so 
"\\|" should be treated as error_escape



https://reviews.llvm.org/D42693



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex

2018-01-30 Thread Arthur O'Dwyer via Phabricator via cfe-commits
Quuxplusone added inline comments.



Comment at: include/regex:3465
+case '{':
+case '}':
+break;

FWIW, I don't understand what's going on in this switch.
Is it intentional that `'('` and `'|'` now take different paths here?



Comment at: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp:60
+assert(!error_escape_thrown("\\.", basic));
+assert(!error_escape_thrown("\\*", basic));
 }

I would think about adding test cases here to document the intended behavior of
- "\\n" and "\\t" which are valid of course;
- "\\\n" which could be a common typo and should probably throw;
- "\\/" which is common in Perl but maybe should throw anyway;
- "\\1" in a regex mode that doesn't support backreferences;
- "\\0".
If these are already covered elsewhere in the suite, then never mind me.


https://reviews.llvm.org/D42693



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex

2018-01-30 Thread Marshall Clow via Phabricator via cfe-commits
mclow.lists added a comment.

I like this.  One nit and a question.




Comment at: include/regex:3490
 {
 switch (*__temp)
 {

Do we need any more cases here?



Comment at: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp:50
 
+std::regex_constants::syntax_option_type basic =
+std::regex_constants::basic;

should be `const`.   (Yes, this is me being picky)


https://reviews.llvm.org/D42693



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42693: [libcxx] Handle invalid escaped characters in POSIX regex

2018-01-30 Thread Mikhail Maltsev via Phabricator via cfe-commits
miyuki created this revision.
miyuki added reviewers: EricWF, mclow.lists.

Currently when parsing basic POSIX regular expressions libc++
silently skips invalid escaped characters and trailing escapes.
This patch changes the behavior, so that a std::regex_error with
code set to error_escape is thrown in these cases.


https://reviews.llvm.org/D42693

Files:
  include/regex
  test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp


Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
===
--- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
+++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
@@ -19,11 +19,13 @@
 #include 
 #include "test_macros.h"
 
-static bool error_escape_thrown(const char *pat)
+static bool error_escape_thrown(const char *pat,
+std::regex_constants::syntax_option_type
+syntax = std::regex_constants::ECMAScript)
 {
 bool result = false;
 try {
-std::regex re(pat);
+std::regex re(pat, syntax);
 } catch (const std::regex_error ) {
 result = (ex.code() == std::regex_constants::error_escape);
 }
@@ -45,4 +47,15 @@
 assert(!error_escape_thrown("[\\cA]"));
 assert(!error_escape_thrown("\\cA"));
 
+std::regex_constants::syntax_option_type basic =
+std::regex_constants::basic;
+
+assert(error_escape_thrown("\\a", basic));
+assert(error_escape_thrown("\\", basic));
+
+assert(!error_escape_thrown("\\(a\\)", basic));
+assert(!error_escape_thrown("\\(a+\\)\\1", basic));
+assert(!error_escape_thrown("a\\{1,2\\}", basic));
+assert(!error_escape_thrown("\\.", basic));
+assert(!error_escape_thrown("\\*", basic));
 }
Index: include/regex
===
--- include/regex
+++ include/regex
@@ -3442,23 +3442,32 @@
 {
 if (__first != __last)
 {
-_ForwardIterator __temp = _VSTD::next(__first);
-if (__temp != __last)
+if (*__first == '\\')
 {
-if (*__first == '\\')
+_ForwardIterator __temp = _VSTD::next(__first);
+if (__temp == __last)
+__throw_regex_error();
+
+switch (*__temp)
 {
-switch (*__temp)
-{
-case '^':
-case '.':
-case '*':
-case '[':
-case '$':
-case '\\':
-__push_char(*__temp);
-__first = ++__temp;
+case '^':
+case '.':
+case '*':
+case '[':
+case '$':
+case '\\':
+__push_char(*__temp);
+__first = ++__temp;
+break;
+case '(':
+case ')':
+case '{':
+case '}':
+break;
+default:
+if (*__temp >= '1' && *__temp <= '9')
 break;
-}
+__throw_regex_error();
 }
 }
 }


Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
===
--- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
+++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
@@ -19,11 +19,13 @@
 #include 
 #include "test_macros.h"
 
-static bool error_escape_thrown(const char *pat)
+static bool error_escape_thrown(const char *pat,
+std::regex_constants::syntax_option_type
+syntax = std::regex_constants::ECMAScript)
 {
 bool result = false;
 try {
-std::regex re(pat);
+std::regex re(pat, syntax);
 } catch (const std::regex_error ) {
 result = (ex.code() == std::regex_constants::error_escape);
 }
@@ -45,4 +47,15 @@
 assert(!error_escape_thrown("[\\cA]"));
 assert(!error_escape_thrown("\\cA"));
 
+std::regex_constants::syntax_option_type basic =
+std::regex_constants::basic;
+
+assert(error_escape_thrown("\\a", basic));
+assert(error_escape_thrown("\\", basic));
+
+assert(!error_escape_thrown("\\(a\\)", basic));
+assert(!error_escape_thrown("\\(a+\\)\\1", basic));
+assert(!error_escape_thrown("a\\{1,2\\}", basic));
+assert(!error_escape_thrown("\\.", basic));
+assert(!error_escape_thrown("\\*", basic));
 }
Index: include/regex
===
--- include/regex
+++ include/regex
@@ -3442,23 +3442,32 @@
 {
 if (__first != __last)
 {
-_ForwardIterator __temp = _VSTD::next(__first);
-if (__temp != __last)
+if (*__first == '\\')
 {
-if (*__first == '\\')
+_ForwardIterator __temp = _VSTD::next(__first);
+if (__temp