miyuki created this revision.
miyuki added reviewers: EricWF, mclow.lists.

Currently, when parsing basic POSIX regular expressions, libc++
silently skips invalid escape sequences and trailing escapes (a lone
backslash at the end of the pattern). This patch changes that behavior
so that a std::regex_error with its code set to error_escape is thrown
in these cases.


https://reviews.llvm.org/D42693

Files:
  include/regex
  test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp


Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
===================================================================
--- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
+++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
@@ -19,11 +19,13 @@
 #include <cassert>
 #include "test_macros.h"
 
-static bool error_escape_thrown(const char *pat)
+static bool error_escape_thrown(const char *pat,
+                                std::regex_constants::syntax_option_type
+                                    syntax = std::regex_constants::ECMAScript)
 {
     bool result = false;
     try {
-        std::regex re(pat);
+        std::regex re(pat, syntax);
     } catch (const std::regex_error &ex) {
         result = (ex.code() == std::regex_constants::error_escape);
     }
@@ -45,4 +47,15 @@
     assert(!error_escape_thrown("[\\cA]"));
     assert(!error_escape_thrown("\\cA"));
 
+    std::regex_constants::syntax_option_type basic =
+        std::regex_constants::basic;
+
+    assert(error_escape_thrown("\\a", basic));
+    assert(error_escape_thrown("\\", basic));
+
+    assert(!error_escape_thrown("\\(a\\)", basic));
+    assert(!error_escape_thrown("\\(a+\\)\\1", basic));
+    assert(!error_escape_thrown("a\\{1,2\\}", basic));
+    assert(!error_escape_thrown("\\.", basic));
+    assert(!error_escape_thrown("\\*", basic));
 }
Index: include/regex
===================================================================
--- include/regex
+++ include/regex
@@ -3442,23 +3442,32 @@
 {
     if (__first != __last)
     {
-        _ForwardIterator __temp = _VSTD::next(__first);
-        if (__temp != __last)
+        if (*__first == '\\')
         {
-            if (*__first == '\\')
+            _ForwardIterator __temp = _VSTD::next(__first);
+            if (__temp == __last)
+                __throw_regex_error<regex_constants::error_escape>();
+
+            switch (*__temp)
             {
-                switch (*__temp)
-                {
-                case '^':
-                case '.':
-                case '*':
-                case '[':
-                case '$':
-                case '\\':
-                    __push_char(*__temp);
-                    __first = ++__temp;
+            case '^':
+            case '.':
+            case '*':
+            case '[':
+            case '$':
+            case '\\':
+                __push_char(*__temp);
+                __first = ++__temp;
+                break;
+            case '(':
+            case ')':
+            case '{':
+            case '}':
+                break;
+            default:
+                if (*__temp >= '1' && *__temp <= '9')
                     break;
-                }
+                __throw_regex_error<regex_constants::error_escape>();
             }
         }
     }


Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
===================================================================
--- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
+++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp
@@ -19,11 +19,13 @@
 #include <cassert>
 #include "test_macros.h"
 
-static bool error_escape_thrown(const char *pat)
+static bool error_escape_thrown(const char *pat,
+                                std::regex_constants::syntax_option_type
+                                    syntax = std::regex_constants::ECMAScript)
 {
     bool result = false;
     try {
-        std::regex re(pat);
+        std::regex re(pat, syntax);
     } catch (const std::regex_error &ex) {
         result = (ex.code() == std::regex_constants::error_escape);
     }
@@ -45,4 +47,15 @@
     assert(!error_escape_thrown("[\\cA]"));
     assert(!error_escape_thrown("\\cA"));
 
+    std::regex_constants::syntax_option_type basic =
+        std::regex_constants::basic;
+
+    assert(error_escape_thrown("\\a", basic));
+    assert(error_escape_thrown("\\", basic));
+
+    assert(!error_escape_thrown("\\(a\\)", basic));
+    assert(!error_escape_thrown("\\(a+\\)\\1", basic));
+    assert(!error_escape_thrown("a\\{1,2\\}", basic));
+    assert(!error_escape_thrown("\\.", basic));
+    assert(!error_escape_thrown("\\*", basic));
 }
Index: include/regex
===================================================================
--- include/regex
+++ include/regex
@@ -3442,23 +3442,32 @@
 {
     if (__first != __last)
     {
-        _ForwardIterator __temp = _VSTD::next(__first);
-        if (__temp != __last)
+        if (*__first == '\\')
         {
-            if (*__first == '\\')
+            _ForwardIterator __temp = _VSTD::next(__first);
+            if (__temp == __last)
+                __throw_regex_error<regex_constants::error_escape>();
+
+            switch (*__temp)
             {
-                switch (*__temp)
-                {
-                case '^':
-                case '.':
-                case '*':
-                case '[':
-                case '$':
-                case '\\':
-                    __push_char(*__temp);
-                    __first = ++__temp;
+            case '^':
+            case '.':
+            case '*':
+            case '[':
+            case '$':
+            case '\\':
+                __push_char(*__temp);
+                __first = ++__temp;
+                break;
+            case '(':
+            case ')':
+            case '{':
+            case '}':
+                break;
+            default:
+                if (*__temp >= '1' && *__temp <= '9')
                     break;
-                }
+                __throw_regex_error<regex_constants::error_escape>();
             }
         }
     }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to