Serhiy Storchaka added the comment:

> Sounds ok, but it would be nice to add some tests.

Thank you. Here is a patch with an added test.

----------
Added file: http://bugs.python.org/file36836/re_error_attrs2.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue22578>
_______________________________________
diff -r f21f0de30544 Doc/library/re.rst
--- a/Doc/library/re.rst        Wed Oct 08 13:15:36 2014 +0300
+++ b/Doc/library/re.rst        Wed Oct 08 17:19:56 2014 +0300
@@ -726,13 +726,36 @@ form.
    Clear the regular expression cache.
 
 
-.. exception:: error
+.. exception:: error(msg, pattern=None, pos=None)
 
    Exception raised when a string passed to one of the functions here is not a
    valid regular expression (for example, it might contain unmatched parentheses)
    or when some other error occurs during compilation or matching.  It is never an
-   error if a string contains no match for a pattern.
+   error if a string contains no match for a pattern.  The error instance has
+   the following additional attributes:
 
+   .. attribute:: msg
+
+      The unformatted error message.
+
+   .. attribute:: pattern
+
+      The regular expression pattern.
+
+   .. attribute:: pos
+
+      The index in *pattern* where compilation failed (may be ``None``).
+
+   .. attribute:: lineno
+
+      The line corresponding to *pos* (may be ``None``).
+
+   .. attribute:: colno
+
+      The column corresponding to *pos* (may be ``None``).
+
+   .. versionchanged:: 3.5
+      Added additional attributes.
 
 .. _re-objects:
 
diff -r f21f0de30544 Lib/sre_constants.py
--- a/Lib/sre_constants.py      Wed Oct 08 13:15:36 2014 +0300
+++ b/Lib/sre_constants.py      Wed Oct 08 17:19:56 2014 +0300
@@ -21,8 +21,37 @@ from _sre import MAXREPEAT, MAXGROUPS
 # should this really be here?
 
 class error(Exception):
-    pass
+    def __init__(self, msg, pattern=None, pos=None):
+        self.msg = msg
+        self.pattern = pattern
+        self.pos = pos
+        if pattern is not None and pos is not None:
+            msg = '%s at position %d' % (msg, pos)
+            if isinstance(pattern, str):
+                newline = '\n'
+            else:
+                newline = b'\n'
+            self.lineno = pattern.count(newline, 0, pos) + 1
+            if self.lineno == 1:
+                self.colno = pos + 1
+            else:
+                self.colno = pos - pattern.rindex(newline, 0, pos)
+                msg = '%s (line %d, column %d)' % (msg, self.lineno, self.colno)
+        else:
+            self.lineno = self.colno = None
+        super().__init__(msg)
 
+def linecol(doc, pos):
+    """Return the 1-based (lineno, colno) of the 0-based offset *pos* in *doc*.
+
+    *doc* may be a str or a bytes object; the matching newline type is used.
+    """
+    newline = '\n' if isinstance(doc, str) else b'\n'
+    lineno = doc.count(newline, 0, pos) + 1
+    if lineno == 1:
+        return lineno, pos + 1
+    # Column is the distance from the last newline that precedes pos.
+    return lineno, pos - doc.rindex(newline, 0, pos)
 # operators
 
 FAILURE = "failure"
diff -r f21f0de30544 Lib/sre_parse.py
--- a/Lib/sre_parse.py  Wed Oct 08 13:15:36 2014 +0300
+++ b/Lib/sre_parse.py  Wed Oct 08 17:19:56 2014 +0300
@@ -207,7 +207,8 @@ class Tokenizer:
             try:
                 c = self.string[self.index + 1]
             except IndexError:
-                raise error("bogus escape (end of line)")
+                self.next = None
+                raise self.error("bogus escape (end of line)", 0)
             if not self.istext:
                 c = chr(c)
             char = char + c
@@ -233,9 +234,13 @@ class Tokenizer:
             self.__next()
         return result
     def tell(self):
-        return self.index, self.next
+        return self.index - len(self.next or '')
     def seek(self, index):
-        self.index, self.next = index
+        self.index = index
+        self.__next()
+
+    def error(self, msg, offset):
+        return error(msg, self.string, self.tell() - offset)
 
 # The following three functions are not used in this module anymore, but we keep
 # them here (with DeprecationWarnings) for backwards compatibility.
@@ -299,8 +304,8 @@ def _class_escape(source, escape):
             escape += source.getwhile(2, OCTDIGITS)
             c = int(escape[1:], 8)
             if c > 0o377:
-                raise error('octal escape value %r outside of '
-                            'range 0-0o377' % escape)
+                raise source.error('octal escape value %r outside of '
+                                   'range 0-0o377' % escape, len(escape))
             return LITERAL, c
         elif c in DIGITS:
             raise ValueError
@@ -308,7 +313,7 @@ def _class_escape(source, escape):
             return LITERAL, ord(escape[1])
     except ValueError:
         pass
-    raise error("bogus escape: %s" % repr(escape))
+    raise source.error("bogus escape: %s" % repr(escape), len(escape))
 
 def _escape(source, escape, state):
     # handle escape code in expression
@@ -354,21 +359,23 @@ def _escape(source, escape, state):
                     escape = escape + source.get()
                     c = int(escape[1:], 8)
                     if c > 0o377:
-                        raise error('octal escape value %r outside of '
-                                    'range 0-0o377' % escape)
+                        raise source.error('octal escape value %r outside of '
+                                           'range 0-0o377' % escape,
+                                           len(escape))
                     return LITERAL, c
             # not an octal escape, so this is a group reference
             group = int(escape[1:])
             if group < state.groups:
                 if not state.checkgroup(group):
-                    raise error("cannot refer to open group")
+                    raise source.error("cannot refer to open group",
+                                       len(escape))
                 return GROUPREF, group
             raise ValueError
         if len(escape) == 2:
             return LITERAL, ord(escape[1])
     except ValueError:
         pass
-    raise error("bogus escape: %s" % repr(escape))
+    raise source.error("bogus escape: %s" % repr(escape), len(escape))
 
 def _parse_sub(source, state, nested=1):
     # parse an alternation: a|b|c
@@ -385,7 +392,7 @@ def _parse_sub(source, state, nested=1):
         if not source.next or sourcematch(")", 0):
             break
         else:
-            raise error("pattern not properly closed")
+            raise source.error("pattern not properly closed", 0)
 
     if len(items) == 1:
         return items[0]
@@ -434,11 +441,12 @@ def _parse_sub_cond(source, state, condg
     if source.match("|"):
         item_no = _parse(source, state)
         if source.match("|"):
-            raise error("conditional backref with more than two branches")
+            raise source.error("conditional backref with more than two branches",
+                               1)
     else:
         item_no = None
     if source.next and not source.match(")", 0):
-        raise error("pattern not properly closed")
+        raise source.error("pattern not properly closed", 0)
     subpattern = SubPattern(state)
     subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
     return subpattern
@@ -503,7 +511,7 @@ def _parse(source, state):
                 elif this:
                     code1 = LITERAL, ord(this)
                 else:
-                    raise error("unexpected end of regular expression")
+                    raise source.error("unexpected end of regular expression", 0)
                 if sourcematch("-"):
                     # potential range
                     this = sourceget()
@@ -519,14 +527,14 @@ def _parse(source, state):
                         else:
                             code2 = LITERAL, ord(this)
                         if code1[0] != LITERAL or code2[0] != LITERAL:
-                            raise error("bad character range")
+                            raise source.error("bad character range", len(this))
                         lo = code1[1]
                         hi = code2[1]
                         if hi < lo:
-                            raise error("bad character range")
+                            raise source.error("bad character range", len(this))
                         setappend((RANGE, (lo, hi)))
                     else:
-                        raise error("unexpected end of regular expression")
+                        raise source.error("unexpected end of regular expression", 0)
                 else:
                     if code1[0] is IN:
                         code1 = code1[1][0]
@@ -543,6 +551,7 @@ def _parse(source, state):
 
         elif this and this[0] in REPEAT_CHARS:
             # repeat previous item
+            here = source.tell()
             if this == "?":
                 min, max = 0, 1
             elif this == "*":
@@ -554,7 +563,6 @@ def _parse(source, state):
                 if source.next == "}":
                     subpatternappend((LITERAL, ord(this)))
                     continue
-                here = source.tell()
                 min, max = 0, MAXREPEAT
                 lo = hi = ""
                 while source.next in DIGITS:
@@ -577,18 +585,21 @@ def _parse(source, state):
                     if max >= MAXREPEAT:
                         raise OverflowError("the repetition number is too large")
                     if max < min:
-                        raise error("bad repeat interval")
+                        raise source.error("bad repeat interval",
+                                           source.tell() - here)
             else:
-                raise error("not supported")
+                raise source.error("not supported", len(this))
             # figure out which item to repeat
             if subpattern:
                 item = subpattern[-1:]
             else:
                 item = None
             if not item or (_len(item) == 1 and item[0][0] == AT):
-                raise error("nothing to repeat")
+                raise source.error("nothing to repeat",
+                                   source.tell() - here + len(this))
             if item[0][0] in REPEATCODES:
-                raise error("multiple repeat")
+                raise source.error("multiple repeat",
+                                   source.tell() - here + len(this))
             if sourcematch("?"):
                 subpattern[-1] = (MIN_REPEAT, (min, max, item))
             else:
@@ -612,41 +623,45 @@ def _parse(source, state):
                         while 1:
                             char = sourceget()
                             if char is None:
-                                raise error("unterminated name")
+                                raise source.error("unterminated name", 0)
                             if char == ">":
                                 break
                             name = name + char
                         group = 1
                         if not name:
-                            raise error("missing group name")
+                            raise source.error("missing group name", 1)
                         if not name.isidentifier():
-                            raise error("bad character in group name %r" % name)
+                            raise source.error("bad character in group name "
+                                               "%r" % name,
+                                               len(name) + 1)
                     elif sourcematch("="):
                         # named backreference
                         name = ""
                         while 1:
                             char = sourceget()
                             if char is None:
-                                raise error("unterminated name")
+                                raise source.error("unterminated name", 0)
                             if char == ")":
                                 break
                             name = name + char
                         if not name:
-                            raise error("missing group name")
+                            raise source.error("missing group name", 1)
                         if not name.isidentifier():
-                            raise error("bad character in backref group name "
-                                        "%r" % name)
+                            raise source.error("bad character in backref "
+                                               "group name %r" % name,
+                                               len(name) + 1)
                         gid = state.groupdict.get(name)
                         if gid is None:
                             msg = "unknown group name: {0!r}".format(name)
-                            raise error(msg)
+                            raise source.error(msg, len(name) + 1)
                         subpatternappend((GROUPREF, gid))
                         continue
                     else:
                         char = sourceget()
                         if char is None:
-                            raise error("unexpected end of pattern")
-                        raise error("unknown specifier: ?P%s" % char)
+                            raise source.error("unexpected end of pattern", 0)
+                        raise source.error("unknown specifier: ?P%s" % char,
+                                           len(char))
                 elif sourcematch(":"):
                     # non-capturing group
                     group = 2
@@ -657,7 +672,7 @@ def _parse(source, state):
                             break
                         sourceget()
                     if not sourcematch(")"):
-                        raise error("unbalanced parenthesis")
+                        raise source.error("unbalanced parenthesis", 0)
                     continue
                 elif source.next in ASSERTCHARS:
                     # lookahead assertions
@@ -665,12 +680,12 @@ def _parse(source, state):
                     dir = 1
                     if char == "<":
                         if source.next not in LOOKBEHINDASSERTCHARS:
-                            raise error("syntax error")
+                            raise source.error("syntax error", 0)
                         dir = -1 # lookbehind
                         char = sourceget()
                     p = _parse_sub(source, state)
                     if not sourcematch(")"):
-                        raise error("unbalanced parenthesis")
+                        raise source.error("unbalanced parenthesis", 0)
                     if char == "=":
                         subpatternappend((ASSERT, (dir, p)))
                     else:
@@ -682,33 +697,36 @@ def _parse(source, state):
                     while 1:
                         char = sourceget()
                         if char is None:
-                            raise error("unterminated name")
+                            raise source.error("unterminated name", 0)
                         if char == ")":
                             break
                         condname = condname + char
                     group = 2
                     if not condname:
-                        raise error("missing group name")
+                        raise source.error("missing group name", 1)
                     if condname.isidentifier():
                         condgroup = state.groupdict.get(condname)
                         if condgroup is None:
                             msg = "unknown group name: {0!r}".format(condname)
-                            raise error(msg)
+                            raise source.error(msg, len(condname) + 1)
                     else:
                         try:
                             condgroup = int(condname)
                             if condgroup < 0:
                                 raise ValueError
                         except ValueError:
-                            raise error("bad character in group name")
+                            raise source.error("bad character in group name",
+                                               len(condname) + 1)
                         if not condgroup:
-                            raise error("bad group number")
+                            raise source.error("bad group number",
+                                               len(condname) + 1)
                         if condgroup >= MAXGROUPS:
-                            raise error("the group number is too large")
+                            raise source.error("the group number is too large",
+                                               len(condname) + 1)
                 else:
                     # flags
                     if not source.next in FLAGS:
-                        raise error("unexpected end of pattern")
+                        raise source.error("unexpected end of pattern", 0)
                     while source.next in FLAGS:
                         state.flags = state.flags | FLAGS[sourceget()]
             if group:
@@ -717,13 +735,16 @@ def _parse(source, state):
                     # anonymous group
                     group = None
                 else:
-                    group = state.opengroup(name)
+                    try:
+                        group = state.opengroup(name)
+                    except error as err:
+                        raise source.error(err.msg, len(name) + 1)
                 if condgroup:
                     p = _parse_sub_cond(source, state, condgroup)
                 else:
                     p = _parse_sub(source, state)
                 if not sourcematch(")"):
-                    raise error("unbalanced parenthesis")
+                    raise source.error("unbalanced parenthesis", 0)
                 if group is not None:
                     state.closegroup(group)
                 subpatternappend((SUBPATTERN, (group, p)))
@@ -731,10 +752,10 @@ def _parse(source, state):
                 while 1:
                     char = sourceget()
                     if char is None:
-                        raise error("unexpected end of pattern")
+                        raise source.error("unexpected end of pattern", 0)
                     if char == ")":
                         break
-                    raise error("unknown extension")
+                    raise source.error("unknown extension", len(char))
 
         elif this == "^":
             subpatternappend((AT, AT_BEGINNING))
@@ -747,7 +768,7 @@ def _parse(source, state):
             subpatternappend(code)
 
         else:
-            raise error("parser error")
+            raise source.error("parser error", len(this))
 
     return subpattern
 
@@ -778,9 +799,10 @@ def parse(str, flags=0, pattern=None):
 
     tail = source.get()
     if tail == ")":
-        raise error("unbalanced parenthesis")
+        raise source.error("unbalanced parenthesis", 1)
     elif tail:
-        raise error("bogus characters at end of regular expression")
+        raise source.error("bogus characters at end of regular expression",
+                           len(tail))
 
     if flags & SRE_FLAG_DEBUG:
         p.dump()
@@ -820,21 +842,23 @@ def parse_template(source, pattern):
                     while True:
                         char = sget()
                         if char is None:
-                            raise error("unterminated group name")
+                            raise s.error("unterminated group name", 0)
                         if char == ">":
                             break
                         name += char
                 if not name:
-                    raise error("missing group name")
+                    raise s.error("missing group name", 1)
                 try:
                     index = int(name)
                     if index < 0:
-                        raise error("negative group number")
+                        raise s.error("negative group number", len(name) + 1)
                     if index >= MAXGROUPS:
-                        raise error("the group number is too large")
+                        raise s.error("the group number is too large",
+                                      len(name) + 1)
                 except ValueError:
                     if not name.isidentifier():
-                        raise error("bad character in group name")
+                        raise s.error("bad character in group name",
+                                      len(name) + 1)
                     try:
                         index = pattern.groupindex[name]
                     except KeyError:
@@ -857,8 +881,8 @@ def parse_template(source, pattern):
                         isoctal = True
                         c = int(this[1:], 8)
                         if c > 0o377:
-                            raise error('octal escape value %r outside of '
-                                        'range 0-0o377' % this)
+                            raise s.error('octal escape value %r outside of '
+                                          'range 0-0o377' % this, len(this))
                         lappend(chr(c))
                 if not isoctal:
                     addgroup(int(this[1:]))
diff -r f21f0de30544 Lib/test/test_re.py
--- a/Lib/test/test_re.py       Wed Oct 08 13:15:36 2014 +0300
+++ b/Lib/test/test_re.py       Wed Oct 08 17:19:56 2014 +0300
@@ -1270,6 +1270,40 @@ subpattern None
         # with ignore case.
         self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
 
+    def test_error(self):
+        with self.assertRaises(re.error) as cm:
+            re.compile('(\u20ac))')
+        err = cm.exception
+        self.assertIsInstance(err.pattern, str)
+        self.assertEqual(err.pattern, '(\u20ac))')
+        self.assertEqual(err.pos, 3)
+        self.assertEqual(err.lineno, 1)
+        self.assertEqual(err.colno, 4)
+        self.assertIn(err.msg, str(err))
+        self.assertIn(' at position 3', str(err))
+        self.assertNotIn(' at position 3', err.msg)
+        with self.assertRaises(re.error) as cm:
+            re.compile(b'(\xa4))')
+        err = cm.exception
+        self.assertIsInstance(err.pattern, bytes)
+        self.assertEqual(err.pattern, b'(\xa4))')
+        self.assertEqual(err.pos, 3)
+        with self.assertRaises(re.error) as cm:
+            re.compile("""
+                (
+                    abc
+                )
+                )
+                (
+                """, re.VERBOSE)
+        err = cm.exception
+        self.assertEqual(err.pos, 77)
+        self.assertEqual(err.lineno, 5)
+        self.assertEqual(err.colno, 17)
+        self.assertIn(err.msg, str(err))
+        self.assertIn(' at position 77', str(err))
+        self.assertIn('(line 5, column 17)', str(err))
+
 
 class PatternReprTests(unittest.TestCase):
     def check(self, pattern, expected):
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to