[pypy-commit] pypy default: merge pyparser-improvements-2

cfbolz Tue, 10 Apr 2018 01:45:00 -0700

Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch: 
Changeset: r94294:e66f24650daf
Date: 2018-04-10 09:41 +0200
http://bitbucket.org/pypy/pypy/changeset/e66f24650daf/


Log:    merge pyparser-improvements-2

        - fixes .offset values of SyntaxError, which is 1-based (but the
        raising code sometimes assumed it was 0-based)

        - expand some abbreviations

        - better error messages for non-matching parentheses

diff --git a/lib-python/2.7/test/test_eof.py b/lib-python/2.7/test/test_eof.py
--- a/lib-python/2.7/test/test_eof.py
+++ b/lib-python/2.7/test/test_eof.py
@@ -5,7 +5,7 @@
 
 class EOFTestCase(unittest.TestCase):
     def test_EOFC(self):
-        expect = "EOL while scanning string literal (<string>, line 1)"
+        expect = "end of line (EOL) while scanning string literal (<string>, line 1)"
         try:
             eval("""'this is a test\
             """)
@@ -15,7 +15,7 @@
             raise test_support.TestFailed
 
     def test_EOFS(self):
-        expect = ("EOF while scanning triple-quoted string literal "
+        expect = ("end of file (EOF) while scanning triple-quoted string literal "
                   "(<string>, line 1)")
         try:
             eval("""'''this is a test""")
diff --git a/lib-python/2.7/test/test_traceback.py b/lib-python/2.7/test/test_traceback.py
--- a/lib-python/2.7/test/test_traceback.py
+++ b/lib-python/2.7/test/test_traceback.py
@@ -123,10 +123,7 @@
         self.assertEqual(len(err), 4)
         self.assertEqual(err[1].strip(), "print(2)")
         self.assertIn("^", err[2])
-        if check_impl_detail():
-            self.assertEqual(err[1].find("p"), err[2].find("^"))
-        if check_impl_detail(pypy=True):
-            self.assertEqual(err[1].find("2)") + 1, err[2].find("^"))
+        self.assertEqual(err[1].find("p"), err[2].find("^"))
 
     def test_base_exception(self):
         # Test that exceptions derived from BaseException are formatted right
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -10,3 +10,9 @@
 Fix for python-level classes that inherit from C-API types, previously the
 `w_obj` was not necessarily preserved throughout the lifetime of the `pyobj`
 which led to cases where instance attributes were lost. Fixes issue #2793
+
+
+.. branch: pyparser-improvements-2
+
+Improve line offsets that are reported by SyntaxError. Improve error messages
+for a few situations, including mismatched parenthesis.
diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -6,6 +6,7 @@
                  lastlineno=0):
         self.msg = msg
         self.lineno = lineno
+        # NB: offset is a 1-based index!
         self.offset = offset
         self.text = text
         self.filename = filename
diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -199,6 +199,7 @@
         self.token_type = token_type
         self.value = value
         self.lineno = lineno
+        # this is a 0-based index
         self.column = column
         self.line = line
         self.expected = expected
diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -188,7 +188,9 @@
                     if e.expected_str is not None:
                         msg += " (expected '%s')" % e.expected_str
 
-                raise new_err(msg, e.lineno, e.column, e.line,
+                # parser.ParseError(...).column is 0-based, but the offsets in the
+                # exceptions in the error module are 1-based, hence the '+ 1'
+                raise new_err(msg, e.lineno, e.column + 1, e.line,
                               compile_info.filename)
             else:
                 tree = self.root
diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -73,14 +73,14 @@
         logical line; continuation lines are included.
     """
     token_list = []
-    lnum = parenlev = continued = 0
+    lnum = continued = 0
     namechars = NAMECHARS
     numchars = NUMCHARS
     contstr, needcont = '', 0
     contline = None
     indents = [0]
     last_comment = ''
-    parenlevstart = (0, 0, "")
+    parenstack = []
 
     # make the annotator happy
     endDFA = DUMMY_DFA
@@ -97,7 +97,7 @@
         if contstr:
             if not line:
                 raise TokenError(
-                    "EOF while scanning triple-quoted string literal",
+                    "end of file (EOF) while scanning triple-quoted string literal",
                     strstart[2], strstart[0], strstart[1]+1,
                     token_list, lnum-1)
             endmatch = endDFA.recognize(line)
@@ -123,7 +123,7 @@
                 contline = contline + line
                 continue
 
-        elif parenlev == 0 and not continued:  # new statement
+        elif not parenstack and not continued:  # new statement
             if not line: break
             column = 0
             while pos < max:                   # measure leading whitespace
@@ -143,21 +143,21 @@
                 token_list.append((tokens.INDENT, line[:pos], lnum, 0, line))
                 last_comment = ''
             while column < indents[-1]:
-                indents = indents[:-1]
+                indents.pop()
                 token_list.append((tokens.DEDENT, '', lnum, pos, line))
                 last_comment = ''
             if column != indents[-1]:
                 err = "unindent does not match any outer indentation level"
-                raise TokenIndentationError(err, line, lnum, 0, token_list)
+                raise TokenIndentationError(err, line, lnum, column+1, token_list)
 
         else:                                  # continued statement
             if not line:
-                if parenlev > 0:
-                    lnum1, start1, line1 = parenlevstart
+                if parenstack:
+                    _, lnum1, start1, line1 = parenstack[0]
                     raise TokenError("parenthesis is never closed", line1,
                                      lnum1, start1 + 1, token_list, lnum)
-                raise TokenError("EOF in multi-line statement", line,
-                                 lnum, 0, token_list)
+                raise TokenError("end of file (EOF) in multi-line statement", line,
+                                 lnum, 0, token_list) # XXX why is the offset 0 here?
             continued = 0
 
         while pos < max:
@@ -180,7 +180,7 @@
                     token_list.append((tokens.NUMBER, token, lnum, start, line))
                     last_comment = ''
                 elif initial in '\r\n':
-                    if parenlev <= 0:
+                    if not parenstack:
                         tok = (tokens.NEWLINE, last_comment, lnum, start, line)
                         token_list.append(tok)
                     last_comment = ''
@@ -222,14 +222,22 @@
                     continued = 1
                 else:
                     if initial in '([{':
-                        if parenlev == 0:
-                            parenlevstart = (lnum, start, line)
-                        parenlev = parenlev + 1
+                        parenstack.append((initial, lnum, start, line))
                     elif initial in ')]}':
-                        parenlev = parenlev - 1
-                        if parenlev < 0:
+                        if not parenstack:
                             raise TokenError("unmatched '%s'" % initial, line,
                                              lnum, start + 1, token_list)
+                        opening, lnum1, start1, line1 = parenstack.pop()
+                        if not ((opening == "(" and initial == ")") or
+                                (opening == "[" and initial == "]") or
+                                (opening == "{" and initial == "}")):
+                            msg = "closing parenthesis '%s' does not match opening parenthesis '%s'" % (
+                                        initial, opening)
+
+                            if lnum1 != lnum:
+                                msg += " on line " + str(lnum1)
+                            raise TokenError(
+                                    msg, line, lnum, start + 1, token_list)
                     if token in python_opmap:
                         punct = python_opmap[token]
                     else:
@@ -241,7 +249,7 @@
                 if start < 0:
                     start = pos
                 if start<max and line[start] in single_quoted:
-                    raise TokenError("EOL while scanning string literal",
+                    raise TokenError("end of line (EOL) while scanning string literal",
                              line, lnum, start+1, token_list)
                 tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line)
                 token_list.append(tok)
diff --git a/pypy/interpreter/pyparser/test/targetparse.py b/pypy/interpreter/pyparser/test/targetparse.py
--- a/pypy/interpreter/pyparser/test/targetparse.py
+++ b/pypy/interpreter/pyparser/test/targetparse.py
@@ -8,25 +8,36 @@
 
 
 
-with file("../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py") as f:
-    s = f.read()
-
 class FakeSpace(object):
     pass
 
 fakespace = FakeSpace()
 
-def bench(title):
+def bench(fn, s):
     a = time.clock()
     info = pyparse.CompileInfo("<string>", "exec")
     parser = pyparse.PythonParser(fakespace)
     tree = parser._parse(s, info)
     b = time.clock()
-    print title, (b-a)
+    print fn, (b-a)
 
 
 def entry_point(argv):
-    bench("foo")
+    if len(argv) == 2:
+        fn = argv[1]
+    else:
+        fn = "../../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py"
+    fd = os.open(fn, os.O_RDONLY, 0777)
+    res = []
+    while True:
+        s = os.read(fd, 4096)
+        if not s:
+            break
+        res.append(s)
+    os.close(fd)
+    s = "".join(res)
+    print len(s)
+    bench(fn, s)
 
     return 0
 
diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py
--- a/pypy/interpreter/pyparser/test/test_pyparse.py
+++ b/pypy/interpreter/pyparser/test/test_pyparse.py
@@ -76,14 +76,14 @@
         exc = py.test.raises(SyntaxError, parse, "name another for").value
         assert exc.msg == "invalid syntax"
         assert exc.lineno == 1
-        assert exc.offset == 5
+        assert exc.offset == 6
         assert exc.text.startswith("name another for")
         exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value
-        assert exc.msg == "EOL while scanning string literal"
+        assert exc.msg == "end of line (EOL) while scanning string literal"
         assert exc.lineno == 1
         assert exc.offset == 5
         exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value
-        assert exc.msg == "EOF while scanning triple-quoted string literal"
+        assert exc.msg == "end of file (EOF) while scanning triple-quoted string literal"
         assert exc.lineno == 1
         assert exc.offset == 5
         assert exc.lastlineno == 3
@@ -112,7 +112,7 @@
         assert exc.msg == "expected an indented block"
         assert exc.lineno == 3
         assert exc.text.startswith("pass")
-        assert exc.offset == 0
+        assert exc.offset == 1
         input = "hi\n    indented"
         exc = py.test.raises(IndentationError, parse, input).value
         assert exc.msg == "unexpected indent"
@@ -120,6 +120,7 @@
         exc = py.test.raises(IndentationError, parse, input).value
         assert exc.msg == "unindent does not match any outer indentation level"
         assert exc.lineno == 3
+        assert exc.offset == 3
 
     def test_mac_newline(self):
         self.parse("this_is\ra_mac\rfile")
diff --git a/pypy/interpreter/pyparser/test/test_pytokenizer.py b/pypy/interpreter/pyparser/test/test_pytokenizer.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/pyparser/test/test_pytokenizer.py
@@ -0,0 +1,66 @@
+import pytest
+from pypy.interpreter.pyparser import pytokenizer
+from pypy.interpreter.pyparser.pygram import tokens
+from pypy.interpreter.pyparser.error import TokenError
+
+def tokenize(s):
+    return pytokenizer.generate_tokens(s.splitlines(True) + ["\n"], 0)
+
+def check_token_error(s, msg=None, pos=-1, line=-1):
+    error = pytest.raises(TokenError, tokenize, s)
+    if msg is not None:
+        assert error.value.msg == msg
+    if pos != -1:
+        assert error.value.offset == pos
+    if line != -1:
+        assert error.value.lineno == line
+
+
+class TestTokenizer(object):
+
+    def test_simple(self):
+        line = "a+1"
+        tks = tokenize(line)
+        assert tks == [
+            (tokens.NAME, 'a', 1, 0, line),
+            (tokens.PLUS, '+', 1, 1, line),
+            (tokens.NUMBER, '1', 1, 2, line),
+            (tokens.NEWLINE, '', 2, 0, '\n'),
+            (tokens.NEWLINE, '', 2, 0, '\n'),
+            (tokens.ENDMARKER, '', 2, 0, ''),
+            ]
+
+    def test_error_parenthesis(self):
+        for paren in "([{":
+            check_token_error(paren + "1 + 2",
+                              "parenthesis is never closed",
+                              1)
+
+        for paren in ")]}":
+            check_token_error("1 + 2" + paren,
+                              "unmatched '%s'" % (paren, ),
+                              6)
+
+        for i, opening in enumerate("([{"):
+            for j, closing in enumerate(")]}"):
+                if i == j:
+                    continue
+                check_token_error(opening + "1\n" + closing,
+                        "closing parenthesis '%s' does not match opening parenthesis '%s' on line 1" % (closing, opening),
+                        pos=1, line=2)
+                check_token_error(opening + "1" + closing,
+                        "closing parenthesis '%s' does not match opening parenthesis '%s'" % (closing, opening),
+                        pos=3, line=1)
+                check_token_error(opening + closing,
+                        "closing parenthesis '%s' does not match opening parenthesis '%s'" % (closing, opening),
+                        pos=2, line=1)
+
+
+    def test_unknown_char(self):
+        check_token_error("?", "Unknown character", 1)
+
+    def test_eol_string(self):
+        check_token_error("x = 'a", pos=5, line=1)
+
+    def test_eof_triple_quoted(self):
+        check_token_error("'''", pos=1, line=1)
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -77,7 +77,7 @@
         """)
         assert self.space.unwrap(w_args) == (
             'unindent does not match any outer indentation level',
-            ('<string>', 3, 0, ' y\n'))
+            ('<string>', 3, 2, ' y\n'))
 
     def test_getcodeflags(self):
         code = self.compiler.compile('from __future__ import division\n',
diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py
--- a/pypy/interpreter/test/test_syntax.py
+++ b/pypy/interpreter/test/test_syntax.py
@@ -750,7 +750,7 @@
         except SyntaxError as e:
             assert e.lineno == 4
             assert e.text.endswith('a b c d e\n')
-            assert e.offset == e.text.index('b')
+            assert e.offset == e.text.index('b') + 1 # offset is 1-based
         else:
             raise Exception("no SyntaxError??")
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: merge pyparser-improvements-2

Reply via email to