Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch:
Changeset: r94294:e66f24650daf
Date: 2018-04-10 09:41 +0200
http://bitbucket.org/pypy/pypy/changeset/e66f24650daf/
Log: merge pyparser-improvements-2 - fixes .offset values of SyntaxError, which is 1-based (but the raising code sometimes assumed it was 0-based) - expand some abbreviations - better error messages for non-matching parenthesis diff --git a/lib-python/2.7/test/test_eof.py b/lib-python/2.7/test/test_eof.py --- a/lib-python/2.7/test/test_eof.py +++ b/lib-python/2.7/test/test_eof.py @@ -5,7 +5,7 @@ class EOFTestCase(unittest.TestCase): def test_EOFC(self): - expect = "EOL while scanning string literal (<string>, line 1)" + expect = "end of line (EOL) while scanning string literal (<string>, line 1)" try: eval("""'this is a test\ """) @@ -15,7 +15,7 @@ raise test_support.TestFailed def test_EOFS(self): - expect = ("EOF while scanning triple-quoted string literal " + expect = ("end of file (EOF) while scanning triple-quoted string literal " "(<string>, line 1)") try: eval("""'''this is a test""") diff --git a/lib-python/2.7/test/test_traceback.py b/lib-python/2.7/test/test_traceback.py --- a/lib-python/2.7/test/test_traceback.py +++ b/lib-python/2.7/test/test_traceback.py @@ -123,10 +123,7 @@ self.assertEqual(len(err), 4) self.assertEqual(err[1].strip(), "print(2)") self.assertIn("^", err[2]) - if check_impl_detail(): - self.assertEqual(err[1].find("p"), err[2].find("^")) - if check_impl_detail(pypy=True): - self.assertEqual(err[1].find("2)") + 1, err[2].find("^")) + self.assertEqual(err[1].find("p"), err[2].find("^")) def test_base_exception(self): # Test that exceptions derived from BaseException are formatted right diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,9 @@ Fix for python-level classes that inherit from C-API types, previously the `w_obj` was not necessarily preserved throughout the lifetime of the `pyobj` which led to cases where instance attributes were lost. Fixes issue #2793 + + +.. 
branch: pyparser-improvements-2 + +Improve line offsets that are reported by SyntaxError. Improve error messages +for a few situations, including mismatched parenthesis. diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -6,6 +6,7 @@ lastlineno=0): self.msg = msg self.lineno = lineno + # NB: offset is a 1-based index! self.offset = offset self.text = text self.filename = filename diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -199,6 +199,7 @@ self.token_type = token_type self.value = value self.lineno = lineno + # this is a 0-based index self.column = column self.line = line self.expected = expected diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -188,7 +188,9 @@ if e.expected_str is not None: msg += " (expected '%s')" % e.expected_str - raise new_err(msg, e.lineno, e.column, e.line, + # parser.ParseError(...).column is 0-based, but the offsets in the + # exceptions in the error module are 1-based, hence the '+ 1' + raise new_err(msg, e.lineno, e.column + 1, e.line, compile_info.filename) else: tree = self.root diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py --- a/pypy/interpreter/pyparser/pytokenizer.py +++ b/pypy/interpreter/pyparser/pytokenizer.py @@ -73,14 +73,14 @@ logical line; continuation lines are included. 
""" token_list = [] - lnum = parenlev = continued = 0 + lnum = continued = 0 namechars = NAMECHARS numchars = NUMCHARS contstr, needcont = '', 0 contline = None indents = [0] last_comment = '' - parenlevstart = (0, 0, "") + parenstack = [] # make the annotator happy endDFA = DUMMY_DFA @@ -97,7 +97,7 @@ if contstr: if not line: raise TokenError( - "EOF while scanning triple-quoted string literal", + "end of file (EOF) while scanning triple-quoted string literal", strstart[2], strstart[0], strstart[1]+1, token_list, lnum-1) endmatch = endDFA.recognize(line) @@ -123,7 +123,7 @@ contline = contline + line continue - elif parenlev == 0 and not continued: # new statement + elif not parenstack and not continued: # new statement if not line: break column = 0 while pos < max: # measure leading whitespace @@ -143,21 +143,21 @@ token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) last_comment = '' while column < indents[-1]: - indents = indents[:-1] + indents.pop() token_list.append((tokens.DEDENT, '', lnum, pos, line)) last_comment = '' if column != indents[-1]: err = "unindent does not match any outer indentation level" - raise TokenIndentationError(err, line, lnum, 0, token_list) + raise TokenIndentationError(err, line, lnum, column+1, token_list) else: # continued statement if not line: - if parenlev > 0: - lnum1, start1, line1 = parenlevstart + if parenstack: + _, lnum1, start1, line1 = parenstack[0] raise TokenError("parenthesis is never closed", line1, lnum1, start1 + 1, token_list, lnum) - raise TokenError("EOF in multi-line statement", line, - lnum, 0, token_list) + raise TokenError("end of file (EOF) in multi-line statement", line, + lnum, 0, token_list) # XXX why is the offset 0 here? 
continued = 0 while pos < max: @@ -180,7 +180,7 @@ token_list.append((tokens.NUMBER, token, lnum, start, line)) last_comment = '' elif initial in '\r\n': - if parenlev <= 0: + if not parenstack: tok = (tokens.NEWLINE, last_comment, lnum, start, line) token_list.append(tok) last_comment = '' @@ -222,14 +222,22 @@ continued = 1 else: if initial in '([{': - if parenlev == 0: - parenlevstart = (lnum, start, line) - parenlev = parenlev + 1 + parenstack.append((initial, lnum, start, line)) elif initial in ')]}': - parenlev = parenlev - 1 - if parenlev < 0: + if not parenstack: raise TokenError("unmatched '%s'" % initial, line, lnum, start + 1, token_list) + opening, lnum1, start1, line1 = parenstack.pop() + if not ((opening == "(" and initial == ")") or + (opening == "[" and initial == "]") or + (opening == "{" and initial == "}")): + msg = "closing parenthesis '%s' does not match opening parenthesis '%s'" % ( + initial, opening) + + if lnum1 != lnum: + msg += " on line " + str(lnum1) + raise TokenError( + msg, line, lnum, start + 1, token_list) if token in python_opmap: punct = python_opmap[token] else: @@ -241,7 +249,7 @@ if start < 0: start = pos if start<max and line[start] in single_quoted: - raise TokenError("EOL while scanning string literal", + raise TokenError("end of line (EOL) while scanning string literal", line, lnum, start+1, token_list) tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line) token_list.append(tok) diff --git a/pypy/interpreter/pyparser/test/targetparse.py b/pypy/interpreter/pyparser/test/targetparse.py --- a/pypy/interpreter/pyparser/test/targetparse.py +++ b/pypy/interpreter/pyparser/test/targetparse.py @@ -8,25 +8,36 @@ -with file("../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py") as f: - s = f.read() - class FakeSpace(object): pass fakespace = FakeSpace() -def bench(title): +def bench(fn, s): a = time.clock() info = pyparse.CompileInfo("<string>", "exec") parser = pyparse.PythonParser(fakespace) tree = parser._parse(s, info) b = 
time.clock() - print title, (b-a) + print fn, (b-a) def entry_point(argv): - bench("foo") + if len(argv) == 2: + fn = argv[1] + else: + fn = "../../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py" + fd = os.open(fn, os.O_RDONLY, 0777) + res = [] + while True: + s = os.read(fd, 4096) + if not s: + break + res.append(s) + os.close(fd) + s = "".join(res) + print len(s) + bench(fn, s) return 0 diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py --- a/pypy/interpreter/pyparser/test/test_pyparse.py +++ b/pypy/interpreter/pyparser/test/test_pyparse.py @@ -76,14 +76,14 @@ exc = py.test.raises(SyntaxError, parse, "name another for").value assert exc.msg == "invalid syntax" assert exc.lineno == 1 - assert exc.offset == 5 + assert exc.offset == 6 assert exc.text.startswith("name another for") exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value - assert exc.msg == "EOL while scanning string literal" + assert exc.msg == "end of line (EOL) while scanning string literal" assert exc.lineno == 1 assert exc.offset == 5 exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value - assert exc.msg == "EOF while scanning triple-quoted string literal" + assert exc.msg == "end of file (EOF) while scanning triple-quoted string literal" assert exc.lineno == 1 assert exc.offset == 5 assert exc.lastlineno == 3 @@ -112,7 +112,7 @@ assert exc.msg == "expected an indented block" assert exc.lineno == 3 assert exc.text.startswith("pass") - assert exc.offset == 0 + assert exc.offset == 1 input = "hi\n indented" exc = py.test.raises(IndentationError, parse, input).value assert exc.msg == "unexpected indent" @@ -120,6 +120,7 @@ exc = py.test.raises(IndentationError, parse, input).value assert exc.msg == "unindent does not match any outer indentation level" assert exc.lineno == 3 + assert exc.offset == 3 def test_mac_newline(self): self.parse("this_is\ra_mac\rfile") diff --git 
a/pypy/interpreter/pyparser/test/test_pytokenizer.py b/pypy/interpreter/pyparser/test/test_pytokenizer.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/pyparser/test/test_pytokenizer.py @@ -0,0 +1,66 @@ +import pytest +from pypy.interpreter.pyparser import pytokenizer +from pypy.interpreter.pyparser.pygram import tokens +from pypy.interpreter.pyparser.error import TokenError + +def tokenize(s): + return pytokenizer.generate_tokens(s.splitlines(True) + ["\n"], 0) + +def check_token_error(s, msg=None, pos=-1, line=-1): + error = pytest.raises(TokenError, tokenize, s) + if msg is not None: + assert error.value.msg == msg + if pos != -1: + assert error.value.offset == pos + if line != -1: + assert error.value.lineno == line + + +class TestTokenizer(object): + + def test_simple(self): + line = "a+1" + tks = tokenize(line) + assert tks == [ + (tokens.NAME, 'a', 1, 0, line), + (tokens.PLUS, '+', 1, 1, line), + (tokens.NUMBER, '1', 1, 2, line), + (tokens.NEWLINE, '', 2, 0, '\n'), + (tokens.NEWLINE, '', 2, 0, '\n'), + (tokens.ENDMARKER, '', 2, 0, ''), + ] + + def test_error_parenthesis(self): + for paren in "([{": + check_token_error(paren + "1 + 2", + "parenthesis is never closed", + 1) + + for paren in ")]}": + check_token_error("1 + 2" + paren, + "unmatched '%s'" % (paren, ), + 6) + + for i, opening in enumerate("([{"): + for j, closing in enumerate(")]}"): + if i == j: + continue + check_token_error(opening + "1\n" + closing, + "closing parenthesis '%s' does not match opening parenthesis '%s' on line 1" % (closing, opening), + pos=1, line=2) + check_token_error(opening + "1" + closing, + "closing parenthesis '%s' does not match opening parenthesis '%s'" % (closing, opening), + pos=3, line=1) + check_token_error(opening + closing, + "closing parenthesis '%s' does not match opening parenthesis '%s'" % (closing, opening), + pos=2, line=1) + + + def test_unknown_char(self): + check_token_error("?", "Unknown character", 1) + + def test_eol_string(self): + 
check_token_error("x = 'a", pos=5, line=1) + + def test_eof_triple_quoted(self): + check_token_error("'''", pos=1, line=1) diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py --- a/pypy/interpreter/test/test_compiler.py +++ b/pypy/interpreter/test/test_compiler.py @@ -77,7 +77,7 @@ """) assert self.space.unwrap(w_args) == ( 'unindent does not match any outer indentation level', - ('<string>', 3, 0, ' y\n')) + ('<string>', 3, 2, ' y\n')) def test_getcodeflags(self): code = self.compiler.compile('from __future__ import division\n', diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py --- a/pypy/interpreter/test/test_syntax.py +++ b/pypy/interpreter/test/test_syntax.py @@ -750,7 +750,7 @@ except SyntaxError as e: assert e.lineno == 4 assert e.text.endswith('a b c d e\n') - assert e.offset == e.text.index('b') + assert e.offset == e.text.index('b') + 1 # offset is 1-based else: raise Exception("no SyntaxError??") _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit