Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: pyparser-improvements-2
Changeset: r94278:3405c95f9e61
Date: 2018-04-08 15:44 +0200
http://bitbucket.org/pypy/pypy/changeset/3405c95f9e61/
Log: fix SyntaxError offsets (so far, the errors that came from the
parser had an off-by-one error, which was the source of my thinking
that SyntaxError.offset is 0-based. In reality, the parser is wrong
and it should be 1-based).
diff --git a/pypy/interpreter/pyparser/error.py
b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -6,6 +6,7 @@
lastlineno=0):
self.msg = msg
self.lineno = lineno
+ # NB: offset is a 1-based index!
self.offset = offset
self.text = text
self.filename = filename
diff --git a/pypy/interpreter/pyparser/parser.py
b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -199,6 +199,7 @@
self.token_type = token_type
self.value = value
self.lineno = lineno
+ # this is a 0-based index
self.column = column
self.line = line
self.expected = expected
diff --git a/pypy/interpreter/pyparser/pyparse.py
b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -188,7 +188,9 @@
if e.expected_str is not None:
msg += " (expected '%s')" % e.expected_str
- raise new_err(msg, e.lineno, e.column, e.line,
+ # parser.ParseError(...).column is 0-based, but the offsets in
the
+ # exceptions in the error module are 1-based, hence the '+ 1'
+ raise new_err(msg, e.lineno, e.column + 1, e.line,
compile_info.filename)
else:
tree = self.root
diff --git a/pypy/interpreter/pyparser/pytokenizer.py
b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -98,7 +98,7 @@
if not line:
raise TokenError(
"end of file (EOF) while scanning triple-quoted string
literal",
- strstart[2], strstart[0], strstart[1],
+ strstart[2], strstart[0], strstart[1]+1,
token_list, lnum-1)
endmatch = endDFA.recognize(line)
if endmatch >= 0:
@@ -148,16 +148,16 @@
last_comment = ''
if column != indents[-1]:
err = "unindent does not match any outer indentation level"
- raise TokenIndentationError(err, line, lnum, 0, token_list)
+ raise TokenIndentationError(err, line, lnum, column+1,
token_list)
else: # continued statement
if not line:
if parenstack:
_, lnum1, start1, line1 = parenstack[0]
raise TokenError("parenthesis is never closed", line1,
- lnum1, start1, token_list, lnum)
+ lnum1, start1 + 1, token_list, lnum)
raise TokenError("end of file (EOF) in multi-line statement",
line,
- lnum, 0, token_list)
+ lnum, 0, token_list) # XXX why is the offset
0 here?
continued = 0
while pos < max:
@@ -171,7 +171,7 @@
if start == end:
raise TokenError("Unknown character", line,
- lnum, start, token_list)
+ lnum, start + 1, token_list)
pos = end
token, initial = line[start:end], line[start]
@@ -226,7 +226,7 @@
elif initial in ')]}':
if not parenstack:
raise TokenError("unmatched '%s'" % initial, line,
- lnum, start, token_list)
+ lnum, start + 1, token_list)
opening, lnum1, start1, line1 = parenstack.pop()
if not ((opening == "(" and initial == ")") or
(opening == "[" and initial == "]") or
@@ -237,7 +237,7 @@
if lnum1 != lnum:
msg += " on line " + str(lnum1)
raise TokenError(
- msg, line, lnum, start, token_list)
+ msg, line, lnum, start + 1, token_list)
if token in python_opmap:
punct = python_opmap[token]
else:
@@ -250,7 +250,7 @@
start = pos
if start<max and line[start] in single_quoted:
raise TokenError("end of line (EOL) while scanning string
literal",
- line, lnum, start, token_list)
+ line, lnum, start+1, token_list)
tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line)
token_list.append(tok)
last_comment = ''
diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py
b/pypy/interpreter/pyparser/test/test_pyparse.py
--- a/pypy/interpreter/pyparser/test/test_pyparse.py
+++ b/pypy/interpreter/pyparser/test/test_pyparse.py
@@ -76,28 +76,28 @@
exc = py.test.raises(SyntaxError, parse, "name another for").value
assert exc.msg == "invalid syntax"
assert exc.lineno == 1
- assert exc.offset == 5
+ assert exc.offset == 6
assert exc.text.startswith("name another for")
exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value
assert exc.msg == "end of line (EOL) while scanning string literal"
assert exc.lineno == 1
- assert exc.offset == 4
+ assert exc.offset == 5
exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value
assert exc.msg == "end of file (EOF) while scanning triple-quoted
string literal"
assert exc.lineno == 1
- assert exc.offset == 4
+ assert exc.offset == 5
assert exc.lastlineno == 3
for input in ("())", "(()", "((", "))"):
py.test.raises(SyntaxError, parse, input)
exc = py.test.raises(SyntaxError, parse, "x = (\n\n(),\n(),").value
assert exc.msg == "parenthesis is never closed"
assert exc.lineno == 1
- assert exc.offset == 4
+ assert exc.offset == 5
assert exc.lastlineno == 5
exc = py.test.raises(SyntaxError, parse, "abc)").value
assert exc.msg == "unmatched ')'"
assert exc.lineno == 1
- assert exc.offset == 3
+ assert exc.offset == 4
def test_is(self):
self.parse("x is y")
@@ -112,7 +112,7 @@
assert exc.msg == "expected an indented block"
assert exc.lineno == 3
assert exc.text.startswith("pass")
- assert exc.offset == 0
+ assert exc.offset == 1
input = "hi\n indented"
exc = py.test.raises(IndentationError, parse, input).value
assert exc.msg == "unexpected indent"
@@ -120,6 +120,7 @@
exc = py.test.raises(IndentationError, parse, input).value
assert exc.msg == "unindent does not match any outer indentation level"
assert exc.lineno == 3
+ assert exc.offset == 3
def test_mac_newline(self):
self.parse("this_is\ra_mac\rfile")
diff --git a/pypy/interpreter/pyparser/test/test_pytokenizer.py
b/pypy/interpreter/pyparser/test/test_pytokenizer.py
--- a/pypy/interpreter/pyparser/test/test_pytokenizer.py
+++ b/pypy/interpreter/pyparser/test/test_pytokenizer.py
@@ -34,12 +34,12 @@
for paren in "([{":
check_token_error(paren + "1 + 2",
"parenthesis is never closed",
- 0)
+ 1)
for paren in ")]}":
check_token_error("1 + 2" + paren,
"unmatched '%s'" % (paren, ),
- 5)
+ 6)
for i, opening in enumerate("([{"):
for j, closing in enumerate(")]}"):
@@ -47,17 +47,20 @@
continue
check_token_error(opening + "1\n" + closing,
"closing parenthesis '%s' does not match opening
parenthesis '%s' on line 1" % (closing, opening),
- pos=0, line=2)
+ pos=1, line=2)
check_token_error(opening + "1" + closing,
"closing parenthesis '%s' does not match opening
parenthesis '%s'" % (closing, opening),
+ pos=3, line=1)
+ check_token_error(opening + closing,
+ "closing parenthesis '%s' does not match opening
parenthesis '%s'" % (closing, opening),
pos=2, line=1)
def test_unknown_char(self):
- check_token_error("?", "Unknown character", 0)
+ check_token_error("?", "Unknown character", 1)
def test_eol_string(self):
- check_token_error("x = 'a", pos=4, line=1)
+ check_token_error("x = 'a", pos=5, line=1)
def test_eof_triple_quoted(self):
- check_token_error("'''", pos=0, line=1)
+ check_token_error("'''", pos=1, line=1)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit