Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: pyparser-improvements-2
Changeset: r94278:3405c95f9e61
Date: 2018-04-08 15:44 +0200
http://bitbucket.org/pypy/pypy/changeset/3405c95f9e61/
Log: fix SyntaxError offsets (so far, the errors that came from the
parser had an off-by-one error, which was the source of my thinking
that SyntaxError.offset is 0-based. In reality, the parser is wrong
and it should be 1-based).
diff --git a/pypy/interpreter/pyparser/error.py
b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -6,6 +6,7 @@
lastlineno=0):
self.msg = msg
self.lineno = lineno
+ # NB: offset is a 1-based index!
self.offset = offset
self.text = text
self.filename = filename
diff --git a/pypy/interpreter/pyparser/parser.py
b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -199,6 +199,7 @@
self.token_type = token_type
self.value = value
self.lineno = lineno
+ # this is a 0-based index
self.column = column
self.line = line
self.expected = expected
diff --git a/pypy/interpreter/pyparser/pyparse.py
b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -188,7 +188,9 @@
if e.expected_str is not None:
msg += " (expected '%s')" % e.expected_str
- raise new_err(msg, e.lineno, e.column, e.line,
+ # parser.ParseError(...).column is 0-based, but the offsets in
the
+ # exceptions in the error module are 1-based, hence the '+ 1'
+ raise new_err(msg, e.lineno, e.column + 1, e.line,
compile_info.filename)
else:
tree = self.root
diff --git a/pypy/interpreter/pyparser/pytokenizer.py
b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -98,7 +98,7 @@
if not line:
raise TokenError(
"end of file (EOF) while scanning triple-quoted string
literal",
- strstart[2], strstart[0], strstart[1],
+ strstart[2], strstart[0], strstart[1]+1,
token_list, lnum-1)
endmatch = endDFA.recognize(line)
if endmatch >= 0:
@@ -148,16 +148,16 @@
last_comment = ''
if column != indents[-1]:
err = "unindent does not match any outer indentation level"
- raise TokenIndentationError(err, line, lnum, 0, token_list)
+ raise TokenIndentationError(err, line, lnum, column+1,
token_list)
else: # continued statement
if not line:
if parenstack:
_, lnum1, start1, line1 = parenstack[0]
raise TokenError("parenthesis is never closed", line1,
- lnum1, start1, token_list, lnum)
+ lnum1, start1 + 1, token_list, lnum)
raise TokenError("end of file (EOF) in multi-line statement",
line,
- lnum, 0, token_list)
+ lnum, 0, token_list) # XXX why is the offset
0 here?
continued = 0
while pos < max:
@@ -171,7 +171,7 @@
if start == end:
raise TokenError("Unknown character", line,
- lnum, start, token_list)
+ lnum, start + 1, token_list)
pos = end
token, initial = line[start:end], line[start]
@@ -226,7 +226,7 @@
elif initial in ')]}':
if not parenstack:
raise TokenError("unmatched '%s'" % initial, line,
- lnum, start, token_list)
+ lnum, start + 1, token_list)
opening, lnum1, start1, line1 = parenstack.pop()
if not ((opening == "(" and initial == ")") or
(opening == "[" and initial == "]") or
@@ -237,7 +237,7 @@
if lnum1 != lnum:
msg += " on line " + str(lnum1)
raise TokenError(
- msg, line, lnum, start, token_list)
+ msg, line, lnum, start + 1, token_list)
if token in python_opmap:
punct = python_opmap[token]
else:
@@ -250,7 +250,7 @@
start = pos
if start<max and line[start] in single_quoted:
raise TokenError("end of line (EOL) while scanning string
literal",
- line, lnum, start, token_list)
+ line, lnum, start+1, token_list)
tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line)
token_list.append(tok)
last_comment = ''
diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py
b/pypy/interpreter/pyparser/test/test_pyparse.py
--- a/pypy/interpreter/pyparser/test/test_pyparse.py
+++ b/pypy/interpreter/pyparser/test/test_pyparse.py
@@ -76,28 +76,28 @@
exc = py.test.raises(SyntaxError, parse, "name another for").value
assert exc.msg == "invalid syntax"
assert exc.lineno == 1
- assert exc.offset == 5
+ assert exc.offset == 6
assert exc.text.startswith("name another for")
exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value
assert exc.msg == "end of line (EOL) while scanning string literal"
assert exc.lineno == 1
- assert exc.offset == 4
+ assert exc.offset == 5
exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value
assert exc.msg == "end of file (EOF) while scanning triple-quoted
string literal"
assert exc.lineno == 1
- assert exc.offset == 4
+ assert exc.offset == 5
assert exc.lastlineno == 3
for input in ("())", "(()", "((", "))"):
py.test.raises(SyntaxError, parse, input)
exc = py.test.raises(SyntaxError, parse, "x = (\n\n(),\n(),").value
assert exc.msg == "parenthesis is never closed"
assert exc.lineno == 1
- assert exc.offset == 4
+ assert exc.offset == 5
assert exc.lastlineno == 5
exc = py.test.raises(SyntaxError, parse, "abc)").value
assert exc.msg == "unmatched ')'"
assert exc.lineno == 1
- assert exc.offset == 3
+ assert exc.offset == 4
def test_is(self):
self.parse("x is y")
@@ -112,7 +112,7 @@
assert exc.msg == "expected an indented block"
assert exc.lineno == 3
assert exc.text.startswith("pass")
- assert exc.offset == 0
+ assert exc.offset == 1
input = "hi\n indented"
exc = py.test.raises(IndentationError, parse, input).value
assert exc.msg == "unexpected indent"
@@ -120,6 +120,7 @@
exc = py.test.raises(IndentationError, parse, input).value
assert exc.msg == "unindent does not match any outer indentation level"
assert exc.lineno == 3
+ assert exc.offset == 3
def test_mac_newline(self):
self.parse("this_is\ra_mac\rfile")
diff --git a/pypy/interpreter/pyparser/test/test_pytokenizer.py
b/pypy/interpreter/pyparser/test/test_pytokenizer.py
--- a/pypy/interpreter/pyparser/test/test_pytokenizer.py
+++ b/pypy/interpreter/pyparser/test/test_pytokenizer.py
@@ -34,12 +34,12 @@
for paren in "([{":
check_token_error(paren + "1 + 2",
"parenthesis is never closed",
- 0)
+ 1)
for paren in ")]}":
check_token_error("1 + 2" + paren,
"unmatched '%s'" % (paren, ),
- 5)
+ 6)
for i, opening in enumerate("([{"):
for j, closing in enumerate(")]}"):
@@ -47,17 +47,20 @@
continue
check_token_error(opening + "1\n" + closing,
"closing parenthesis '%s' does not match opening
parenthesis '%s' on line 1" % (closing, opening),
- pos=0, line=2)
+ pos=1, line=2)
check_token_error(opening + "1" + closing,
"closing parenthesis '%s' does not match opening
parenthesis '%s'" % (closing, opening),
+ pos=3, line=1)
+ check_token_error(opening + closing,
+ "closing parenthesis '%s' does not match opening
parenthesis '%s'" % (closing, opening),
pos=2, line=1)
def test_unknown_char(self):
- check_token_error("?", "Unknown character", 0)
+ check_token_error("?", "Unknown character", 1)
def test_eol_string(self):
- check_token_error("x = 'a", pos=4, line=1)
+ check_token_error("x = 'a", pos=5, line=1)
def test_eof_triple_quoted(self):
- check_token_error("'''", pos=0, line=1)
+ check_token_error("'''", pos=1, line=1)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit