Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r84440:7d1402694892
Date: 2016-05-14 13:31 -0700
http://bitbucket.org/pypy/pypy/changeset/7d1402694892/
Log: CPython issue 2382: adjust the SyntaxError offset when the source line contains multibyte characters
diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -13,17 +13,23 @@
def wrap_info(self, space):
w_text = w_filename = space.w_None
+ offset = self.offset
if self.text is not None:
from rpython.rlib.runicode import str_decode_utf_8
- # self.text may not be UTF-8 in case of decoding errors
- w_text = space.wrap(str_decode_utf_8(self.text, len(self.text),
- 'replace')[0])
+ # self.text may not be UTF-8 in case of decoding errors.
+ # adjust the encoded text offset to a decoded offset
+ text, _ = str_decode_utf_8(self.text, offset, 'replace')
+ offset = len(text)
+ if len(self.text) != offset:
+ text, _ = str_decode_utf_8(self.text, len(self.text),
+ 'replace')
+ w_text = space.wrap(text)
if self.filename is not None:
w_filename = space.fsdecode(space.wrapbytes(self.filename))
return space.newtuple([space.wrap(self.msg),
space.newtuple([w_filename,
space.wrap(self.lineno),
- space.wrap(self.offset),
+ space.wrap(offset),
w_text,
space.wrap(self.lastlineno)])])
diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py
--- a/pypy/interpreter/test/test_syntax.py
+++ b/pypy/interpreter/test/test_syntax.py
@@ -720,6 +720,11 @@
print_error()
# implicit "del e" here
+ def test_cpython_issue2382(self):
+ code = 'Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +'
+ exc = raises(SyntaxError, compile, code, 'foo', 'exec')
+ assert exc.value.offset in (19, 20) # pypy, cpython
+
if __name__ == '__main__':
# only to check on top of CPython (you need 2.4)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit