Author: Armin Rigo <ar...@tunes.org> Branch: py3.6 Changeset: r96520:4a70f02715ab Date: 2019-04-18 12:03 +0200 http://bitbucket.org/pypy/pypy/changeset/4a70f02715ab/
Log: Fix. The general issue is space.newtext("string") where the string might contain some random byte-chars from app-level. diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -958,9 +958,17 @@ unicode_name_handler) if first_escape_error_char is not None: + # Here, 'first_escape_error_char' is a single string character. + # Careful, it might be >= '\x80'. If it is, it would make an + # invalid utf-8 string when pasted directly into it. + if ' ' <= first_escape_error_char < '\x7f': + msg = "invalid escape sequence '\\%s'" % (first_escape_error_char,) + else: + msg = "invalid escape sequence: '\\' followed by %s" % ( + space.text_w(space.repr( + space.newbytes(first_escape_error_char))),) space.warn( - space.newtext("invalid escape sequence '\\%s'" - % str(first_escape_error_char)), + space.newtext(msg), space.w_DeprecationWarning ) return space.newtuple([space.newutf8(result, lgt), space.newint(u_len)]) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -1394,10 +1394,11 @@ with warnings.catch_warnings(record=True) as l: warnings.simplefilter("always") codecs.unicode_escape_decode(b'\\A') - codecs.unicode_escape_decode(b"\\A") + codecs.unicode_escape_decode(b"\\" + b"\xff") assert len(l) == 2 assert isinstance(l[0].message, DeprecationWarning) + assert isinstance(l[1].message, DeprecationWarning) def test_invalid_type_errors(self): # hex is not a text encoding. it works via the codecs functions, but _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit