Author: Yusuke Tsutsumi <yus...@tsutsumi.io>
Branch: fix_test_codecs
Changeset: r94698:2fd74c4c379d
Date: 2018-05-16 07:13 -0700
http://bitbucket.org/pypy/pypy/changeset/2fd74c4c379d/

Log: Adding warning with escaping invalid unicode escape characters.

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -117,7 +117,7 @@
     from pypy.module._codecs import interp_codecs
     state = space.fromcache(interp_codecs.CodecState)
     unicodedata_handler = state.get_unicodedata_handler(space)
-    result, consumed = runicode.str_decode_unicode_escape(
+    result, consumed, first_escape_error_char = runicode.str_decode_unicode_escape(
         string, len(string), "strict",
         final=True, errorhandler=decode_error_handler(space),
         unicodedata_handler=unicodedata_handler)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -949,11 +949,18 @@

     unicode_name_handler = state.get_unicodedata_handler(space)

-    result, consumed = runicode.str_decode_unicode_escape(
+    result, consumed, first_escape_error_char = runicode.str_decode_unicode_escape(
         string, len(string), errors,
         final, state.decode_error_handler,
         unicode_name_handler)

+    if first_escape_error_char is not None:
+        space.warn(
+            space.newtext("invalid escape sequence '\\%s'"
+                          % str(first_escape_error_char)),
+            space.w_DeprecationWarning
+        )
+
     return space.newtuple([space.newunicode(result), space.newint(consumed)])

 # ____________________________________________________________
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1435,10 +1435,11 @@
         errorhandler = default_unicode_error_decode

     if size == 0:
-        return u'', 0
+        return u'', 0, None

     builder = UnicodeBuilder(size)
     pos = 0
+    first_escape_error_char = None
     while pos < size:
         ch = s[pos]

@@ -1541,10 +1542,11 @@
                                             message, s, pos-1, look+1)
                     builder.append(res)
             else:
+                first_escape_error_char = unichr(ord(ch))
                 builder.append(u'\\')
                 builder.append(unichr(ord(ch)))

-    return builder.build(), pos
+    return builder.build(), pos, first_escape_error_char

 def make_unicode_escape_function(pass_printable=False, unicode_output=False,
                                  quotes=False, prefix=None):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit