Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: py3.6 Changeset: r96518:b4f7c179d353 Date: 2019-04-18 11:24 +0200 http://bitbucket.org/pypy/pypy/changeset/b4f7c179d353/
Log: fix issue #3001: weird codecs can return strange types when using codecs.en/decode but not via unicode.encode/bytes.decode diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -575,14 +575,7 @@ if encoding is None: encoding = space.sys.defaultencoding w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0)) - w_retval = _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors) - if not space.isinstance_w(w_retval, space.w_bytes): - raise oefmt(space.w_TypeError, - "'%s' encoder returned '%T' instead of 'bytes'; " - "use codecs.encode() to encode to arbitrary types", - encoding, - w_retval) - return w_retval + return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors) @unwrap_spec(errors='text_or_none') def readbuffer_encode(space, w_data, errors='strict'): @@ -604,14 +597,7 @@ if encoding is None: encoding = space.sys.defaultencoding w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1)) - w_retval = _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors) - if not isinstance(w_retval, W_UnicodeObject): - raise oefmt(space.w_TypeError, - "'%s' decoder returned '%T' instead of 'str'; " - "use codecs.decode() to decode to arbitrary types", - encoding, - w_retval) - return w_retval + return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors) @unwrap_spec(errors='text') def register_error(space, errors, w_handler): diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -1386,7 +1386,7 @@ "foo\udca5bar") assert ("foo\udca5bar".encode("iso-8859-3", "surrogateescape") == b"foo\xa5bar") - + def test_warn_escape_decode(self): import warnings import codecs @@ -1399,5 +1399,31 @@ assert len(l) == 2 assert isinstance(l[0].message, 
DeprecationWarning) + def test_invalid_type_errors(self): + # hex is not a text encoding. it works via the codecs functions, but + # not the methods + import codecs + res = codecs.decode(b"aabb", "hex") + assert res == b"\xaa\xbb" + res = codecs.decode(u"aabb", "hex") + assert res == b"\xaa\xbb" + res = codecs.encode(b"\xaa\xbb", "hex") + assert res == b"aabb" + raises(LookupError, u"abc".encode, "hex") + def test_non_text_codec(self): + import _codecs + def search_function(encoding): + def f(input, errors="strict"): + return 52, len(input) + if encoding == 'test.mynontextenc': + return (f, f, None, None) + return None + _codecs.register(search_function) + res = _codecs.encode(u"abc", "test.mynontextenc") + assert res == 52 + res = _codecs.decode(b"abc", "test.mynontextenc") + assert res == 52 + raises(TypeError, u"abc".encode, "test.mynontextenc") + raises(TypeError, b"abc".decode, "test.mynontextenc") diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1235,7 +1235,14 @@ a.pos, a.pos + 1) assert False, "always raises" return space.newbytes(utf8) - return encode(space, w_obj, encoding, errors) + w_retval = encode(space, w_obj, encoding, errors) + if not space.isinstance_w(w_retval, space.w_bytes): + raise oefmt(space.w_TypeError, + "'%s' encoder returned '%T' instead of 'bytes'; " + "use codecs.encode() to encode to arbitrary types", + encoding, + w_retval) + return w_retval def decode_object(space, w_obj, encoding, errors=None): @@ -1250,7 +1257,14 @@ lgt = unicodehelper.check_utf8_or_raise(space, s) return space.newutf8(s, lgt) from pypy.module._codecs.interp_codecs import decode - return decode(space, w_obj, encoding, errors) + w_retval = decode(space, w_obj, encoding, errors) + if not isinstance(w_retval, W_UnicodeObject): + raise oefmt(space.w_TypeError, + "'%s' decoder returned '%T' instead of 'str'; " + "use codecs.decode() to decode to 
arbitrary types", + encoding, + w_retval) + return w_retval def unicode_from_object(space, w_obj): if space.is_w(space.type(w_obj), space.w_unicode): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit