Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r44726:45ef0fa73152
Date: 2011-06-06 09:53 +0200
http://bitbucket.org/pypy/pypy/changeset/45ef0fa73152/
Log: Be stricter: like CPython, check that the encoding error handlers really return a unicode, not a string. Fix a few built-in error handlers to return a unicode. diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -32,7 +32,10 @@ space.wrap(reason)) w_res = space.call_function(w_errorhandler, w_exc) if (not space.is_true(space.isinstance(w_res, space.w_tuple)) - or space.len_w(w_res) != 2): + or space.len_w(w_res) != 2 + or not space.is_true(space.isinstance( + space.getitem(w_res, space.wrap(0)), + space.w_unicode))): if decode: msg = ("decoding error handler must return " "(unicode, int) tuple, not %s") @@ -172,15 +175,7 @@ def ignore_errors(space, w_exc): check_exception(space, w_exc) w_end = space.getattr(w_exc, space.wrap('end')) - if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - return space.newtuple([space.wrap(''), w_end]) - elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or - space.isinstance_w(w_exc, space.w_UnicodeTranslateError)): - return space.newtuple([space.wrap(u''), w_end]) - else: - typename = space.type(w_exc).getname(space, '?') - raise operationerrfmt(space.w_TypeError, - "don't know how to handle %s in error callback", typename) + return space.newtuple([space.wrap(u''), w_end]) def replace_errors(space, w_exc): check_exception(space, w_exc) @@ -188,7 +183,7 @@ w_end = space.getattr(w_exc, space.wrap('end')) size = space.int_w(w_end) - space.int_w(w_start) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - text = '?' * size + text = u'?' 
* size return space.newtuple([space.wrap(text), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): text = u'\ufffd' diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -604,3 +604,11 @@ assert u'caf\xe9'.encode('mbcs') == 'caf\xe9' assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter assert 'cafx\e9'.decode('mbcs') == u'cafx\e9' + + def test_bad_handler_string_result(self): + import _codecs + def f(exc): + return ('foo', exc.end) + _codecs.register_error("test.test_codecs_not_a_string", f) + raises(TypeError, u'\u1234'.encode, 'ascii', + 'test.test_codecs_not_a_string') diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py --- a/pypy/module/_multibytecodec/test/test_app_codecs.py +++ b/pypy/module/_multibytecodec/test/test_app_codecs.py @@ -63,7 +63,7 @@ import codecs import sys codecs.register_error("test.test_decode_custom_error_handler_overflow", - lambda e: ('', sys.maxint + 1)) + lambda e: (u'', sys.maxint + 1)) raises(IndexError, "abc\xDD".decode, "hz", "test.test_decode_custom_error_handler_overflow") def test_encode_hz(self): _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit