Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: py3.5
Changeset: r88046:22a3e8d96787
Date: 2016-11-01 17:59 +0000
http://bitbucket.org/pypy/pypy/changeset/22a3e8d96787/
Log: merge heads diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,10 +1,11 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib.rstring import UnicodeBuilder, StringBuilder from rpython.rlib.runicode import code_to_unichr, MAXUNICODE from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.module.unicodedata import unicodedb class VersionTag(object): @@ -295,6 +296,26 @@ raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) +def namereplace_errors(space, w_exc): + check_exception(space, w_exc) + if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): + obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object'))) + start = space.int_w(space.getattr(w_exc, space.wrap('start'))) + w_end = space.getattr(w_exc, space.wrap('end')) + end = space.int_w(w_end) + builder = StringBuilder() + pos = start + while pos < end: + oc = ord(obj[pos]) + builder.append('\\N{') + builder.append(unicodedb.name(oc)) + builder.append('}') + pos += 1 + return space.newtuple([space.newbytes(builder.build()), w_end]) + else: + raise oefmt(space.w_TypeError, + "don't know how to handle %T in error callback", w_exc) + def surrogatepass_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): @@ -382,7 +403,8 @@ "NOT_RPYTHON" state = space.fromcache(CodecState) for error in ("strict", "ignore", "replace", "xmlcharrefreplace", - "backslashreplace", "surrogateescape", "surrogatepass"): + "backslashreplace", "surrogateescape", "surrogatepass", + "namereplace"): name = error + "_errors" state.codec_error_registry[error] = space.wrap(interp2app(globals()[name])) @@ -667,7 +689,7 @@ return 
errorchar raise oefmt(space.w_TypeError, - "character mapping must return integer, None or unicode") + "character mapping must return integer, None or str") class Charmap_Encode: def __init__(self, space, w_mapping): @@ -700,7 +722,7 @@ return errorchar raise oefmt(space.w_TypeError, - "character mapping must return integer, None or str") + "character mapping must return integer, bytes or None, not str") @unwrap_spec(string='bufferstr', errors='str_or_None') diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -50,7 +50,7 @@ ] for s in insecure: buf = b"S" + s + b"\012p0\012." - raises (ValueError, pickle.loads, buf) + raises ((ValueError, pickle.UnpicklingError), pickle.loads, buf) def test_unicodedecodeerror(self): assert str(UnicodeDecodeError( @@ -112,7 +112,7 @@ assert charmap_decode(b'xxx\xff', 'strict', map) == ('xxx\xff', 4) exc = raises(TypeError, charmap_decode, b'\xff', "strict", {0xff: b'a'}) - assert str(exc.value) == "character mapping must return integer, None or unicode" + assert str(exc.value) == "character mapping must return integer, None or str" raises(TypeError, charmap_decode, b'\xff', "strict", {0xff: 0x110000}) assert (charmap_decode(b"\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == @@ -561,9 +561,14 @@ assert b'\xff'.decode('utf-7', 'ignore') == '' assert b'\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): + def test_backslashreplace(self): assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000' + def test_namereplace(self): + assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'namereplace') == ( + b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}') + def test_surrogateescape(self): assert b'a\x80b'.decode('utf-8', 'surrogateescape') == 'a\udc80b' assert 
'a\udc80b'.encode('utf-8', 'surrogateescape') == b'a\x80b' @@ -682,7 +687,7 @@ exc = raises(TypeError, codecs.charmap_encode, u'\xff', "replace", {0xff: 300}) assert str(exc.value) == 'character mapping must be in range(256)' exc = raises(TypeError, codecs.charmap_encode, u'\xff', "replace", {0xff: u'a'}) - assert str(exc.value) == 'character mapping must return integer, None or str' + assert str(exc.value) == 'character mapping must return integer, bytes or None, not str' raises(UnicodeError, codecs.charmap_encode, u"\xff", "replace", {0xff: None}) def test_charmap_encode_replace(self): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit