Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: py3.5
Changeset: r88038:d581f95a1c58
Date: 2016-11-01 17:44 +0100
http://bitbucket.org/pypy/pypy/changeset/d581f95a1c58/

Log: Implement the codec "namereplace" error handler. u'\uabcd' ==> b'\N{SOME NAME}'

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,10 +1,11 @@
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib.rstring import UnicodeBuilder, StringBuilder
 from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.module.unicodedata import unicodedb
 
 
 class VersionTag(object):
@@ -295,6 +296,26 @@
         raise oefmt(space.w_TypeError,
                     "don't know how to handle %T in error callback", w_exc)
 
+def namereplace_errors(space, w_exc):
+    check_exception(space, w_exc)
+    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+        obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
+        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+        w_end = space.getattr(w_exc, space.wrap('end'))
+        end = space.int_w(w_end)
+        builder = StringBuilder()
+        pos = start
+        while pos < end:
+            oc = ord(obj[pos])
+            builder.append('\\N{')
+            builder.append(unicodedb.name(oc))
+            builder.append('}')
+            pos += 1
+        return space.newtuple([space.newbytes(builder.build()), w_end])
+    else:
+        raise oefmt(space.w_TypeError,
+                    "don't know how to handle %T in error callback", w_exc)
+
 def surrogatepass_errors(space, w_exc):
     check_exception(space, w_exc)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
@@ -382,7 +403,8 @@
     "NOT_RPYTHON"
     state = space.fromcache(CodecState)
     for error in ("strict", "ignore", "replace", "xmlcharrefreplace",
-                  "backslashreplace", "surrogateescape", "surrogatepass"):
+                  "backslashreplace", "surrogateescape", "surrogatepass",
+                  "namereplace"):
         name = error + "_errors"
         state.codec_error_registry[error] = space.wrap(interp2app(globals()[name]))
 
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -561,9 +561,14 @@
         assert b'\xff'.decode('utf-7', 'ignore') == ''
         assert b'\x00'.decode('unicode-internal', 'ignore') == ''
 
-    def test_backslahreplace(self):
+    def test_backslashreplace(self):
         assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000'
 
+    def test_namereplace(self):
+        assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'namereplace') == (
+            b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+            b'\\N{CJK UNIFIED IDEOGRAPH-8000}')
+
     def test_surrogateescape(self):
         assert b'a\x80b'.decode('utf-8', 'surrogateescape') == 'a\udc80b'
         assert 'a\udc80b'.encode('utf-8', 'surrogateescape') == b'a\x80b'
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit