Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.5
Changeset: r88038:d581f95a1c58
Date: 2016-11-01 17:44 +0100
http://bitbucket.org/pypy/pypy/changeset/d581f95a1c58/
Log: Implement the "namereplace" codec error handler, which replaces each
unencodable character with its \N{...} name escape, e.g. u'\uabcd' ==> b'\N{SOME NAME}'
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,10 +1,11 @@
from rpython.rlib import jit
from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib.rstring import UnicodeBuilder, StringBuilder
from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.module.unicodedata import unicodedb
class VersionTag(object):
@@ -295,6 +296,26 @@
raise oefmt(space.w_TypeError,
"don't know how to handle %T in error callback", w_exc)
+def namereplace_errors(space, w_exc):
+ # Codec error callback registered under the name "namereplace".
+ # For a UnicodeEncodeError, every character of the exception's 'object'
+ # in the range [start, end) is replaced by the escape \N{<name>}, where
+ # <name> comes from unicodedb.name().  Returns a 2-tuple
+ # (replacement bytes, end); the original wrapped 'end' attribute is
+ # passed through unchanged.
+ # Any other exception type raises TypeError.
+ # check_exception() is defined elsewhere in this module -- presumably it
+ # validates that w_exc is an exception instance; verify there.
+ check_exception(space, w_exc)
+ if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+ obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
+ start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+ # Keep the wrapped 'end' so it can be returned as-is in the result tuple.
+ w_end = space.getattr(w_exc, space.wrap('end'))
+ end = space.int_w(w_end)
+ builder = StringBuilder()
+ pos = start
+ while pos < end:
+ oc = ord(obj[pos])
+ builder.append('\\N{')
+ # NOTE(review): no fallback is visible here for code points that have
+ # no name -- confirm how unicodedb.name() reports them.
+ builder.append(unicodedb.name(oc))
+ builder.append('}')
+ pos += 1
+ return space.newtuple([space.newbytes(builder.build()), w_end])
+ else:
+ raise oefmt(space.w_TypeError,
+ "don't know how to handle %T in error callback", w_exc)
+
def surrogatepass_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
@@ -382,7 +403,8 @@
"NOT_RPYTHON"
state = space.fromcache(CodecState)
for error in ("strict", "ignore", "replace", "xmlcharrefreplace",
- "backslashreplace", "surrogateescape", "surrogatepass"):
+ "backslashreplace", "surrogateescape", "surrogatepass",
+ "namereplace"):
name = error + "_errors"
state.codec_error_registry[error] =
space.wrap(interp2app(globals()[name]))
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -561,9 +561,14 @@
assert b'\xff'.decode('utf-7', 'ignore') == ''
assert b'\x00'.decode('unicode-internal', 'ignore') == ''
- def test_backslahreplace(self):
+ def test_backslashreplace(self):
assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace')
== b'a\\xac\u1234\u20ac\u8000'
+ def test_namereplace(self):
+ # With errors='namereplace', the ASCII 'a' is kept as-is and each of the
+ # four non-ASCII characters is encoded as a \N{...} escape with its name.
+ assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'namereplace') == (
+ b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+ b'\\N{CJK UNIFIED IDEOGRAPH-8000}')
+
def test_surrogateescape(self):
assert b'a\x80b'.decode('utf-8', 'surrogateescape') == 'a\udc80b'
assert 'a\udc80b'.encode('utf-8', 'surrogateescape') == b'a\x80b'
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit