[pypy-commit] pypy py3.5: Implement the codec "namereplace" error handler.

amauryfa Tue, 01 Nov 2016 09:59:24 -0700

Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.5
Changeset: r88038:d581f95a1c58
Date: 2016-11-01 17:44 +0100
http://bitbucket.org/pypy/pypy/changeset/d581f95a1c58/


Log:    Implement the codec "namereplace" error handler. u'\uabcd' ==>
        b'\N{SOME NAME}'

diff --git a/pypy/module/_codecs/interp_codecs.py 
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,10 +1,11 @@
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib.rstring import UnicodeBuilder, StringBuilder
 from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.module.unicodedata import unicodedb
 
 
 class VersionTag(object):
@@ -295,6 +296,26 @@
         raise oefmt(space.w_TypeError,
                     "don't know how to handle %T in error callback", w_exc)
 
+def namereplace_errors(space, w_exc):
+    check_exception(space, w_exc)
+    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+        obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
+        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+        w_end = space.getattr(w_exc, space.wrap('end'))
+        end = space.int_w(w_end)
+        builder = StringBuilder()
+        pos = start
+        while pos < end:
+            oc = ord(obj[pos])
+            builder.append('\\N{')
+            builder.append(unicodedb.name(oc))
+            builder.append('}')
+            pos += 1
+        return space.newtuple([space.newbytes(builder.build()), w_end])
+    else:
+        raise oefmt(space.w_TypeError,
+                    "don't know how to handle %T in error callback", w_exc)
+
 def surrogatepass_errors(space, w_exc):
     check_exception(space, w_exc)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
@@ -382,7 +403,8 @@
     "NOT_RPYTHON"
     state = space.fromcache(CodecState)
     for error in ("strict", "ignore", "replace", "xmlcharrefreplace",
-                  "backslashreplace", "surrogateescape", "surrogatepass"):
+                  "backslashreplace", "surrogateescape", "surrogatepass",
+                  "namereplace"):
         name = error + "_errors"
         state.codec_error_registry[error] = 
space.wrap(interp2app(globals()[name]))
 
diff --git a/pypy/module/_codecs/test/test_codecs.py 
b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -561,9 +561,14 @@
         assert b'\xff'.decode('utf-7', 'ignore') == ''
         assert b'\x00'.decode('unicode-internal', 'ignore') == ''
 
-    def test_backslahreplace(self):
+    def test_backslashreplace(self):
         assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') 
== b'a\\xac\u1234\u20ac\u8000'
 
+    def test_namereplace(self):
+        assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'namereplace') == (
+            b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+            b'\\N{CJK UNIFIED IDEOGRAPH-8000}')
+
     def test_surrogateescape(self):
         assert b'a\x80b'.decode('utf-8', 'surrogateescape') == 'a\udc80b'
         assert 'a\udc80b'.encode('utf-8', 'surrogateescape') == b'a\x80b'
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3.5: Implement the codec "namereplace" error handler.

Reply via email to