Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r63601:ce157ecca710
Date: 2013-04-24 18:03 -0700
http://bitbucket.org/pypy/pypy/changeset/ce157ecca710/

Log:    CPython issue #850997: support some basic error handling in the mbcs
        codec. The encoder now supports strict/replace; the decoder supports
        strict/ignore.

diff --git a/pypy/module/_codecs/interp_codecs.py 
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -538,8 +538,30 @@
     make_decoder_wrapper(decoders)
 
 if hasattr(runicode, 'str_decode_mbcs'):
-    make_encoder_wrapper('mbcs_encode')
-    make_decoder_wrapper('mbcs_decode')
+    # mbcs functions are not regular, because we have to pass
+    # "force_replace=False" (encoder) / "force_ignore=False" (decoder)
+    @unwrap_spec(uni=unicode, errors='str_or_None')
+    def mbcs_encode(space, uni, errors="strict"):
+        if errors is None:  # None means: use the default 'strict' handler
+            errors = 'strict'
+        state = space.fromcache(CodecState)  # provides encode_error_handler
+        result = runicode.unicode_encode_mbcs(
+            uni, len(uni), errors, state.encode_error_handler,
+            force_replace=False)  # presumably: honour `errors`, not forced
+        return space.newtuple([space.wrapbytes(result), space.wrap(len(uni))])
+
+    @unwrap_spec(string='bufferstr', errors='str_or_None',
+                 w_final=WrappedDefault(False))
+    def mbcs_decode(space, string, errors="strict", w_final=None):
+        if errors is None:  # None means: use the default 'strict' handler
+            errors = 'strict'
+        final = space.is_true(w_final)  # truth value of the wrapped flag
+        state = space.fromcache(CodecState)  # provides decode_error_handler
+        result, consumed = runicode.str_decode_mbcs(
+            string, len(string), errors,
+            final, state.decode_error_handler,
+            force_ignore=False)  # presumably: honour `errors`, not forced
+        return space.newtuple([space.wrap(result), space.wrap(consumed)])
 
 # utf-8 functions are not regular, because we have to pass
 # "allow_surrogates=False"
diff --git a/pypy/module/_codecs/test/test_codecs.py 
b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -668,5 +668,7 @@
             return
         assert 'test'.encode('mbcs') == b'test'
         assert 'caf\xe9'.encode('mbcs') == b'caf\xe9'
-        assert '\u040a'.encode('mbcs') == b'?' # some cyrillic letter
+        raises(UnicodeEncodeError, '\u040a'.encode, 'mbcs')
+        raises(UnicodeEncodeError,
+               "-\u5171\u0141\u2661\u0363\uDC80".encode, 'mbcs')
         assert b'cafx\e9'.decode('mbcs') == 'cafx\e9'
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to