Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r63601:ce157ecca710
Date: 2013-04-24 18:03 -0700
http://bitbucket.org/pypy/pypy/changeset/ce157ecca710/
Log: cpython issue850997: support some basic error handling in the mbcs
codec. The encoder now supports strict/replace; the decoder supports
strict/ignore.
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -538,8 +538,30 @@
make_decoder_wrapper(decoders)
if hasattr(runicode, 'str_decode_mbcs'):
- make_encoder_wrapper('mbcs_encode')
- make_decoder_wrapper('mbcs_decode')
+ # mbcs functions are not regular, because we have to pass
+ # "force_ignore/replace=False"
+ @unwrap_spec(uni=unicode, errors='str_or_None')
+ def mbcs_encode(space, uni, errors="strict"):
+ if errors is None:
+ errors = 'strict'
+ state = space.fromcache(CodecState)
+ result = runicode.unicode_encode_mbcs(
+ uni, len(uni), errors, state.encode_error_handler,
+ force_replace=False)
+ return space.newtuple([space.wrapbytes(result), space.wrap(len(uni))])
+
+ @unwrap_spec(string='bufferstr', errors='str_or_None',
+ w_final=WrappedDefault(False))
+ def mbcs_decode(space, string, errors="strict", w_final=None):
+ if errors is None:
+ errors = 'strict'
+ final = space.is_true(w_final)
+ state = space.fromcache(CodecState)
+ result, consumed = runicode.str_decode_mbcs(
+ string, len(string), errors,
+ final, state.decode_error_handler,
+ force_ignore=False)
+ return space.newtuple([space.wrap(result), space.wrap(consumed)])
# utf-8 functions are not regular, because we have to pass
# "allow_surrogates=False"
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -668,5 +668,7 @@
return
assert 'test'.encode('mbcs') == b'test'
assert 'caf\xe9'.encode('mbcs') == b'caf\xe9'
- assert '\u040a'.encode('mbcs') == b'?' # some cyrillic letter
+ raises(UnicodeEncodeError, '\u040a'.encode, 'mbcs')
+ raises(UnicodeEncodeError,
+ "-\u5171\u0141\u2661\u0363\uDC80".encode, 'mbcs')
assert b'cafx\e9'.decode('mbcs') == 'cafx\e9'
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit