Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r46145:8949a315da3f
Date: 2011-07-31 18:34 +0200
http://bitbucket.org/pypy/pypy/changeset/8949a315da3f/
Log: MultibyteIncrementalDecoder. diff --git a/pypy/module/_multibytecodec/__init__.py b/pypy/module/_multibytecodec/__init__.py --- a/pypy/module/_multibytecodec/__init__.py +++ b/pypy/module/_multibytecodec/__init__.py @@ -7,13 +7,14 @@ # for compatibility this name is obscured, and should be called # via the _codecs_*.py modules written in lib_pypy. '__getcodec': 'interp_multibytecodec.getcodec', + + 'MultibyteIncrementalDecoder': + 'interp_incremental.MultibyteIncrementalDecoder', } appleveldefs = { 'MultibyteIncrementalEncoder': 'app_multibytecodec.MultibyteIncrementalEncoder', - 'MultibyteIncrementalDecoder': - 'app_multibytecodec.MultibyteIncrementalDecoder', 'MultibyteStreamReader': 'app_multibytecodec.MultibyteStreamReader', 'MultibyteStreamWriter': diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py new file mode 100644 --- /dev/null +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -0,0 +1,80 @@ +from pypy.rpython.lltypesystem import lltype +from pypy.module._multibytecodec import c_codecs +from pypy.module._multibytecodec.interp_multibytecodec import ( + MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror) +from pypy.interpreter.baseobjspace import Wrappable +from pypy.interpreter.gateway import interp2app, unwrap_spec +from pypy.interpreter.typedef import TypeDef +from pypy.module._codecs.interp_codecs import CodecState + + +class MultibyteIncrementalDecoder(Wrappable): + + def __init__(self, space, errors): + if errors is None: + errors = 'strict' + self.space = space + self.errors = errors + w_codec = space.getattr(space.wrap(self), space.wrap("codec")) + codec = space.interp_w(MultibyteCodec, w_codec) + self.codec = codec.codec + self.name = codec.name + self._initialize() + + def _initialize(self): + self.decodebuf = c_codecs.pypy_cjk_dec_new(self.codec) + self.pending = "" + + def _free(self): + self.pending = None + if self.decodebuf: + 
pypy_cjk_dec_free(self.decodebuf) + self.decodebuf = lltype.nullptr(DECODEBUF_P.TO) + + def __del__(self): + self._free() + + def reset_w(self): + self._free() + self._initialize() + + @unwrap_spec(object=str, final=bool) + def decode_w(self, object, final=False): + space = self.space + state = space.fromcache(CodecState) + if len(self.pending) > 0: + object = self.pending + object + try: + output = c_codecs.decodeex(self.decodebuf, object, self.errors, + state.decode_error_handler, self.name, + get_ignore_error(final)) + except c_codecs.EncodeDecodeError, e: + raise wrap_unicodedecodeerror(space, e, object, self.name) + except RuntimeError: + raise wrap_runtimeerror(space) + pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) + assert 0 <= pos <= len(object) + self.pending = object[pos:] + return space.wrap(output) + + +@unwrap_spec(errors="str_or_None") +def mbidecoder_new(space, w_subtype, errors=None): + r = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype) + r.__init__(space, errors) + return space.wrap(r) + +MultibyteIncrementalDecoder.typedef = TypeDef( + 'MultibyteIncrementalDecoder', + __module__ = '_multibytecodec', + __new__ = interp2app(mbidecoder_new), + decode = interp2app(MultibyteIncrementalDecoder.decode_w), + reset = interp2app(MultibyteIncrementalDecoder.reset_w), + ) + + +def get_ignore_error(final): + if final: + return 0 # don't ignore any error + else: + return c_codecs.MBERR_TOOFEW diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -22,17 +22,9 @@ output = c_codecs.decode(self.codec, input, errors, state.decode_error_handler, self.name) except c_codecs.EncodeDecodeError, e: - raise OperationError( - space.w_UnicodeDecodeError, - space.newtuple([ - space.wrap(self.name), - space.wrap(input), - space.wrap(e.start), - space.wrap(e.end), 
- space.wrap(e.reason)])) + raise wrap_unicodedecodeerror(space, e, input, self.name) except RuntimeError: - raise OperationError(space.w_RuntimeError, - space.wrap("internal codec error")) + raise wrap_runtimeerror(space) return space.newtuple([space.wrap(output), space.wrap(len(input))]) @@ -46,17 +38,9 @@ output = c_codecs.encode(self.codec, input, errors, state.encode_error_handler, self.name) except c_codecs.EncodeDecodeError, e: - raise OperationError( - space.w_UnicodeEncodeError, - space.newtuple([ - space.wrap(self.name), - space.wrap(input), - space.wrap(e.start), - space.wrap(e.end), - space.wrap(e.reason)])) + raise wrap_unicodeencodeerror(space, e, input, self.name) except RuntimeError: - raise OperationError(space.w_RuntimeError, - space.wrap("internal codec error")) + raise wrap_runtimeerror(space) return space.newtuple([space.wrap(output), space.wrap(len(input))]) @@ -78,3 +62,28 @@ raise OperationError(space.w_LookupError, space.wrap("no such codec is supported.")) return space.wrap(MultibyteCodec(name, codec)) + + +def wrap_unicodedecodeerror(space, e, input, name): + return OperationError( + space.w_UnicodeDecodeError, + space.newtuple([ + space.wrap(name), + space.wrap(input), + space.wrap(e.start), + space.wrap(e.end), + space.wrap(e.reason)])) + +def wrap_unicodeencodeerror(space, e, input, name): + raise OperationError( + space.w_UnicodeEncodeError, + space.newtuple([ + space.wrap(name), + space.wrap(input), + space.wrap(e.start), + space.wrap(e.end), + space.wrap(e.reason)])) + +def wrap_runtimeerror(space): + raise OperationError(space.w_RuntimeError, + space.wrap("internal codec error")) diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py new file mode 100644 --- /dev/null +++ b/pypy/module/_multibytecodec/test/test_app_incremental.py @@ -0,0 +1,51 @@ +from pypy.conftest import gettestobjspace + + +class AppTestClasses: + def setup_class(cls): + cls.space = 
gettestobjspace(usemodules=['_multibytecodec']) + cls.w_IncrementalHzDecoder = cls.space.appexec([], """(): + import _codecs_cn + from _multibytecodec import MultibyteIncrementalDecoder + + class IncrementalHzDecoder(MultibyteIncrementalDecoder): + codec = _codecs_cn.getcodec('hz') + + return IncrementalHzDecoder + """) + + def test_decode_hz(self): + d = self.IncrementalHzDecoder() + r = d.decode("~{abcd~}") + assert r == u'\u5f95\u6c85' + r = d.decode("~{efgh~}") + assert r == u'\u5f50\u73b7' + for c, output in zip("!~{abcd~}xyz~{efgh", + [u'!', # ! + u'', # ~ + u'', # { + u'', # a + u'\u5f95', # b + u'', # c + u'\u6c85', # d + u'', # ~ + u'', # } + u'x', # x + u'y', # y + u'z', # z + u'', # ~ + u'', # { + u'', # e + u'\u5f50', # f + u'', # g + u'\u73b7', # h + ]): + r = d.decode(c) + assert r == output + + def test_decode_hz_final(self): + d = self.IncrementalHzDecoder() + r = d.decode("~{", True) + assert r == u'' + raises(UnicodeDecodeError, d.decode, "~", True) + raises(UnicodeDecodeError, d.decode, "~{a", True) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit