Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r46143:f040a9a3f4fb Date: 2011-07-31 18:16 +0200 http://bitbucket.org/pypy/pypy/changeset/f040a9a3f4fb/
Log: Incremental support: keep the decodebuf around several calls to decodeex(), and don't complain when getting MBERR_TOOFEW. diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -52,11 +52,13 @@ includes = ['src/cjkcodecs/multibytecodec.h'], include_dirs = [str(srcdir)], export_symbols = [ + "pypy_cjk_dec_new", "pypy_cjk_dec_init", "pypy_cjk_dec_free", "pypy_cjk_dec_chunk", "pypy_cjk_dec_outbuf", "pypy_cjk_dec_outlen", "pypy_cjk_dec_inbuf_remaining", "pypy_cjk_dec_inbuf_consumed", "pypy_cjk_dec_replace_on_error", + "pypy_cjk_enc_new", "pypy_cjk_enc_init", "pypy_cjk_enc_free", "pypy_cjk_enc_chunk", "pypy_cjk_enc_reset", "pypy_cjk_enc_outbuf", "pypy_cjk_enc_outlen", "pypy_cjk_enc_inbuf_remaining", "pypy_cjk_enc_inbuf_consumed", @@ -92,9 +94,11 @@ # Decoding DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci) +pypy_cjk_dec_new = llexternal('pypy_cjk_dec_new', + [MULTIBYTECODEC_P], DECODEBUF_P) pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init', - [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T], - DECODEBUF_P) + [DECODEBUF_P, rffi.CCHARP, rffi.SSIZE_T], + rffi.SSIZE_T) pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P], lltype.Void) pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P], @@ -113,25 +117,33 @@ rffi.SSIZE_T) def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None): + decodebuf = pypy_cjk_dec_new(codec) + if not decodebuf: + raise MemoryError + try: + return decodeex(decodebuf, stringdata, errors, errorcb, namecb) + finally: + pypy_cjk_dec_free(decodebuf) + +def decodeex(decodebuf, stringdata, errors="strict", errorcb=None, namecb=None, + incompletepos=None): inleft = len(stringdata) inbuf = rffi.get_nonmovingbuffer(stringdata) try: - decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft) - if not decodebuf: + if pypy_cjk_dec_init(decodebuf, inbuf, inleft) < 0: raise MemoryError - try: - while True: - r = pypy_cjk_dec_chunk(decodebuf) - if r == 0: - break - multibytecodec_decerror(decodebuf, r, errors, - errorcb, namecb, stringdata) - src = pypy_cjk_dec_outbuf(decodebuf) - length = pypy_cjk_dec_outlen(decodebuf) - return rffi.wcharpsize2unicode(src, length) - # - finally: - pypy_cjk_dec_free(decodebuf) + while True: + r = pypy_cjk_dec_chunk(decodebuf) + if r == 0: + break + if incompletepos is not None and r == MBERR_TOOFEW: + incompletepos[0] = pypy_cjk_dec_inbuf_consumed(decodebuf) + break + multibytecodec_decerror(decodebuf, r, errors, + errorcb, namecb, stringdata) + src = pypy_cjk_dec_outbuf(decodebuf) + length = pypy_cjk_dec_outlen(decodebuf) + return rffi.wcharpsize2unicode(src, length) # finally: rffi.free_nonmovingbuffer(stringdata, inbuf) diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -2,6 +2,7 @@ from pypy.module._multibytecodec.c_codecs import getcodec, codecs from pypy.module._multibytecodec.c_codecs import decode, encode from pypy.module._multibytecodec.c_codecs import EncodeDecodeError +from pypy.module._multibytecodec import c_codecs def test_codecs_existence(): @@ -22,6 +23,51 @@ c = getcodec("hz") u = decode(c, "~{abc}") assert u == u'\u5f95\u6cef' + u = decode(c, "~{") + assert u == u'' + +def test_decodeex_hz(): + c = getcodec("hz") + decodebuf = c_codecs.pypy_cjk_dec_new(c) + u = c_codecs.decodeex(decodebuf, "~{abcd~}") + assert u == u'\u5f95\u6c85' + u = c_codecs.decodeex(decodebuf, "~{efgh~}") + assert u == u'\u5f50\u73b7' + u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh") + assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7' + c_codecs.pypy_cjk_dec_free(decodebuf) + +def test_decodeex_hz_incomplete(): + c = getcodec("hz") + decodebuf = c_codecs.pypy_cjk_dec_new(c) + buf = '' + for c, output in zip("!~{abcd~}xyz~{efgh", + [u'!', # ! + u'', # ~ + u'', # { + u'', # a + u'\u5f95', # b + u'', # c + u'\u6c85', # d + u'', # ~ + u'', # } + u'x', # x + u'y', # y + u'z', # z + u'', # ~ + u'', # { + u'', # e + u'\u5f50', # f + u'', # g + u'\u73b7', # h + ]): + buf += c + incompletepos = [len(buf)] + u = c_codecs.decodeex(decodebuf, buf, incompletepos=incompletepos) + assert u == output + buf = buf[incompletepos[0]:] + assert buf == '' + c_codecs.pypy_cjk_dec_free(decodebuf) def test_decode_hz_error(): # error diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c +++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c @@ -3,31 +3,38 @@ #include "src/cjkcodecs/multibytecodec.h" -struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec, - char *inbuf, Py_ssize_t inlen) +struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec) { struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s)); if (!d) return NULL; if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0) - goto errorexit; + { + free(d); + return NULL; + } + d->codec = codec; + d->outbuf_start = NULL; + return d; +} - d->codec = codec; +Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d, + char *inbuf, Py_ssize_t inlen) +{ d->inbuf_start = inbuf; d->inbuf = inbuf; d->inbuf_end = inbuf + inlen; - d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ? - malloc(inlen * sizeof(Py_UNICODE)) : - NULL); - if (!d->outbuf_start) - goto errorexit; + if (d->outbuf_start == NULL) + { + d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ? + malloc(inlen * sizeof(Py_UNICODE)) : + NULL); + if (d->outbuf_start == NULL) + return -1; + d->outbuf_end = d->outbuf_start + inlen; + } d->outbuf = d->outbuf_start; - d->outbuf_end = d->outbuf_start + inlen; - return d; - - errorexit: - free(d); - return NULL; + return 0; } void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d) diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h +++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h @@ -94,8 +94,9 @@ Py_UNICODE *outbuf_start, *outbuf, *outbuf_end; }; -struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec, - char *inbuf, Py_ssize_t inlen); +struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec); +Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d, + char *inbuf, Py_ssize_t inlen); void pypy_cjk_dec_free(struct pypy_cjk_dec_s *); Py_ssize_t pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *); Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *); _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit