Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: merge-2.7.2
Changeset: r51668:0c697ef6b87f
Date: 2012-01-22 22:32 +0100
http://bitbucket.org/pypy/pypy/changeset/0c697ef6b87f/
Log: CPython Issue #12100: Don't reset incremental encoders of CJK codecs at each call to encode(). diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -230,14 +230,14 @@ if ignore_error == 0: flags = MBENC_FLUSH | MBENC_RESET else: - flags = MBENC_RESET + flags = 0 while True: r = pypy_cjk_enc_chunk(encodebuf, flags) if r == 0 or r == ignore_error: break multibytecodec_encerror(encodebuf, r, errors, errorcb, namecb, unicodedata) - while True: + while flags & MBENC_RESET: r = pypy_cjk_enc_reset(encodebuf) if r == 0: break diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py --- a/pypy/module/_multibytecodec/test/test_app_incremental.py +++ b/pypy/module/_multibytecodec/test/test_app_incremental.py @@ -129,6 +129,15 @@ r = e.encode(u"xyz\u5f95\u6c85") assert r == 'xyz~{abcd~}' + def test_encode_hz_noreset(self): + text = (u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002' + u'Bye.') + out = '' + e = self.IncrementalHzEncoder() + for c in text: + out += e.encode(c) + assert out == b'~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.' 
+ def test_encode_hz_error(self): e = self.IncrementalHzEncoder() raises(UnicodeEncodeError, e.encode, u"\u4321", True) diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c +++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c @@ -187,7 +187,7 @@ Py_ssize_t r; Py_ssize_t inleft = (Py_ssize_t)(d->inbuf_end - d->inbuf); Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf); - if (inleft == 0) + if (inleft == 0 && !(flags & MBENC_RESET)) return 0; r = d->codec->encode(&d->state, d->codec->config, &d->inbuf, inleft, &d->outbuf, outleft, flags); diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h +++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h @@ -84,6 +84,7 @@ #define MBERR_NOMEMORY (-4) /* out of memory */ #define MBENC_FLUSH 0x0001 /* encode all characters encodable */ +#define MBENC_RESET 0x0002 /* reset after an encoding session */ #define MBENC_MAX MBENC_FLUSH _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit