Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r46143:f040a9a3f4fb
Date: 2011-07-31 18:16 +0200
http://bitbucket.org/pypy/pypy/changeset/f040a9a3f4fb/

Log:    Incremental support: keep the decodebuf around several calls to
        decodeex(), and don't complain when getting MBERR_TOOFEW.

diff --git a/pypy/module/_multibytecodec/c_codecs.py 
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -52,11 +52,13 @@
     includes = ['src/cjkcodecs/multibytecodec.h'],
     include_dirs = [str(srcdir)],
     export_symbols = [
+        "pypy_cjk_dec_new",
         "pypy_cjk_dec_init", "pypy_cjk_dec_free", "pypy_cjk_dec_chunk",
         "pypy_cjk_dec_outbuf", "pypy_cjk_dec_outlen",
         "pypy_cjk_dec_inbuf_remaining", "pypy_cjk_dec_inbuf_consumed",
         "pypy_cjk_dec_replace_on_error",
 
+        "pypy_cjk_enc_new",
         "pypy_cjk_enc_init", "pypy_cjk_enc_free", "pypy_cjk_enc_chunk",
         "pypy_cjk_enc_reset", "pypy_cjk_enc_outbuf", "pypy_cjk_enc_outlen",
         "pypy_cjk_enc_inbuf_remaining", "pypy_cjk_enc_inbuf_consumed",
@@ -92,9 +94,11 @@
 # Decoding
 
 DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_new = llexternal('pypy_cjk_dec_new',
+                              [MULTIBYTECODEC_P], DECODEBUF_P)
 pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
-                               [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
-                               DECODEBUF_P)
+                               [DECODEBUF_P, rffi.CCHARP, rffi.SSIZE_T],
+                               rffi.SSIZE_T)
 pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
                                lltype.Void)
 pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
@@ -113,25 +117,33 @@
                                            rffi.SSIZE_T)
 
 def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
+    decodebuf = pypy_cjk_dec_new(codec)
+    if not decodebuf:
+        raise MemoryError
+    try:
+        return decodeex(decodebuf, stringdata, errors, errorcb, namecb)
+    finally:
+        pypy_cjk_dec_free(decodebuf)
+
+def decodeex(decodebuf, stringdata, errors="strict", errorcb=None, namecb=None,
+             incompletepos=None):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
-        decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
-        if not decodebuf:
+        if pypy_cjk_dec_init(decodebuf, inbuf, inleft) < 0:
             raise MemoryError
-        try:
-            while True:
-                r = pypy_cjk_dec_chunk(decodebuf)
-                if r == 0:
-                    break
-                multibytecodec_decerror(decodebuf, r, errors,
-                                        errorcb, namecb, stringdata)
-            src = pypy_cjk_dec_outbuf(decodebuf)
-            length = pypy_cjk_dec_outlen(decodebuf)
-            return rffi.wcharpsize2unicode(src, length)
-        #
-        finally:
-            pypy_cjk_dec_free(decodebuf)
+        while True:
+            r = pypy_cjk_dec_chunk(decodebuf)
+            if r == 0:
+                break
+            if incompletepos is not None and r == MBERR_TOOFEW:
+                incompletepos[0] = pypy_cjk_dec_inbuf_consumed(decodebuf)
+                break
+            multibytecodec_decerror(decodebuf, r, errors,
+                                    errorcb, namecb, stringdata)
+        src = pypy_cjk_dec_outbuf(decodebuf)
+        length = pypy_cjk_dec_outlen(decodebuf)
+        return rffi.wcharpsize2unicode(src, length)
     #
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py 
b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -2,6 +2,7 @@
 from pypy.module._multibytecodec.c_codecs import getcodec, codecs
 from pypy.module._multibytecodec.c_codecs import decode, encode
 from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
+from pypy.module._multibytecodec import c_codecs
 
 
 def test_codecs_existence():
@@ -22,6 +23,51 @@
     c = getcodec("hz")
     u = decode(c, "~{abc}")
     assert u == u'\u5f95\u6cef'
+    u = decode(c, "~{")
+    assert u == u''
+
+def test_decodeex_hz():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    u = c_codecs.decodeex(decodebuf, "~{abcd~}")
+    assert u == u'\u5f95\u6c85'
+    u = c_codecs.decodeex(decodebuf, "~{efgh~}")
+    assert u == u'\u5f50\u73b7'
+    u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh")
+    assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'
+    c_codecs.pypy_cjk_dec_free(decodebuf)
+
+def test_decodeex_hz_incomplete():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    buf = ''
+    for c, output in zip("!~{abcd~}xyz~{efgh",
+          [u'!',  # !
+           u'',   # ~
+           u'',   # {
+           u'',   # a
+           u'\u5f95',   # b
+           u'',   # c
+           u'\u6c85',   # d
+           u'',   # ~
+           u'',   # }
+           u'x',  # x
+           u'y',  # y
+           u'z',  # z
+           u'',   # ~
+           u'',   # {
+           u'',   # e
+           u'\u5f50',   # f
+           u'',   # g
+           u'\u73b7',   # h
+           ]):
+        buf += c
+        incompletepos = [len(buf)]
+        u = c_codecs.decodeex(decodebuf, buf, incompletepos=incompletepos)
+        assert u == output
+        buf = buf[incompletepos[0]:]
+    assert buf == ''
+    c_codecs.pypy_cjk_dec_free(decodebuf)
 
 def test_decode_hz_error():
     # error
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c 
b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -3,31 +3,38 @@
 #include "src/cjkcodecs/multibytecodec.h"
 
 
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
-                                         char *inbuf, Py_ssize_t inlen)
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec)
 {
   struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
   if (!d)
     return NULL;
   if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)
-    goto errorexit;
+    {
+      free(d);
+      return NULL;
+    }
+  d->codec = codec;
+  d->outbuf_start = NULL;
+  return d;
+}
 
-  d->codec = codec;
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+                             char *inbuf, Py_ssize_t inlen)
+{
   d->inbuf_start = inbuf;
   d->inbuf = inbuf;
   d->inbuf_end = inbuf + inlen;
-  d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
-                     malloc(inlen * sizeof(Py_UNICODE)) :
-                     NULL);
-  if (!d->outbuf_start)
-    goto errorexit;
+  if (d->outbuf_start == NULL)
+    {
+      d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
+                         malloc(inlen * sizeof(Py_UNICODE)) :
+                         NULL);
+      if (d->outbuf_start == NULL)
+        return -1;
+      d->outbuf_end = d->outbuf_start + inlen;
+    }
   d->outbuf = d->outbuf_start;
-  d->outbuf_end = d->outbuf_start + inlen;
-  return d;
-
- errorexit:
-  free(d);
-  return NULL;
+  return 0;
 }
 
 void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h 
b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -94,8 +94,9 @@
   Py_UNICODE *outbuf_start, *outbuf, *outbuf_end;
 };
 
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
-                                         char *inbuf, Py_ssize_t inlen);
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec);
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+                             char *inbuf, Py_ssize_t inlen);
 void pypy_cjk_dec_free(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *);
 Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *);
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to