Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r44055:b3db989ddedc
Date: 2011-05-10 18:13 +0200
http://bitbucket.org/pypy/pypy/changeset/b3db989ddedc/

Log:    In-progress: the very first test passes.

diff --git a/pypy/module/_multibytecodec/c_codecs.py 
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -1,5 +1,8 @@
-import py
-from pypy.rpython.lltypesystem import lltype, rffi
+import py, sys
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rpython.lltypesystem.rstr import UNICODE
+from pypy.rpython.annlowlevel import hlunicode
+from pypy.rlib.objectmodel import keepalive_until_here
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.autopath import pypydir
 
@@ -14,11 +17,13 @@
         srcdir.join('_codecs_jp.c'),
         srcdir.join('_codecs_kr.c'),
         srcdir.join('_codecs_tw.c'),
+        srcdir.join('multibytecodec.c'),
     ],
 )
 
 
-MULTIBYTECODEC_PTR = rffi.VOIDP
+MULTIBYTECODEC_P = rffi.COpaquePtr('struct MultibyteCodec_s',
+                                   compilation_info=eci)  # opaque codec ptr
 
 codecs = [
     # _codecs_cn
@@ -42,17 +47,77 @@
     'big5', 'cp950',
     ]
 
+def llexternal(*args, **kwds):  # rffi.llexternal with this module's defaults
+    kwds.setdefault('compilation_info', eci)
+    kwds.setdefault('sandboxsafe', True)
+    kwds.setdefault('_nowrapper', True)
+    return rffi.llexternal(*args, **kwds)  # explicit caller kwds still win
+
 def getter_for(name):
-    return rffi.llexternal('pypy_cjkcodec_%s' % name, [], MULTIBYTECODEC_PTR,
-                           compilation_info=eci, sandboxsafe=True,
-                           _nowrapper=True)
+    return llexternal('pypy_cjkcodec_%s' % name, [], MULTIBYTECODEC_P)  # C getter
 
 _codecs_getters = dict([(name, getter_for(name)) for name in codecs])
+assert len(_codecs_getters) == len(codecs)  # fails if `codecs` has duplicates
 
 def getcodec(name):
     try:
         getter = _codecs_getters[name]
     except KeyError:
-        return lltype.nullptr(MULTIBYTECODEC_PTR.TO)
+        return lltype.nullptr(MULTIBYTECODEC_P.TO)  # unknown name -> NULL codec
     else:
         return getter()
+
+# ____________________________________________________________
+# Low-level bindings to the decoder helpers in cjkcodecs/multibytecodec.c.
+DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
+                               [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
+                               DECODEBUF_P)  # NULL result means failure
+pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
+                               lltype.Void)
+pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
+                                lltype.Signed)  # decode() treats 0 as "done"
+pypy_cjk_dec_outbuf = llexternal('pypy_cjk_dec_outbuf', [DECODEBUF_P],
+                                 rffi.CWCHARP)  # start of the output buffer
+pypy_cjk_dec_outlen = llexternal('pypy_cjk_dec_outlen', [DECODEBUF_P],
+                                 rffi.SSIZE_T)  # output units written so far
+# decode(): run the C decoder for `codec` over `stringdata`, return unicode.
+def decode(codec, stringdata):
+    inleft = len(stringdata)
+    if inleft > sys.maxint // 4:  # presumably keeps C-side size math in range
+        raise MemoryError
+    inbuf = rffi.get_nonmovingbuffer(stringdata)
+    try:
+        decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
+        if not decodebuf:  # NULL from the C side
+            raise MemoryError
+        try:
+            while True:
+                r = pypy_cjk_dec_chunk(decodebuf)
+                if r == 0:
+                    break
+                multibytecodec_decerror(xxx)  # TODO: placeholder, undefined here
+            src = pypy_cjk_dec_outbuf(decodebuf)
+            length = pypy_cjk_dec_outlen(decodebuf)
+            return unicode_from_raw(src, length)  # copies before the free below
+        #
+        finally:
+            pypy_cjk_dec_free(decodebuf)
+    #
+    finally:
+        rffi.free_nonmovingbuffer(stringdata, inbuf)
+
+# ____________________________________________________________
+# Build a new unicode string from `length` UniChar items starting at raw `src`.
+def unicode_from_raw(src, length):
+    result = lltype.malloc(UNICODE, length)
+    try:
+        uni_chars_offset = (rffi.offsetof(UNICODE, 'chars') + \
+                            rffi.itemoffsetof(UNICODE.chars, 0))
+        dest = rffi.cast_ptr_to_adr(result) + uni_chars_offset  # &result.chars[0]
+        src = rffi.cast_ptr_to_adr(src) + rffi.itemoffsetof(rffi.CWCHARP.TO)
+        rffi.raw_memcopy(src, dest,
+                         llmemory.sizeof(lltype.UniChar) * length)
+        return hlunicode(result)
+    finally:
+        keepalive_until_here(result)  # keep `result` alive across the raw copy
diff --git a/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h 
b/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h
--- a/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h
+++ b/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h
@@ -209,12 +209,12 @@
 #define END_MAPPINGS_LIST /* empty */
 
 #define BEGIN_CODECS_LIST /* empty */
-#define _CODEC(name)                            \
-  const MultibyteCodec _pypy_cjkcodec_##name;   \
-  void *pypy_cjkcodec_##name(void) {            \
-    return (void *)&_pypy_cjkcodec_##name;      \
-  }                                             \
-  const MultibyteCodec _pypy_cjkcodec_##name
+#define _CODEC(name)  /* decl, getter, definition */    \
+  static const MultibyteCodec _pypy_cjkcodec_##name;    \
+  const MultibyteCodec *pypy_cjkcodec_##name(void) {    \
+    return &_pypy_cjkcodec_##name;                      \
+  }                                                     \
+  static const MultibyteCodec _pypy_cjkcodec_##name  /* presumably "= {...}" follows at the use site */
 #define _STATEFUL_METHODS(enc)          \
     enc##_encode,                       \
     enc##_encode_init,                  \
diff --git a/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.c 
b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.c
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.c
@@ -0,0 +1,53 @@
+#include <stdlib.h>
+#include "multibytecodec.h"
+
+/* Create a decoder over inbuf[0..inlen); returns NULL on failure. */
+struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
+                                         char *inbuf, Py_ssize_t inlen)
+{  /* inbuf is only referenced, never freed here: the caller keeps ownership */
+  struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
+  if (!d)
+    return NULL;
+  if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)
+    goto errorexit;  /* codec-specific state initialisation failed */
+
+  d->codec = codec;
+  d->inbuf = inbuf;
+  d->inbuf_end = inbuf + inlen;
+  d->outbuf_start = malloc((inlen > 0 ? inlen : 1) * sizeof(Py_UNICODE));
+  if (!d->outbuf_start)  /* size is never 0 above, so NULL means real OOM */
+    goto errorexit;
+  d->outbuf = d->outbuf_start;
+  d->outbuf_end = d->outbuf_start + inlen;  /* capacity: 1 unit per input byte */
+  return d;
+
+ errorexit:
+  free(d);
+  return NULL;
+}
+/* Release the output buffer and the decoder itself; d must not be NULL. */
+void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
+{
+  free(d->outbuf_start);
+  free(d);
+}
+/* Run the codec's decode() on the remaining input; 0 if no input is left. */
+long pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *d)
+{
+  Py_ssize_t inleft = (Py_ssize_t)(d->inbuf_end - d->inbuf);
+  Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf);
+  if (inleft == 0)
+    return 0;
+  return d->codec->decode(&d->state, d->codec->config,  /* may move both ptrs */
+                          &d->inbuf, inleft, &d->outbuf, outleft);
+}
+/* Return the start of the output buffer (still owned by the decoder). */
+Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *d)
+{
+  return d->outbuf_start;
+}
+/* Return how many Py_UNICODE units lie between outbuf_start and outbuf. */
+Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *d)
+{
+  return d->outbuf - d->outbuf_start;
+}
diff --git a/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h 
b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h
--- a/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h
+++ b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h
@@ -42,7 +42,7 @@
 typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
                                          const void *config);
 
-typedef struct {
+typedef struct MultibyteCodec_s {
     const char *encoding;
     const void *config;
     mbcodec_init codecinit;
@@ -64,4 +64,18 @@
 #define MBENC_MAX               MBENC_FLUSH
 
 
+struct pypy_cjk_dec_s {  /* state of one decoding run */
+  const MultibyteCodec *codec;  /* const: init receives a const codec pointer */
+  MultibyteCodec_State state;
+  char *inbuf, *inbuf_end;  /* next unread input byte / one past the last */
+  Py_UNICODE *outbuf_start, *outbuf, *outbuf_end;  /* base / write pos / limit */
+};
+/* Decoder helpers implemented in multibytecodec.c. */
+struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
+                                         char *inbuf, Py_ssize_t inlen);
+void pypy_cjk_dec_free(struct pypy_cjk_dec_s *);
+long pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *);
+Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *);
+Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
+
 #endif
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py 
b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -1,4 +1,5 @@
 from pypy.module._multibytecodec.c_codecs import getcodec, codecs
+from pypy.module._multibytecodec.c_codecs import decode
 
 
 def test_codecs_existence():
@@ -7,3 +8,8 @@
         assert c
     c = getcodec("foobar")
     assert not c
+
+def test_gbk_simple():  # GBK bytes A1 AA must decode to U+2014
+    c = getcodec("gbk")
+    u = decode(c, "\xA1\xAA")
+    assert u == unichr(0x2014)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to