Author: Armin Rigo <[email protected]>
Branch:
Changeset: r44055:b3db989ddedc
Date: 2011-05-10 18:13 +0200
http://bitbucket.org/pypy/pypy/changeset/b3db989ddedc/
Log: In-progress: the very first test passes.
diff --git a/pypy/module/_multibytecodec/c_codecs.py
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -1,5 +1,8 @@
-import py
-from pypy.rpython.lltypesystem import lltype, rffi
+import py, sys
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rpython.lltypesystem.rstr import UNICODE
+from pypy.rpython.annlowlevel import hlunicode
+from pypy.rlib.objectmodel import keepalive_until_here
from pypy.translator.tool.cbuild import ExternalCompilationInfo
from pypy.tool.autopath import pypydir
@@ -14,11 +17,13 @@
srcdir.join('_codecs_jp.c'),
srcdir.join('_codecs_kr.c'),
srcdir.join('_codecs_tw.c'),
+ srcdir.join('multibytecodec.c'),
],
)
-MULTIBYTECODEC_PTR = rffi.VOIDP
+# Opaque pointer to the C 'struct MultibyteCodec_s'; it takes the place of
+# the untyped rffi.VOIDP declaration.
+MULTIBYTECODEC_P = rffi.COpaquePtr('struct MultibyteCodec_s',
+ compilation_info=eci)
codecs = [
# _codecs_cn
@@ -42,17 +47,77 @@
'big5', 'cp950',
]
+def llexternal(*args, **kwds):
+ """Call rffi.llexternal() with this module's default keywords.
+
+ Unless the caller passes them explicitly, 'compilation_info' is set
+ to eci, and 'sandboxsafe' and '_nowrapper' are both set to True.
+ """
+ kwds.setdefault('compilation_info', eci)
+ kwds.setdefault('sandboxsafe', True)
+ kwds.setdefault('_nowrapper', True)
+ return rffi.llexternal(*args, **kwds)
+
def getter_for(name):
- return rffi.llexternal('pypy_cjkcodec_%s' % name, [], MULTIBYTECODEC_PTR,
- compilation_info=eci, sandboxsafe=True,
- _nowrapper=True)
+ """Declare the C function 'pypy_cjkcodec_<name>', which takes no
+ argument and returns a MULTIBYTECODEC_P."""
+ return llexternal('pypy_cjkcodec_%s' % name, [], MULTIBYTECODEC_P)
_codecs_getters = dict([(name, getter_for(name)) for name in codecs])
+# a name listed twice in 'codecs' would collapse into a single dict entry
+assert len(_codecs_getters) == len(codecs)
def getcodec(name):
+ """Return the codec pointer produced by the getter registered under
+ 'name', or a null MULTIBYTECODEC_P pointer if there is no such getter."""
try:
getter = _codecs_getters[name]
except KeyError:
- return lltype.nullptr(MULTIBYTECODEC_PTR.TO)
+ return lltype.nullptr(MULTIBYTECODEC_P.TO)
else:
return getter()
+
+# ____________________________________________________________
+
+# Low-level declarations of the decoding helpers defined in
+# cjkcodecs/multibytecodec.c.  DECODEBUF_P is an opaque pointer to the
+# C 'struct pypy_cjk_dec_s'.
+DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
+ [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
+ DECODEBUF_P)
+pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
+ lltype.Void)
+pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
+ lltype.Signed)
+pypy_cjk_dec_outbuf = llexternal('pypy_cjk_dec_outbuf', [DECODEBUF_P],
+ rffi.CWCHARP)
+pypy_cjk_dec_outlen = llexternal('pypy_cjk_dec_outlen', [DECODEBUF_P],
+ rffi.SSIZE_T)
+
+def decode(codec, stringdata):
+    """Decode the byte string 'stringdata' using 'codec'.
+
+    'codec' must be a non-null MULTIBYTECODEC_P, as returned by getcodec().
+    Returns the decoded unicode string.  Raises MemoryError if the input
+    is longer than sys.maxint // 4 or if the C-level decoding buffer
+    cannot be allocated, and NotImplementedError if the codec reports a
+    decoding problem (recovery from errors is not written yet).
+    """
+    inleft = len(stringdata)
+    # pypy_cjk_dec_init() allocates inleft * sizeof(Py_UNICODE) bytes; this
+    # bound keeps that product from overflowing (assuming Py_UNICODE is at
+    # most 4 bytes wide -- TODO confirm)
+    if inleft > sys.maxint // 4:
+        raise MemoryError
+    inbuf = rffi.get_nonmovingbuffer(stringdata)
+    try:
+        decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
+        if not decodebuf:
+            raise MemoryError
+        try:
+            r = pypy_cjk_dec_chunk(decodebuf)
+            if r != 0:
+                # pypy_cjk_dec_chunk() returns 0 once the input has been
+                # decoded.  There is no error handler to call yet, so fail
+                # explicitly rather than reference an undefined name.
+                raise NotImplementedError(
+                    "_multibytecodec: decode error handling is not implemented")
+            src = pypy_cjk_dec_outbuf(decodebuf)
+            length = pypy_cjk_dec_outlen(decodebuf)
+            return unicode_from_raw(src, length)
+        #
+        finally:
+            # 'src' points into memory owned by decodebuf, which is only
+            # released here, after unicode_from_raw() has copied the data
+            pypy_cjk_dec_free(decodebuf)
+    #
+    finally:
+        rffi.free_nonmovingbuffer(stringdata, inbuf)
+
+# ____________________________________________________________
+
+def unicode_from_raw(src, length):
+ """Build a unicode string from 'length' characters of raw memory.
+
+ 'src' is a rffi.CWCHARP; its first 'length' items are copied with
+ raw_memcopy() into a newly malloc'ed UNICODE string, which is then
+ returned as a high-level unicode object via hlunicode().
+ """
+ result = lltype.malloc(UNICODE, length)
+ try:
+ # offset of the first character inside the UNICODE structure
+ uni_chars_offset = (rffi.offsetof(UNICODE, 'chars') + \
+ rffi.itemoffsetof(UNICODE.chars, 0))
+ dest = rffi.cast_ptr_to_adr(result) + uni_chars_offset
+ src = rffi.cast_ptr_to_adr(src) + rffi.itemoffsetof(rffi.CWCHARP.TO)
+ rffi.raw_memcopy(src, dest,
+ llmemory.sizeof(lltype.UniChar) * length)
+ return hlunicode(result)
+ finally:
+ # NOTE(review): presumably keeps 'result' alive while only its raw
+ # address 'dest' is in use -- confirm against rlib.objectmodel
+ keepalive_until_here(result)
diff --git a/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h
b/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h
--- a/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h
+++ b/pypy/module/_multibytecodec/cjkcodecs/cjkcodecs.h
@@ -209,12 +209,12 @@
#define END_MAPPINGS_LIST /* empty */
#define BEGIN_CODECS_LIST /* empty */
-#define _CODEC(name) \
- const MultibyteCodec _pypy_cjkcodec_##name; \
- void *pypy_cjkcodec_##name(void) { \
- return (void *)&_pypy_cjkcodec_##name; \
- } \
- const MultibyteCodec _pypy_cjkcodec_##name
+/* _CODEC(name) declares a file-static MultibyteCodec object together with
+   the accessor pypy_cjkcodec_<name>(), which returns its address.  The
+   expansion ends with an unterminated declaration of that object, so
+   whatever follows the macro invocation completes it. */
+#define _CODEC(name) \
+ static const MultibyteCodec _pypy_cjkcodec_##name; \
+ const MultibyteCodec *pypy_cjkcodec_##name(void) { \
+ return &_pypy_cjkcodec_##name; \
+ } \
+ static const MultibyteCodec _pypy_cjkcodec_##name
#define _STATEFUL_METHODS(enc) \
enc##_encode, \
enc##_encode_init, \
diff --git a/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.c
b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.c
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.c
@@ -0,0 +1,53 @@
+#include <stdlib.h>
+#include "multibytecodec.h"
+
+
+/* Allocate and initialize a decoding buffer for 'codec' over the 'inlen'
+   bytes starting at 'inbuf'.  The output buffer is sized for 'inlen'
+   Py_UNICODE items.  'inbuf' is not copied: the structure keeps pointers
+   into it.  Returns NULL if an allocation fails or if the codec's
+   'decinit' function returns nonzero. */
+struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
+                                         char *inbuf, Py_ssize_t inlen)
+{
+  struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
+  if (!d)
+    return NULL;
+  if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)
+    goto errorexit;
+
+  d->codec = codec;
+  d->inbuf = inbuf;
+  d->inbuf_end = inbuf + inlen;
+  /* malloc(0) is allowed to return NULL; always request at least one item
+     so that an empty input is not mistaken for an out-of-memory error */
+  d->outbuf_start = malloc((inlen > 0 ? inlen : 1) * sizeof(Py_UNICODE));
+  if (!d->outbuf_start)
+    goto errorexit;
+  d->outbuf = d->outbuf_start;
+  d->outbuf_end = d->outbuf_start + inlen;
+  return d;
+
+ errorexit:
+  free(d);
+  return NULL;
+}
+
+/* Release the output buffer and then the decoding buffer itself.
+   'd' must not be NULL: it is dereferenced unconditionally. */
+void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
+{
+ free(d->outbuf_start);
+ free(d);
+}
+
+/* Run the codec's 'decode' function over the input that is left.
+   Returns 0 at once if no input remains; otherwise returns whatever
+   'decode' returns.  'decode' receives the addresses of d->inbuf and
+   d->outbuf (presumably so that it can advance them -- confirm against
+   the codec implementations). */
+long pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *d)
+{
+ Py_ssize_t inleft = (Py_ssize_t)(d->inbuf_end - d->inbuf);
+ Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf);
+ if (inleft == 0)
+ return 0;
+ return d->codec->decode(&d->state, d->codec->config,
+ &d->inbuf, inleft, &d->outbuf, outleft);
+}
+
+/* Return the start of the output buffer.  The memory stays owned by 'd'
+   and is released by pypy_cjk_dec_free(). */
+Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *d)
+{
+ return d->outbuf_start;
+}
+
+/* Return the number of Py_UNICODE items between the start of the output
+   buffer and the current output position. */
+Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *d)
+{
+ return d->outbuf - d->outbuf_start;
+}
diff --git a/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h
b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h
--- a/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h
+++ b/pypy/module/_multibytecodec/cjkcodecs/multibytecodec.h
@@ -42,7 +42,7 @@
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
const void *config);
-typedef struct {
+typedef struct MultibyteCodec_s {
const char *encoding;
const void *config;
mbcodec_init codecinit;
@@ -64,4 +64,18 @@
#define MBENC_MAX MBENC_FLUSH
+/* State of one decoding operation; created by pypy_cjk_dec_init() and
+   released by pypy_cjk_dec_free(). */
+struct pypy_cjk_dec_s {
+  /* const, because pypy_cjk_dec_init() stores the 'const MultibyteCodec *'
+     it receives here and the codec is only ever read through this field */
+  const MultibyteCodec *codec;
+  MultibyteCodec_State state;
+  char *inbuf, *inbuf_end;          /* current input position, input end */
+  Py_UNICODE *outbuf_start, *outbuf, *outbuf_end;
+                                    /* output start, position, and end */
+};
+
+/* Decoding helpers, implemented in multibytecodec.c */
+struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
+ char *inbuf, Py_ssize_t inlen);
+void pypy_cjk_dec_free(struct pypy_cjk_dec_s *);
+long pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *);
+Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *);
+Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
+
#endif
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py
b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -1,4 +1,5 @@
from pypy.module._multibytecodec.c_codecs import getcodec, codecs
+from pypy.module._multibytecodec.c_codecs import decode
def test_codecs_existence():
@@ -7,3 +8,8 @@
assert c
c = getcodec("foobar")
assert not c
+
+def test_gbk_simple():
+ # with the gbk codec, the two bytes A1 AA must decode to the single
+ # character U+2014
+ c = getcodec("gbk")
+ u = decode(c, "\xA1\xAA")
+ assert u == unichr(0x2014)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit