Author: Amaury Forgeot d'Arc <amaur...@gmail.com> Branch: Changeset: r83223:edf35def96ce Date: 2016-03-21 17:55 +0100 http://bitbucket.org/pypy/pypy/changeset/edf35def96ce/
Log: Cherry-pick a few changes from the cpyext-ext branch: 3df26326119c 43629fab94e1 931af853eaab - expose "defenc" and "hash" fields of PyUnicodeObject - Allow PyString_AsString to process unicode objects. The "defenc" field is returned. diff --git a/pypy/module/cpyext/bytesobject.py b/pypy/module/cpyext/bytesobject.py --- a/pypy/module/cpyext/bytesobject.py +++ b/pypy/module/cpyext/bytesobject.py @@ -1,4 +1,4 @@ -from pypy.interpreter.error import OperationError +from pypy.interpreter.error import OperationError, oefmt from rpython.rtyper.lltypesystem import rffi, lltype from pypy.module.cpyext.api import ( cpython_api, cpython_struct, bootstrap_function, build_type_checkers, @@ -134,8 +134,14 @@ if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str: pass # typecheck returned "ok" without forcing 'ref' at all elif not PyString_Check(space, ref): # otherwise, use the alternate way - raise OperationError(space.w_TypeError, space.wrap( - "PyString_AsString only support strings")) + from pypy.module.cpyext.unicodeobject import ( + PyUnicode_Check, _PyUnicode_AsDefaultEncodedString) + if PyUnicode_Check(space, ref): + ref = _PyUnicode_AsDefaultEncodedString(space, ref, None) + else: + raise oefmt(space.w_TypeError, + "expected string or Unicode object, %T found", + from_ref(space, ref)) ref_str = rffi.cast(PyStringObject, ref) if not ref_str.c_buffer: # copy string buffer @@ -147,8 +153,14 @@ @cpython_api([PyObject, rffi.CCHARPP, rffi.CArrayPtr(Py_ssize_t)], rffi.INT_real, error=-1) def PyString_AsStringAndSize(space, ref, buffer, length): if not PyString_Check(space, ref): - raise OperationError(space.w_TypeError, space.wrap( - "PyString_AsStringAndSize only support strings")) + from pypy.module.cpyext.unicodeobject import ( + PyUnicode_Check, _PyUnicode_AsDefaultEncodedString) + if PyUnicode_Check(space, ref): + ref = _PyUnicode_AsDefaultEncodedString(space, ref, None) + else: + raise oefmt(space.w_TypeError, + "expected string or Unicode object, %T 
found", + from_ref(space, ref)) ref_str = rffi.cast(PyStringObject, ref) if not ref_str.c_buffer: # copy string buffer diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h --- a/pypy/module/cpyext/include/unicodeobject.h +++ b/pypy/module/cpyext/include/unicodeobject.h @@ -20,8 +20,12 @@ typedef struct { PyObject_HEAD - Py_UNICODE *buffer; + Py_UNICODE *str; Py_ssize_t size; + long hash; /* Hash value; -1 if not set */ + PyObject *defenc; /* (Default) Encoded version as Python + string, or NULL; this is used for + implementing the buffer protocol */ } PyUnicodeObject; diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py --- a/pypy/module/cpyext/test/test_bytesobject.py +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -139,6 +139,44 @@ ]) module.getstring() + def test_py_string_as_string_Unicode(self): + module = self.import_extension('foo', [ + ("getstring_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + buf = PyString_AsString(u1); + if (buf == NULL) + return NULL; + if (buf[3] != 't') { + PyErr_SetString(PyExc_AssertionError, "Bad conversion"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ("getstringandsize_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + Py_ssize_t len; + if (PyString_AsStringAndSize(u1, &buf, &len) < 0) + return NULL; + if (len != 4) { + PyErr_SetString(PyExc_AssertionError, "Bad Length"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ]) + module.getstring_unicode() + module.getstringandsize_unicode() + def test_format_v(self): module = self.import_extension('foo', [ ("test_string_format_v", "METH_VARARGS", diff --git a/pypy/module/cpyext/test/test_unicodeobject.py 
b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -24,7 +24,7 @@ if(PyUnicode_GetSize(s) == 11) { result = 1; } - if(s->ob_type->tp_basicsize != sizeof(void*)*5) + if(s->ob_type->tp_basicsize != sizeof(void*)*7) result = 0; Py_DECREF(s); return PyBool_FromLong(result); @@ -66,6 +66,7 @@ c = PyUnicode_AsUnicode(s); c[0] = 'a'; c[1] = 0xe9; + c[2] = 0x00; c[3] = 'c'; return s; """), @@ -74,7 +75,35 @@ assert len(s) == 4 assert s == u'aé\x00c' + def test_hash(self): + module = self.import_extension('foo', [ + ("test_hash", "METH_VARARGS", + ''' + PyObject* obj = (PyTuple_GetItem(args, 0)); + long hash = ((PyUnicodeObject*)obj)->hash; + return PyLong_FromLong(hash); + ''' + ), + ]) + res = module.test_hash(u"xyz") + assert res == hash(u'xyz') + def test_default_encoded_string(self): + module = self.import_extension('foo', [ + ("test_default_encoded_string", "METH_O", + ''' + PyObject* result = _PyUnicode_AsDefaultEncodedString(args, "replace"); + Py_INCREF(result); + return result; + ''' + ), + ]) + res = module.test_default_encoded_string(u"xyz") + assert isinstance(res, str) + assert res == 'xyz' + res = module.test_default_encoded_string(u"caf\xe9") + assert isinstance(res, str) + assert res == 'caf?' 
class TestUnicode(BaseApiTest): def test_unicodeobject(self, space, api): @@ -155,22 +184,22 @@ def test_unicode_resize(self, space, api): py_uni = new_empty_unicode(space, 10) ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw') - py_uni.c_buffer[0] = u'a' - py_uni.c_buffer[1] = u'b' - py_uni.c_buffer[2] = u'c' + py_uni.c_str[0] = u'a' + py_uni.c_str[1] = u'b' + py_uni.c_str[2] = u'c' ar[0] = rffi.cast(PyObject, py_uni) api.PyUnicode_Resize(ar, 3) py_uni = rffi.cast(PyUnicodeObject, ar[0]) assert py_uni.c_size == 3 - assert py_uni.c_buffer[1] == u'b' - assert py_uni.c_buffer[3] == u'\x00' + assert py_uni.c_str[1] == u'b' + assert py_uni.c_str[3] == u'\x00' # the same for growing ar[0] = rffi.cast(PyObject, py_uni) api.PyUnicode_Resize(ar, 10) py_uni = rffi.cast(PyUnicodeObject, ar[0]) assert py_uni.c_size == 10 - assert py_uni.c_buffer[1] == 'b' - assert py_uni.c_buffer[10] == '\x00' + assert py_uni.c_str[1] == 'b' + assert py_uni.c_str[10] == '\x00' Py_DecRef(space, ar[0]) lltype.free(ar, flavor='raw') diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -22,7 +22,8 @@ PyUnicodeObjectStruct = lltype.ForwardReference() PyUnicodeObject = lltype.Ptr(PyUnicodeObjectStruct) PyUnicodeObjectFields = (PyObjectFields + - (("buffer", rffi.CWCHARP), ("size", Py_ssize_t))) + (("str", rffi.CWCHARP), ("size", Py_ssize_t), + ("hash", rffi.LONG), ("defenc", PyObject))) cpython_struct("PyUnicodeObject", PyUnicodeObjectFields, PyUnicodeObjectStruct) @bootstrap_function @@ -54,16 +55,20 @@ buflen = length + 1 py_uni.c_size = length - py_uni.c_buffer = lltype.malloc(rffi.CWCHARP.TO, buflen, - flavor='raw', zero=True, - add_memory_pressure=True) + py_uni.c_str = lltype.malloc(rffi.CWCHARP.TO, buflen, + flavor='raw', zero=True, + add_memory_pressure=True) + py_uni.c_hash = -1 + py_uni.c_defenc = lltype.nullptr(PyObject.TO) return py_uni def unicode_attach(space, 
py_obj, w_obj): "Fills a newly allocated PyUnicodeObject with a unicode string" py_unicode = rffi.cast(PyUnicodeObject, py_obj) py_unicode.c_size = len(space.unicode_w(w_obj)) - py_unicode.c_buffer = lltype.nullptr(rffi.CWCHARP.TO) + py_unicode.c_str = lltype.nullptr(rffi.CWCHARP.TO) + py_unicode.c_hash = space.hash_w(w_obj) + py_unicode.c_defenc = lltype.nullptr(PyObject.TO) def unicode_realize(space, py_obj): """ @@ -71,17 +76,20 @@ be modified after this call. """ py_uni = rffi.cast(PyUnicodeObject, py_obj) - s = rffi.wcharpsize2unicode(py_uni.c_buffer, py_uni.c_size) + s = rffi.wcharpsize2unicode(py_uni.c_str, py_uni.c_size) w_obj = space.wrap(s) + py_uni.c_hash = space.hash_w(w_obj) track_reference(space, py_obj, w_obj) return w_obj @cpython_api([PyObject], lltype.Void, header=None) def unicode_dealloc(space, py_obj): py_unicode = rffi.cast(PyUnicodeObject, py_obj) - if py_unicode.c_buffer: - lltype.free(py_unicode.c_buffer, flavor="raw") + if py_unicode.c_str: + lltype.free(py_unicode.c_str, flavor="raw") from pypy.module.cpyext.object import PyObject_dealloc + if py_unicode.c_defenc: + PyObject_dealloc(space, py_unicode.c_defenc) PyObject_dealloc(space, py_obj) @cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL) @@ -205,12 +213,12 @@ """Return a pointer to the internal Py_UNICODE buffer of the object. ref has to be a PyUnicodeObject (not checked).""" ref_unicode = rffi.cast(PyUnicodeObject, ref) - if not ref_unicode.c_buffer: + if not ref_unicode.c_str: # Copy unicode buffer w_unicode = from_ref(space, ref) u = space.unicode_w(w_unicode) - ref_unicode.c_buffer = rffi.unicode2wcharp(u) - return ref_unicode.c_buffer + ref_unicode.c_str = rffi.unicode2wcharp(u) + return ref_unicode.c_str @cpython_api([PyObject], rffi.CWCHARP) def PyUnicode_AsUnicode(space, ref): @@ -241,7 +249,7 @@ string may or may not be 0-terminated. 
It is the responsibility of the caller to make sure that the wchar_t string is 0-terminated in case this is required by the application.""" - c_buffer = PyUnicode_AS_UNICODE(space, rffi.cast(PyObject, ref)) + c_str = PyUnicode_AS_UNICODE(space, rffi.cast(PyObject, ref)) c_size = ref.c_size # If possible, try to copy the 0-termination as well @@ -251,7 +259,7 @@ i = 0 while i < size: - buf[i] = c_buffer[i] + buf[i] = c_str[i] i += 1 if size > c_size: @@ -343,8 +351,15 @@ return PyUnicode_FromUnicode(space, wchar_p, length) @cpython_api([PyObject, CONST_STRING], PyObject) -def _PyUnicode_AsDefaultEncodedString(space, w_unicode, errors): - return PyUnicode_AsEncodedString(space, w_unicode, lltype.nullptr(rffi.CCHARP.TO), errors) +def _PyUnicode_AsDefaultEncodedString(space, ref, errors): + # Returns a borrowed reference. + py_uni = rffi.cast(PyUnicodeObject, ref) + if not py_uni.c_defenc: + py_uni.c_defenc = make_ref( + space, PyUnicode_AsEncodedString( + space, ref, + lltype.nullptr(rffi.CCHARP.TO), errors)) + return py_uni.c_defenc @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, CONST_STRING], PyObject) def PyUnicode_Decode(space, s, size, encoding, errors): @@ -444,7 +459,7 @@ def PyUnicode_Resize(space, ref, newsize): # XXX always create a new string so far py_uni = rffi.cast(PyUnicodeObject, ref[0]) - if not py_uni.c_buffer: + if not py_uni.c_str: raise OperationError(space.w_SystemError, space.wrap( "PyUnicode_Resize called on already created string")) try: @@ -458,7 +473,7 @@ if oldsize < newsize: to_cp = oldsize for i in range(to_cp): - py_newuni.c_buffer[i] = py_uni.c_buffer[i] + py_newuni.c_str[i] = py_uni.c_str[i] Py_DecRef(space, ref[0]) ref[0] = rffi.cast(PyObject, py_newuni) return 0 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit