Author: Amaury Forgeot d'Arc <amaur...@gmail.com> Branch: cpyext-ext Changeset: r83057:931af853eaab Date: 2016-03-14 22:20 +0100 http://bitbucket.org/pypy/pypy/changeset/931af853eaab/
Log: Fix PyString_AsString() to accept a Unicode object; it is encoded using the default encoding. Also fix _PyUnicode_AsDefaultEncodedString() to return a reference borrowed from the unicode object. diff --git a/pypy/module/cpyext/bytesobject.py b/pypy/module/cpyext/bytesobject.py --- a/pypy/module/cpyext/bytesobject.py +++ b/pypy/module/cpyext/bytesobject.py @@ -1,4 +1,4 @@ -from pypy.interpreter.error import OperationError +from pypy.interpreter.error import OperationError, oefmt from rpython.rtyper.lltypesystem import rffi, lltype from pypy.module.cpyext.api import ( cpython_api, cpython_struct, bootstrap_function, build_type_checkers, @@ -171,8 +171,14 @@ if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str: pass # typecheck returned "ok" without forcing 'ref' at all elif not PyString_Check(space, ref): # otherwise, use the alternate way - raise OperationError(space.w_TypeError, space.wrap( - "PyString_AsString only support strings")) + from pypy.module.cpyext.unicodeobject import ( + PyUnicode_Check, _PyUnicode_AsDefaultEncodedString) + if PyUnicode_Check(space, ref): + ref = _PyUnicode_AsDefaultEncodedString(space, ref, None) + else: + raise oefmt(space.w_TypeError, + "expected string or Unicode object, %T found", + from_ref(space, ref)) ref_str = rffi.cast(PyStringObject, ref) if not ref_str.c_buffer: # copy string buffer @@ -184,8 +190,14 @@ @cpython_api([PyObject, rffi.CCHARPP, rffi.CArrayPtr(Py_ssize_t)], rffi.INT_real, error=-1) def PyString_AsStringAndSize(space, ref, buffer, length): if not PyString_Check(space, ref): - raise OperationError(space.w_TypeError, space.wrap( - "PyString_AsStringAndSize only support strings")) + from pypy.module.cpyext.unicodeobject import ( + PyUnicode_Check, _PyUnicode_AsDefaultEncodedString) + if PyUnicode_Check(space, ref): + ref = _PyUnicode_AsDefaultEncodedString(space, ref, None) + else: + raise oefmt(space.w_TypeError, + "expected string or Unicode object, %T found", + from_ref(space, ref)) ref_str = 
rffi.cast(PyStringObject, ref) if not ref_str.c_buffer: # copy string buffer diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py --- a/pypy/module/cpyext/test/test_bytesobject.py +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -172,6 +172,44 @@ ]) module.getstring() + def test_py_string_as_string_Unicode(self): + module = self.import_extension('foo', [ + ("getstring_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + buf = PyString_AsString(u1); + if (buf == NULL) + return NULL; + if (buf[3] != 't') { + PyErr_SetString(PyExc_AssertionError, "Bad conversion"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ("getstringandsize_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + Py_ssize_t len; + if (PyString_AsStringAndSize(u1, &buf, &len) < 0) + return NULL; + if (len != 4) { + PyErr_SetString(PyExc_AssertionError, "Bad Length"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ]) + module.getstring_unicode() + module.getstringandsize_unicode() + def test_format_v(self): module = self.import_extension('foo', [ ("test_string_format_v", "METH_VARARGS", diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -89,6 +89,22 @@ res = module.test_hash(u"xyz") assert res == hash(u'xyz') + def test_default_encoded_string(self): + module = self.import_extension('foo', [ + ("test_default_encoded_string", "METH_O", + ''' + PyObject* result = _PyUnicode_AsDefaultEncodedString(args, "replace"); + Py_INCREF(result); + return result; + ''' + ), + ]) + res = module.test_default_encoded_string(u"xyz") + assert isinstance(res, 
str) + assert res == 'xyz' + res = module.test_default_encoded_string(u"caf\xe9") + assert isinstance(res, str) + assert res == 'caf?' class TestUnicode(BaseApiTest): def test_unicodeobject(self, space, api): diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -375,8 +375,15 @@ return PyUnicode_FromUnicode(space, wchar_p, length) @cpython_api([PyObject, CONST_STRING], PyObject) -def _PyUnicode_AsDefaultEncodedString(space, w_unicode, errors): - return PyUnicode_AsEncodedString(space, w_unicode, lltype.nullptr(rffi.CCHARP.TO), errors) +def _PyUnicode_AsDefaultEncodedString(space, ref, errors): + # Returns a borrowed reference. + py_uni = rffi.cast(PyUnicodeObject, ref) + if not py_uni.c_defenc: + py_uni.c_defenc = make_ref( + space, PyUnicode_AsEncodedString( + space, ref, + lltype.nullptr(rffi.CCHARP.TO), errors)) + return py_uni.c_defenc @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, CONST_STRING], PyObject) def PyUnicode_Decode(space, s, size, encoding, errors): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit