Author: Ronan Lamy <ronan.l...@gmail.com> Branch: py3.5 Changeset: r90040:8360e512f159 Date: 2017-02-10 19:41 +0000 http://bitbucket.org/pypy/pypy/changeset/8360e512f159/
Log: Use CPython implementations for PyUnicode_GetSize, PyUnicode_GetLength diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -563,6 +563,7 @@ 'PyArg_ParseTuple', 'PyArg_UnpackTuple', 'PyArg_ParseTupleAndKeywords', 'PyArg_VaParse', 'PyArg_VaParseTupleAndKeywords', '_PyArg_NoKeywords', 'PyUnicode_FromFormat', 'PyUnicode_FromFormatV', 'PyUnicode_AsWideCharString', + 'PyUnicode_GetSize', 'PyUnicode_GetLength', 'PyModule_AddObject', 'PyModule_AddIntConstant', 'PyModule_AddStringConstant', 'Py_BuildValue', 'Py_VaBuildValue', 'PyTuple_Pack', '_PyArg_Parse_SizeT', '_PyArg_ParseTuple_SizeT', diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h --- a/pypy/module/cpyext/include/unicodeobject.h +++ b/pypy/module/cpyext/include/unicodeobject.h @@ -217,6 +217,19 @@ /* === Public API ========================================================= */ +/* Get the length of the Unicode object. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( + PyObject *unicode +); + +/* Get the number of Py_UNICODE units in the + string representation. 
*/ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( + PyObject *unicode /* Unicode object */ + ); + PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( const char *format, /* ASCII-encoded string */ va_list vargs diff --git a/pypy/module/cpyext/src/unicodeobject.c b/pypy/module/cpyext/src/unicodeobject.c --- a/pypy/module/cpyext/src/unicodeobject.c +++ b/pypy/module/cpyext/src/unicodeobject.c @@ -551,3 +551,27 @@ return buffer; } +Py_ssize_t +PyUnicode_GetSize(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + return PyUnicode_GET_SIZE(unicode); + + onError: + return -1; +} + +Py_ssize_t +PyUnicode_GetLength(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return -1; + } + if (PyUnicode_READY(unicode) == -1) + return -1; + return PyUnicode_GET_LENGTH(unicode); +} diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -68,6 +68,16 @@ assert module.test_is_unicode(u"") assert not module.test_is_unicode(()) + def test_strlen(self): + module = self.import_extension('foo', [ + ('strlen', "METH_O", + """ + PyObject* s = PyObject_Str(args); + return PyLong_FromLong(PyUnicode_GetLength(s)); + """)]) + print(module.strlen(True)) + assert module.strlen(True) == 4 + def test_unicode_buffer_init(self): module = self.import_extension('foo', [ ("getunicode", "METH_NOARGS", @@ -213,9 +223,6 @@ class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): - assert PyUnicode_GetSize(space, space.wrap(u'späm')) == 4 - unichar = rffi.sizeof(Py_UNICODE) - encoding = rffi.charp2str(PyUnicode_GetDefaultEncoding(space, )) w_default_encoding = space.call_function( space.sys.get('getdefaultencoding') @@ -627,7 +634,7 @@ assert space.eq_w(w_y, space.wrap(u"abcd")) - size = PyUnicode_GetSize(space, w_x) + size = get_wsize(as_pyobj(space, w_x)) 
Py_UNICODE_COPY(space, target_chunk, x_chunk, size) w_y = space.wrap(rffi.wcharpsize2unicode(target_chunk, size)) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -273,6 +273,7 @@ w_obj._value, len(w_obj._value), errors='strict')) set_data(py_obj, cts.cast('void*', ucs1_data)) set_kind(py_obj, _1BYTE_KIND) + set_len(py_obj, get_wsize(py_obj)) if maxchar < 128: set_ascii(py_obj, 1) set_utf8(py_obj, cts.cast('char*', get_data(py_obj))) @@ -328,28 +329,6 @@ set_utf8(ref, rffi.str2charp(s)) return get_utf8(ref) -@cpython_api([PyObject], Py_ssize_t, error=-1) -def PyUnicode_GetSize(space, ref): - """Return the size of the deprecated Py_UNICODE representation, in code - units (this includes surrogate pairs as 2 units). - - Please migrate to using PyUnicode_GetLength(). - """ - if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_unicode: - return get_wsize(ref) - else: - w_obj = from_ref(space, ref) - return space.len_w(w_obj) - -@cpython_api([PyObject], Py_ssize_t, error=-1) -def PyUnicode_GetLength(space, unicode): - """Return the length of the Unicode object, in code points.""" - # XXX: this is a stub - if not PyUnicode_Check(space, unicode): - PyErr_BadArgument(space) - #PyUnicode_READY(w_unicode) - return get_len(unicode) - @cpython_api([PyObject, rffi.CWCHARP, Py_ssize_t], Py_ssize_t, error=-1) def PyUnicode_AsWideChar(space, ref, buf, size): """Copy the Unicode object contents into the wchar_t buffer w. At most _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit