Author: Ronan Lamy <[email protected]>
Branch: py3.5
Changeset: r92635:0037bb35a103
Date: 2017-10-07 17:45 +0200
http://bitbucket.org/pypy/pypy/changeset/0037bb35a103/
Log: Create ready pyobjects when calling create_ref() on str; fixes #2673
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -307,6 +307,18 @@
assert module.utf8('xyz') == b'xyz'
assert module.utf8('café') == 'café'.encode('utf-8')
+ def test_ready(self):
+ module = self.import_extension('foo', [
+ ("unsafe_len", "METH_O",
+ """
+ Py_ssize_t size = PyUnicode_GET_LENGTH(args);
+ return PyLong_FromSsize_t(size);
+ """)])
+ assert module.unsafe_len(u"abc") == 3
+ assert module.unsafe_len(u"café") == 4
+ assert module.unsafe_len(u'aАbБcСdД') == 8
+ assert module.unsafe_len(u"café\U0001F4A9") == 5
+
class TestUnicode(BaseApiTest):
def test_unicodeobject(self, space):
@@ -863,30 +875,31 @@
assert space.unwrap(PyUnicode_Substring(space, w_str, 4, 3)) == u""
def test_Ready(self, space):
- w_str = space.wrap(u'abc') # ASCII
- py_str = as_pyobj(space, w_str)
+ def as_py_uni(val):
+ py_obj = new_empty_unicode(space, len(val))
+ set_wbuffer(py_obj, rffi.unicode2wcharp(val))
+ return py_obj
+
+ py_str = as_py_uni(u'abc') # ASCII
assert get_kind(py_str) == 0
- _PyUnicode_Ready(space, w_str)
+ _PyUnicode_Ready(space, py_str)
assert get_kind(py_str) == 1
assert get_ascii(py_str) == 1
- w_str = space.wrap(u'café') # latin1
- py_str = as_pyobj(space, w_str)
+ py_str = as_py_uni(u'café') # latin1
assert get_kind(py_str) == 0
- _PyUnicode_Ready(space, w_str)
+ _PyUnicode_Ready(space, py_str)
assert get_kind(py_str) == 1
assert get_ascii(py_str) == 0
- w_str = space.wrap(u'Росси́я') # UCS2
- py_str = as_pyobj(space, w_str)
+ py_str = as_py_uni(u'Росси́я') # UCS2
assert get_kind(py_str) == 0
- _PyUnicode_Ready(space, w_str)
+ _PyUnicode_Ready(space, py_str)
assert get_kind(py_str) == 2
assert get_ascii(py_str) == 0
- w_str = space.wrap(u'***\U0001f4a9***') # UCS4
- py_str = as_pyobj(space, w_str)
+ py_str = as_py_uni(u'***\U0001f4a9***') # UCS4
assert get_kind(py_str) == 0
- _PyUnicode_Ready(space, w_str)
+ _PyUnicode_Ready(space, py_str)
assert get_kind(py_str) == 4
assert get_ascii(py_str) == 0
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -70,6 +70,7 @@
"Fills a newly allocated PyUnicodeObject with a unicode string"
set_wsize(py_obj, len(space.unicode_w(w_obj)))
set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO))
+ _readify(space, py_obj, w_obj._value)
def unicode_realize(space, py_obj):
"""
@@ -265,8 +266,11 @@
assert isinstance(w_obj, unicodeobject.W_UnicodeObject)
py_obj = as_pyobj(space, w_obj)
assert get_kind(py_obj) == WCHAR_KIND
+ return _readify(space, py_obj, w_obj._value)
+
+def _readify(space, py_obj, value):
maxchar = 0
- for c in w_obj._value:
+ for c in value:
if ord(c) > maxchar:
maxchar = ord(c)
if maxchar > MAX_UNICODE:
@@ -275,7 +279,7 @@
maxchar)
if maxchar < 256:
ucs1_data = rffi.str2charp(unicode_encode_latin_1(
- w_obj._value, len(w_obj._value), errors='strict'))
+ value, len(value), errors='strict'))
set_data(py_obj, cts.cast('void*', ucs1_data))
set_kind(py_obj, _1BYTE_KIND)
set_len(py_obj, get_wsize(py_obj))
@@ -290,7 +294,7 @@
elif maxchar < 65536:
# XXX: assumes that sizeof(wchar_t) == 4
ucs2_str = unicode_encode_utf_16_helper(
- w_obj._value, len(w_obj._value), errors='strict',
+ value, len(value), errors='strict',
byteorder=runicode.BYTEORDER)
ucs2_data = cts.cast('Py_UCS2 *', rffi.str2charp(ucs2_str))
set_data(py_obj, cts.cast('void*', ucs2_data))
@@ -302,9 +306,8 @@
# XXX: assumes that sizeof(wchar_t) == 4
if not get_wbuffer(py_obj):
# Copy unicode buffer
- u = w_obj._value
- set_wbuffer(py_obj, rffi.unicode2wcharp(u))
- set_wsize(py_obj, len(u))
+ set_wbuffer(py_obj, rffi.unicode2wcharp(value))
+ set_wsize(py_obj, len(value))
ucs4_data = get_wbuffer(py_obj)
set_data(py_obj, cts.cast('void*', ucs4_data))
set_len(py_obj, get_wsize(py_obj))
@@ -332,9 +335,7 @@
w_res = utf_32_decode(space, value, w_final=space.w_False)
else:
raise oefmt(space.w_SystemError, "invalid kind")
- w_ret = space.unpackiterable(w_res)[0]
- _PyUnicode_Ready(space, w_ret)
- return w_ret
+ return space.unpackiterable(w_res)[0]
@cts.decl("Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)")
def PyUnicode_AsUnicodeAndSize(space, ref, psize):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit