Author: Ronan Lamy <[email protected]>
Branch: py3.5
Changeset: r92635:0037bb35a103
Date: 2017-10-07 17:45 +0200
http://bitbucket.org/pypy/pypy/changeset/0037bb35a103/

Log:    Create ready pyobjects when calling create_ref() on str; fixes #2673

diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -307,6 +307,18 @@
         assert module.utf8('xyz') == b'xyz'
         assert module.utf8('café') == 'café'.encode('utf-8')
 
+    def test_ready(self):
+        module = self.import_extension('foo', [
+            ("unsafe_len", "METH_O",
+             """
+                Py_ssize_t size = PyUnicode_GET_LENGTH(args);
+                return PyLong_FromSsize_t(size);
+             """)])
+        assert module.unsafe_len(u"abc") == 3
+        assert module.unsafe_len(u"caf&#233;") == 4
+        assert module.unsafe_len(u'a&#1040;b&#1041;c&#1057;d&#1044;') == 8
+        assert module.unsafe_len(u"caf&#233;\U0001F4A9") == 5
+
 
 class TestUnicode(BaseApiTest):
     def test_unicodeobject(self, space):
@@ -863,30 +875,31 @@
         assert space.unwrap(PyUnicode_Substring(space, w_str, 4, 3)) == u""
 
     def test_Ready(self, space):
-        w_str = space.wrap(u'abc')  # ASCII
-        py_str = as_pyobj(space, w_str)
+        def as_py_uni(val):
+            py_obj = new_empty_unicode(space, len(val))
+            set_wbuffer(py_obj, rffi.unicode2wcharp(val))
+            return py_obj
+
+        py_str = as_py_uni(u'abc')  # ASCII
         assert get_kind(py_str) == 0
-        _PyUnicode_Ready(space, w_str)
+        _PyUnicode_Ready(space, py_str)
         assert get_kind(py_str) == 1
         assert get_ascii(py_str) == 1
 
-        w_str = space.wrap(u'café')  # latin1
-        py_str = as_pyobj(space, w_str)
+        py_str = as_py_uni(u'café')  # latin1
         assert get_kind(py_str) == 0
-        _PyUnicode_Ready(space, w_str)
+        _PyUnicode_Ready(space, py_str)
         assert get_kind(py_str) == 1
         assert get_ascii(py_str) == 0
 
-        w_str = space.wrap(u'Росси́я')  # UCS2
-        py_str = as_pyobj(space, w_str)
+        py_str = as_py_uni(u'Росси́я')  # UCS2
         assert get_kind(py_str) == 0
-        _PyUnicode_Ready(space, w_str)
+        _PyUnicode_Ready(space, py_str)
         assert get_kind(py_str) == 2
         assert get_ascii(py_str) == 0
 
-        w_str = space.wrap(u'***\U0001f4a9***')  # UCS4
-        py_str = as_pyobj(space, w_str)
+        py_str = as_py_uni(u'***\U0001f4a9***')  # UCS4
         assert get_kind(py_str) == 0
-        _PyUnicode_Ready(space, w_str)
+        _PyUnicode_Ready(space, py_str)
         assert get_kind(py_str) == 4
         assert get_ascii(py_str) == 0
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -70,6 +70,7 @@
     "Fills a newly allocated PyUnicodeObject with a unicode string"
     set_wsize(py_obj, len(space.unicode_w(w_obj)))
     set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO))
+    _readify(space, py_obj, w_obj._value)
 
 def unicode_realize(space, py_obj):
     """
@@ -265,8 +266,11 @@
     assert isinstance(w_obj, unicodeobject.W_UnicodeObject)
     py_obj = as_pyobj(space, w_obj)
     assert get_kind(py_obj) == WCHAR_KIND
+    return _readify(space, py_obj, w_obj._value)
+
+def _readify(space, py_obj, value):
     maxchar = 0
-    for c in w_obj._value:
+    for c in value:
         if ord(c) > maxchar:
             maxchar = ord(c)
             if maxchar > MAX_UNICODE:
@@ -275,7 +279,7 @@
                     maxchar)
     if maxchar < 256:
         ucs1_data = rffi.str2charp(unicode_encode_latin_1(
-            w_obj._value, len(w_obj._value), errors='strict'))
+            value, len(value), errors='strict'))
         set_data(py_obj, cts.cast('void*', ucs1_data))
         set_kind(py_obj, _1BYTE_KIND)
         set_len(py_obj, get_wsize(py_obj))
@@ -290,7 +294,7 @@
     elif maxchar < 65536:
         # XXX: assumes that sizeof(wchar_t) == 4
         ucs2_str = unicode_encode_utf_16_helper(
-            w_obj._value, len(w_obj._value), errors='strict',
+            value, len(value), errors='strict',
             byteorder=runicode.BYTEORDER)
         ucs2_data = cts.cast('Py_UCS2 *', rffi.str2charp(ucs2_str))
         set_data(py_obj, cts.cast('void*', ucs2_data))
@@ -302,9 +306,8 @@
         # XXX: assumes that sizeof(wchar_t) == 4
         if not get_wbuffer(py_obj):
             # Copy unicode buffer
-            u = w_obj._value
-            set_wbuffer(py_obj, rffi.unicode2wcharp(u))
-            set_wsize(py_obj, len(u))
+            set_wbuffer(py_obj, rffi.unicode2wcharp(value))
+            set_wsize(py_obj, len(value))
         ucs4_data = get_wbuffer(py_obj)
         set_data(py_obj, cts.cast('void*', ucs4_data))
         set_len(py_obj, get_wsize(py_obj))
@@ -332,9 +335,7 @@
         w_res = utf_32_decode(space, value, w_final=space.w_False)
     else:
         raise oefmt(space.w_SystemError, "invalid kind")
-    w_ret = space.unpackiterable(w_res)[0]
-    _PyUnicode_Ready(space, w_ret)
-    return w_ret
+    return space.unpackiterable(w_res)[0]
 
 @cts.decl("Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)")
 def PyUnicode_AsUnicodeAndSize(space, ref, psize):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to