[pypy-commit] pypy py3k: hg merge default

amauryfa Tue, 01 May 2012 16:04:30 -0700

Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: py3k
Changeset: r54862:1fb96540cdbc
Date: 2012-05-01 18:33 +0200
http://bitbucket.org/pypy/pypy/changeset/1fb96540cdbc/


Log:    hg merge default

diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -1947,35 +1947,6 @@
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
 
-@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
-def PyUnicode_DecodeUTF32(space, s, size, errors, byteorder):
-    """Decode length bytes from a UTF-32 encoded buffer string and return the
-    corresponding Unicode object.  errors (if non-NULL) defines the error
-    handling. It defaults to "strict".
-
-    If byteorder is non-NULL, the decoder starts decoding using the given byte
-    order:
-
-    *byteorder == -1: little endian
-    *byteorder == 0:  native order
-    *byteorder == 1:  big endian
-
-    If *byteorder is zero, and the first four bytes of the input data are a
-    byte order mark (BOM), the decoder switches to this byte order and the BOM is
-    not copied into the resulting Unicode string.  If *byteorder is -1 or
-    1, any byte order mark is copied to the output.
-
-    After completion, *byteorder is set to the current byte order at the end
-    of input data.
-
-    In a narrow build codepoints outside the BMP will be decoded as surrogate pairs.
-
-    If byteorder is NULL, the codec starts in native order mode.
-
-    Return NULL if an exception was raised by the codec.
-    """
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t], PyObject)
 def PyUnicode_DecodeUTF32Stateful(space, s, size, errors, byteorder, consumed):
     """If consumed is NULL, behave like PyUnicode_DecodeUTF32(). If
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -391,6 +391,42 @@
         test("\xFE\xFF\x00\x61\x00\x62\x00\x63\x00\x64", 0, 1)
         test("\xFF\xFE\x61\x00\x62\x00\x63\x00\x64\x00", 0, -1)
 
+    def test_decode_utf32(self, space, api):
+        def test(encoded, endian, realendian=None):
+            encoded_charp = rffi.str2charp(encoded)
+            strict_charp = rffi.str2charp("strict")
+            if endian is not None:
+                if endian < 0:
+                    value = -1
+                elif endian > 0:
+                    value = 1
+                else:
+                    value = 0
+                pendian = lltype.malloc(rffi.INTP.TO, 1, flavor='raw')
+                pendian[0] = rffi.cast(rffi.INT, value)
+            else:
+                pendian = None
+
+            w_ustr = api.PyUnicode_DecodeUTF32(encoded_charp, len(encoded), strict_charp, pendian)
+            assert space.eq_w(space.call_method(w_ustr, 'encode', space.wrap('ascii')),
+                              space.wrap("ab"))
+
+            rffi.free_charp(encoded_charp)
+            rffi.free_charp(strict_charp)
+            if pendian:
+                if realendian is not None:
+                    assert rffi.cast(rffi.INT, realendian) == pendian[0]
+                lltype.free(pendian, flavor='raw')
+
+        test("\x61\x00\x00\x00\x62\x00\x00\x00", -1)
+
+        test("\x61\x00\x00\x00\x62\x00\x00\x00", None)
+
+        test("\x00\x00\x00\x61\x00\x00\x00\x62", 1)
+
+        test("\x00\x00\xFE\xFF\x00\x00\x00\x61\x00\x00\x00\x62", 0, 1)
+        test("\xFF\xFE\x00\x00\x61\x00\x00\x00\x62\x00\x00\x00", 0, -1)
+
     def test_compare(self, space, api):
         assert api.PyUnicode_Compare(space.wrap('a'), space.wrap('b')) == -1
 
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -529,9 +529,8 @@
 
     string = rffi.charpsize2str(s, size)
 
-    #FIXME: I don't like these prefixes
-    if pbyteorder is not None: # correct NULL check?
-        llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0]) # compatible with int?
+    if pbyteorder is not None:
+        llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0])
         if llbyteorder < 0:
             byteorder = "little"
         elif llbyteorder > 0:
@@ -546,11 +545,67 @@
     else:
         errors = None
 
-    result, length, byteorder = runicode.str_decode_utf_16_helper(string, size,
-                                           errors,
-                                           True, # final ? false for multiple passes?
-                                           None, # errorhandler
-                                           byteorder)
+    result, length, byteorder = runicode.str_decode_utf_16_helper(
+        string, size, errors,
+        True, # final ? false for multiple passes?
+        None, # errorhandler
+        byteorder)
+    if pbyteorder is not None:
+        pbyteorder[0] = rffi.cast(rffi.INT, byteorder)
+
+    return space.wrap(result)
+
+@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
+def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder):
+    """Decode length bytes from a UTF-32 encoded buffer string and
+    return the corresponding Unicode object.  errors (if non-NULL)
+    defines the error handling. It defaults to "strict".
+
+    If byteorder is non-NULL, the decoder starts decoding using the
+    given byte order:
+    *byteorder == -1: little endian
+    *byteorder == 0:  native order
+    *byteorder == 1:  big endian
+
+    If *byteorder is zero, and the first four bytes of the input data
+    are a byte order mark (BOM), the decoder switches to this byte
+    order and the BOM is not copied into the resulting Unicode string.
+    If *byteorder is -1 or 1, any byte order mark is copied to the
+    output.
+
+    After completion, *byteorder is set to the current byte order at
+    the end of input data.
+
+    In a narrow build codepoints outside the BMP will be decoded as
+    surrogate pairs.
+
+    If byteorder is NULL, the codec starts in native order mode.
+
+    Return NULL if an exception was raised by the codec.
+    """
+    string = rffi.charpsize2str(s, size)
+
+    if pbyteorder:
+        llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0])
+        if llbyteorder < 0:
+            byteorder = "little"
+        elif llbyteorder > 0:
+            byteorder = "big"
+        else:
+            byteorder = "native"
+    else:
+        byteorder = "native"
+
+    if llerrors:
+        errors = rffi.charp2str(llerrors)
+    else:
+        errors = None
+
+    result, length, byteorder = runicode.str_decode_utf_32_helper(
+        string, size, errors,
+        True, # final ? false for multiple passes?
+        None, # errorhandler
+        byteorder)
     if pbyteorder is not None:
         pbyteorder[0] = rffi.cast(rffi.INT, byteorder)
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3k: hg merge default

Reply via email to