Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: py3k
Changeset: r54862:1fb96540cdbc
Date: 2012-05-01 18:33 +0200
http://bitbucket.org/pypy/pypy/changeset/1fb96540cdbc/
Log: hg merge default diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py --- a/pypy/module/cpyext/stubs.py +++ b/pypy/module/cpyext/stubs.py @@ -1947,35 +1947,6 @@ changes in your code for properly supporting 64-bit systems.""" raise NotImplementedError -@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject) -def PyUnicode_DecodeUTF32(space, s, size, errors, byteorder): - """Decode length bytes from a UTF-32 encoded buffer string and return the - corresponding Unicode object. errors (if non-NULL) defines the error - handling. It defaults to "strict". - - If byteorder is non-NULL, the decoder starts decoding using the given byte - order: - - *byteorder == -1: little endian - *byteorder == 0: native order - *byteorder == 1: big endian - - If *byteorder is zero, and the first four bytes of the input data are a - byte order mark (BOM), the decoder switches to this byte order and the BOM is - not copied into the resulting Unicode string. If *byteorder is -1 or - 1, any byte order mark is copied to the output. - - After completion, *byteorder is set to the current byte order at the end - of input data. - - In a narrow build codepoints outside the BMP will be decoded as surrogate pairs. - - If byteorder is NULL, the codec starts in native order mode. - - Return NULL if an exception was raised by the codec. - """ - raise NotImplementedError - @cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t], PyObject) def PyUnicode_DecodeUTF32Stateful(space, s, size, errors, byteorder, consumed): """If consumed is NULL, behave like PyUnicode_DecodeUTF32(). 
If diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -391,6 +391,42 @@ test("\xFE\xFF\x00\x61\x00\x62\x00\x63\x00\x64", 0, 1) test("\xFF\xFE\x61\x00\x62\x00\x63\x00\x64\x00", 0, -1) + def test_decode_utf32(self, space, api): + def test(encoded, endian, realendian=None): + encoded_charp = rffi.str2charp(encoded) + strict_charp = rffi.str2charp("strict") + if endian is not None: + if endian < 0: + value = -1 + elif endian > 0: + value = 1 + else: + value = 0 + pendian = lltype.malloc(rffi.INTP.TO, 1, flavor='raw') + pendian[0] = rffi.cast(rffi.INT, value) + else: + pendian = None + + w_ustr = api.PyUnicode_DecodeUTF32(encoded_charp, len(encoded), strict_charp, pendian) + assert space.eq_w(space.call_method(w_ustr, 'encode', space.wrap('ascii')), + space.wrap("ab")) + + rffi.free_charp(encoded_charp) + rffi.free_charp(strict_charp) + if pendian: + if realendian is not None: + assert rffi.cast(rffi.INT, realendian) == pendian[0] + lltype.free(pendian, flavor='raw') + + test("\x61\x00\x00\x00\x62\x00\x00\x00", -1) + + test("\x61\x00\x00\x00\x62\x00\x00\x00", None) + + test("\x00\x00\x00\x61\x00\x00\x00\x62", 1) + + test("\x00\x00\xFE\xFF\x00\x00\x00\x61\x00\x00\x00\x62", 0, 1) + test("\xFF\xFE\x00\x00\x61\x00\x00\x00\x62\x00\x00\x00", 0, -1) + def test_compare(self, space, api): assert api.PyUnicode_Compare(space.wrap('a'), space.wrap('b')) == -1 diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -529,9 +529,8 @@ string = rffi.charpsize2str(s, size) - #FIXME: I don't like these prefixes - if pbyteorder is not None: # correct NULL check? - llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0]) # compatible with int? 
+ if pbyteorder is not None: + llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0]) if llbyteorder < 0: byteorder = "little" elif llbyteorder > 0: @@ -546,11 +545,67 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_16_helper(string, size, - errors, - True, # final ? false for multiple passes? - None, # errorhandler - byteorder) + result, length, byteorder = runicode.str_decode_utf_16_helper( + string, size, errors, + True, # final ? false for multiple passes? + None, # errorhandler + byteorder) + if pbyteorder is not None: + pbyteorder[0] = rffi.cast(rffi.INT, byteorder) + + return space.wrap(result) + +@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject) +def PyUnicode_DecodeUTF32(space, s, size, llerrors, pbyteorder): + """Decode length bytes from a UTF-32 encoded buffer string and + return the corresponding Unicode object. errors (if non-NULL) + defines the error handling. It defaults to "strict". + + If byteorder is non-NULL, the decoder starts decoding using the + given byte order: + *byteorder == -1: little endian + *byteorder == 0: native order + *byteorder == 1: big endian + + If *byteorder is zero, and the first four bytes of the input data + are a byte order mark (BOM), the decoder switches to this byte + order and the BOM is not copied into the resulting Unicode string. + If *byteorder is -1 or 1, any byte order mark is copied to the + output. + + After completion, *byteorder is set to the current byte order at + the end of input data. + + In a narrow build codepoints outside the BMP will be decoded as + surrogate pairs. + + If byteorder is NULL, the codec starts in native order mode. + + Return NULL if an exception was raised by the codec. 
+ """ + string = rffi.charpsize2str(s, size) + + if pbyteorder: + llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0]) + if llbyteorder < 0: + byteorder = "little" + elif llbyteorder > 0: + byteorder = "big" + else: + byteorder = "native" + else: + byteorder = "native" + + if llerrors: + errors = rffi.charp2str(llerrors) + else: + errors = None + + result, length, byteorder = runicode.str_decode_utf_32_helper( + string, size, errors, + True, # final ? false for multiple passes? + None, # errorhandler + byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT, byteorder) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit