Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: py3.5
Changeset: r92410:b6ba2262940e
Date: 2017-09-16 18:28 +0100
http://bitbucket.org/pypy/pypy/changeset/b6ba2262940e/
Log: Fix 2BYTE case in _PyUnicode_Ready(): don't prepend a BOM to the data

diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -190,6 +190,26 @@ b = s.encode('utf-32')[4:] # Skip the BOM assert module.from_ucs4(b) == s + def test_substring(self): + module = self.import_extension('foo', [ + ("slice_start", "METH_VARARGS", + ''' + PyObject* text; + Py_ssize_t start, length; + if (!PyArg_ParseTuple(args, "On", &text, &start)) + return NULL; + if (PyUnicode_READY(text) == -1) return NULL; + length = PyUnicode_GET_LENGTH(text); + if (start > length) return PyLong_FromSsize_t(start); + return PyUnicode_FromKindAndData(PyUnicode_KIND(text), + PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), + length-start); + ''')]) + s = 'aАbБcСdД' + assert module.slice_start(s, 2) == 'bБcСdД' + s = 'xx\N{PILE OF POO}' + assert module.slice_start(s, 2) == '\N{PILE OF POO}' + def test_aswidecharstring(self): module = self.import_extension('foo', [ ("aswidecharstring", "METH_O", diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -1,6 +1,6 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rtyper.lltypesystem import rffi, lltype -from rpython.rlib.runicode import unicode_encode_latin_1, unicode_encode_utf_16 +from rpython.rlib.runicode import unicode_encode_latin_1, unicode_encode_utf_16_helper from rpython.rlib.rarithmetic import widen from pypy.module.unicodedata import unicodedb @@ -289,8 +289,9 @@ set_utf8_len(py_obj, 0) elif maxchar < 65536: # XXX: assumes that sizeof(wchar_t) == 4 - ucs2_str = unicode_encode_utf_16( - w_obj._value, len(w_obj._value), errors='strict') + ucs2_str = unicode_encode_utf_16_helper( + w_obj._value, len(w_obj._value), errors='strict', +
byteorder=runicode.BYTEORDER) ucs2_data = cts.cast('Py_UCS2 *', rffi.str2charp(ucs2_str)) set_data(py_obj, cts.cast('void*', ucs2_data)) set_len(py_obj, get_wsize(py_obj))
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit