Author: fijal
Branch: unicode-utf8
Changeset: r92855:84d1ebd9002d
Date: 2017-10-26 20:11 +0200
http://bitbucket.org/pypy/pypy/changeset/84d1ebd9002d/

Log: improve the slice tests and fix it

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -907,16 +907,31 @@
 
     def test_getslice(self):
         assert u'123456'.__getslice__(1, 5) == u'2345'
-        s = u"abc"
-        assert s[:] == "abc"
-        assert s[1:] == "bc"
-        assert s[:2] == "ab"
-        assert s[1:2] == "b"
-        assert s[-2:] == "bc"
-        assert s[:-1] == "ab"
-        assert s[-2:2] == "b"
-        assert s[1:-1] == "b"
-        assert s[-2:-1] == "b"
+        s = u"\u0105b\u0107"
+        assert s[:] == u"\u0105b\u0107"
+        assert s[1:] == u"b\u0107"
+        assert s[:2] == u"\u0105b"
+        assert s[1:2] == u"b"
+        assert s[-2:] == u"b\u0107"
+        assert s[:-1] == u"\u0105b"
+        assert s[-2:2] == u"b"
+        assert s[1:-1] == u"b"
+        assert s[-2:-1] == u"b"
+
+    def test_getitem_slice(self):
+        assert u'123456'.__getitem__(slice(1, 5)) == u'2345'
+        s = u"\u0105b\u0107"
+        assert s[slice(3)] == u"\u0105b\u0107"
+        assert s[slice(1, 3)] == u"b\u0107"
+        assert s[slice(2)] == u"\u0105b"
+        assert s[slice(1,2)] == u"b"
+        assert s[slice(-2,3)] == u"b\u0107"
+        assert s[slice(-1)] == u"\u0105b"
+        assert s[slice(-2,2)] == u"b"
+        assert s[slice(1,-1)] == u"b"
+        assert s[slice(-2,-1)] == u"b"
+        assert u"abcde"[::2] == u"ace"
+        assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd"
 
     def test_no_len_on_str_iter(self):
         iterable = u"hello"
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -21,7 +21,7 @@
 from pypy.objspace.std import newformat
 from pypy.objspace.std.basestringtype import basestring_typedef
 from pypy.objspace.std.formatting import mod_format
-from pypy.objspace.std.sliceobject import (
+from pypy.objspace.std.sliceobject import (W_SliceObject,
     unwrap_start_stop, normalize_simple_slice)
 from pypy.objspace.std.stringmethods import StringMethods
 from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
@@ -724,8 +724,36 @@
 
         return space.newlist_utf8(res)
 
+    def descr_getitem(self, space, w_index):
+        if isinstance(w_index, W_SliceObject):
+            length = self._len()
+            start, stop, step, sl = w_index.indices4(space, length)
+            if sl == 0:
+                return self._empty()
+            elif step == 1:
+                assert start >= 0 and stop >= 0
+                return self._unicode_sliced(space, start, stop)
+            else:
+                return self._getitem_slice_slowpath(space, start, step, sl)
+
+        index = space.getindex_w(w_index, space.w_IndexError, "string index")
+        return self._getitem_result(space, index)
+
+    def _getitem_slice_slowpath(self, space, start, step, sl):
+        # XXX same comment as in _unicode_sliced
+        builder = StringBuilder(step * sl)
+        byte_pos = self._index_to_byte(start)
+        i = 0
+        while True:
+            next_pos = rutf8.next_codepoint_pos(self._utf8, byte_pos)
+            builder.append(self._utf8[byte_pos:next_pos])
+            if i == sl - 1:
+                break
+            i += 1
+            byte_pos = self._index_to_byte(start + i * step)
+        return W_UnicodeObject(builder.build(), sl)
+
     def descr_getslice(self, space, w_start, w_stop):
-        selfvalue = self._utf8
         start, stop = normalize_simple_slice(
             space, self._len(), w_start, w_stop)
         if start == stop:
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit