Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92753:f029e18069d3
Date: 2017-10-14 10:47 +0200
http://bitbucket.org/pypy/pypy/changeset/f029e18069d3/
Log: Add startswith/endswith checks to the hypothesis test, plus fixes.
Includes a fix that should also be applied to the default branch.
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -80,6 +80,28 @@
space.newint(start + len1))
assert space.int_w(w_index) == rexpected
+ expected = u.startswith(v, start)
+ w_res = space.call_method(w_u, 'startswith', w_v,
+ space.newint(start))
+ assert w_res is space.newbool(expected)
+
+ expected = u.startswith(v, start, start + len1)
+ w_res = space.call_method(w_u, 'startswith', w_v,
+ space.newint(start),
+ space.newint(start + len1))
+ assert w_res is space.newbool(expected)
+
+ expected = u.endswith(v, start)
+ w_res = space.call_method(w_u, 'endswith', w_v,
+ space.newint(start))
+ assert w_res is space.newbool(expected)
+
+ expected = u.endswith(v, start, start + len1)
+ w_res = space.call_method(w_u, 'endswith', w_v,
+ space.newint(start),
+ space.newint(start + len1))
+ assert w_res is space.newbool(expected)
+
class AppTestUnicodeStringStdOnly:
def test_compares(self):
@@ -475,6 +497,9 @@
assert u''.startswith(u'a') is False
assert u'x'.startswith(u'xx') is False
assert u'y'.startswith(u'xx') is False
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False
+ assert u'\u1234'.startswith(u'', 1, 0) is True
def test_startswith_more(self):
assert u'ab'.startswith(u'a', 0) is True
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -599,8 +599,8 @@
def _startswith(self, space, value, w_prefix, start, end):
prefix = self.convert_arg_to_w_unicode(space, w_prefix)._utf8
- if start > len(value):
- return len(prefix) == 0
+ if len(prefix) == 0:
+ return True
return startswith(value, prefix, start, end)
def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
@@ -613,8 +613,8 @@
def _endswith(self, space, value, w_prefix, start, end):
prefix = self.convert_arg_to_w_unicode(space, w_prefix)._utf8
- if start > len(value):
- return len(prefix) == 0
+ if len(prefix) == 0:
+ return True
return endswith(value, prefix, start, end)
def descr_add(self, space, w_other):
@@ -925,26 +925,20 @@
return space.newint(res)
def _unwrap_and_compute_idx_params(self, space, w_start, w_end):
+ # unwrap start and stop indices, optimized for the case where
+ # start == 0 and end == self._length. Note that 'start' and
+ # 'end' are measured in codepoints whereas 'start_index' and
+ # 'end_index' are measured in bytes.
start, end = unwrap_start_stop(space, self._length, w_start, w_end)
- # XXX for now just create index
start_index = 0
end_index = len(self._utf8)
- if start > 0 or end != self._length:
- storage = self._get_index_storage()
- if start > 0:
- # :-(
- if start > self._length:
- start_index = start
- else:
- start_index = rutf8.codepoint_position_at_index(
- self._utf8, storage, start)
- if end != self._length:
- # :-(
- if end > self._length:
- end_index = end
- else:
- end_index = rutf8.codepoint_position_at_index(
- self._utf8, storage, end)
+ if start > 0:
+ if start > self._length:
+ start_index = end_index
+ else:
+ start_index = self._index_to_byte(start)
+ if end < self._length:
+ end_index = self._index_to_byte(end)
return (start_index, end_index)
@unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit