Author: Armin Rigo <[email protected]>
Branch:
Changeset: r78076:b385d5240694
Date: 2015-06-13 10:41 +0200
http://bitbucket.org/pypy/pypy/changeset/b385d5240694/
Log: Arguably buggy corner cases of str.startswith() vs
unicode.startswith() on CPython
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -635,7 +635,8 @@
first index of value'''
# needs to be safe against eq_w() mutating the w_list behind our back
size = self.length()
- i, stop = unwrap_start_stop(space, size, w_start, w_stop, True)
+ i, stop = unwrap_start_stop(space, size, w_start, w_stop)
+ # note that 'i' and 'stop' can be bigger than the length of the list
try:
i = self.find(w_value, i, stop)
except ValueError:
diff --git a/pypy/objspace/std/sliceobject.py b/pypy/objspace/std/sliceobject.py
--- a/pypy/objspace/std/sliceobject.py
+++ b/pypy/objspace/std/sliceobject.py
@@ -211,26 +211,14 @@
assert index >= 0
return index
-def adapt_bound(space, size, w_index):
- index = adapt_lower_bound(space, size, w_index)
- if index > size:
- index = size
- assert index >= 0
- return index
-
[email protected](4)
-def unwrap_start_stop(space, size, w_start, w_end, upper_bound=False):
+def unwrap_start_stop(space, size, w_start, w_end):
if space.is_none(w_start):
start = 0
- elif upper_bound:
- start = adapt_bound(space, size, w_start)
else:
start = adapt_lower_bound(space, size, w_start)
if space.is_none(w_end):
end = size
- elif upper_bound:
- end = adapt_bound(space, size, w_end)
else:
end = adapt_lower_bound(space, size, w_end)
return start, end
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -22,12 +22,10 @@
# return orig_obj
return self._new(s[start:stop])
- @specialize.arg(4)
- def _convert_idx_params(self, space, w_start, w_end, upper_bound=False):
+ def _convert_idx_params(self, space, w_start, w_end):
value = self._val(space)
lenself = len(value)
- start, end = unwrap_start_stop(space, lenself, w_start, w_end,
- upper_bound=upper_bound)
+ start, end = unwrap_start_stop(space, lenself, w_start, w_end)
return (value, start, end)
def _multi_chr(self, c):
@@ -606,8 +604,7 @@
return self._newlist_unwrapped(space, strs)
def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
- (value, start, end) = self._convert_idx_params(space, w_start, w_end,
- True)
+ (value, start, end) = self._convert_idx_params(space, w_start, w_end)
if space.isinstance_w(w_prefix, space.w_tuple):
for w_prefix in space.fixedview(w_prefix):
if self._startswith(space, value, w_prefix, start, end):
@@ -617,11 +614,17 @@
end))
def _startswith(self, space, value, w_prefix, start, end):
- return startswith(value, self._op_val(space, w_prefix), start, end)
+ prefix = self._op_val(space, w_prefix)
+ if start > len(value):
+ return self._starts_ends_overflow(prefix)
+ return startswith(value, prefix, start, end)
+
+ def _starts_ends_overflow(self, prefix):
+ return False # bug-to-bug compat: this is for strings and
+ # bytearrays, but overridden for unicodes
def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
- (value, start, end) = self._convert_idx_params(space, w_start, w_end,
- True)
+ (value, start, end) = self._convert_idx_params(space, w_start, w_end)
if space.isinstance_w(w_suffix, space.w_tuple):
for w_suffix in space.fixedview(w_suffix):
if self._endswith(space, value, w_suffix, start, end):
@@ -631,7 +634,10 @@
end))
def _endswith(self, space, value, w_prefix, start, end):
- return endswith(value, self._op_val(space, w_prefix), start, end)
+ prefix = self._op_val(space, w_prefix)
+ if start > len(value):
+ return self._starts_ends_overflow(prefix)
+ return endswith(value, prefix, start, end)
def _strip(self, space, w_chars, left, right):
"internal function called by str_xstrip methods"
diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py
--- a/pypy/objspace/std/test/test_bytearrayobject.py
+++ b/pypy/objspace/std/test/test_bytearrayobject.py
@@ -195,6 +195,14 @@
assert bytearray('hello').endswith(bytearray('lo'))
assert bytearray('hello').endswith((bytearray('lo'), 'he'))
+ def test_startswith_too_large(self):
+ assert bytearray('ab').startswith(bytearray('b'), 1) is True
+ assert bytearray('ab').startswith(bytearray(''), 2) is True
+ assert bytearray('ab').startswith(bytearray(''), 3) is False
+ assert bytearray('ab').endswith(bytearray('b'), 1) is True
+ assert bytearray('ab').endswith(bytearray(''), 2) is True
+ assert bytearray('ab').endswith(bytearray(''), 3) is False
+
def test_stringlike_conversions(self):
# methods that should return bytearray (and not str)
def check(result, expected):
diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py
--- a/pypy/objspace/std/test/test_bytesobject.py
+++ b/pypy/objspace/std/test/test_bytesobject.py
@@ -308,6 +308,14 @@
assert 'abc'.startswith('bc', 1, 2) is False
assert 'abc'.startswith('c', -1, 4) is True
+ def test_startswith_too_large(self):
+ assert 'ab'.startswith('b', 1) is True
+ assert 'ab'.startswith('', 2) is True
+ assert 'ab'.startswith('', 3) is False
+ assert 'ab'.endswith('b', 1) is True
+ assert 'ab'.endswith('', 2) is True
+ assert 'ab'.endswith('', 3) is False
+
def test_startswith_tuples(self):
assert 'hello'.startswith(('he', 'ha'))
assert not 'hello'.startswith(('lo', 'llo'))
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -409,6 +409,14 @@
assert u'abc'.startswith(u'bc', 1, 2) is False
assert u'abc'.startswith(u'c', -1, 4) is True
+ def test_startswith_too_large(self):
+ assert u'ab'.startswith(u'b', 1) is True
+ assert u'ab'.startswith(u'', 2) is True
+ assert u'ab'.startswith(u'', 3) is True # not False
+ assert u'ab'.endswith(u'b', 1) is True
+ assert u'ab'.endswith(u'', 2) is True
+ assert u'ab'.endswith(u'', 3) is True # not False
+
def test_startswith_tuples(self):
assert u'hello'.startswith((u'he', u'ha'))
assert not u'hello'.startswith((u'lo', u'llo'))
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -392,6 +392,9 @@
cased = True
return space.newbool(cased)
+ def _starts_ends_overflow(self, prefix):
+ return len(prefix) == 0
+
def wrapunicode(space, uni):
return W_UnicodeObject(uni)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit