[pypy-commit] pypy default: Arguably buggy corner cases of str.startswith() vs unicode.startswith()

arigo Sat, 13 Jun 2015 02:15:49 -0700

Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r78076:b385d5240694
Date: 2015-06-13 10:41 +0200
http://bitbucket.org/pypy/pypy/changeset/b385d5240694/


Log:    Arguably buggy corner cases of str.startswith() vs
        unicode.startswith() on CPython

diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -635,7 +635,8 @@
         first index of value'''
         # needs to be safe against eq_w() mutating the w_list behind our back
         size = self.length()
-        i, stop = unwrap_start_stop(space, size, w_start, w_stop, True)
+        i, stop = unwrap_start_stop(space, size, w_start, w_stop)
+        # note that 'i' and 'stop' can be bigger than the length of the list
         try:
             i = self.find(w_value, i, stop)
         except ValueError:
diff --git a/pypy/objspace/std/sliceobject.py b/pypy/objspace/std/sliceobject.py
--- a/pypy/objspace/std/sliceobject.py
+++ b/pypy/objspace/std/sliceobject.py
@@ -211,26 +211,14 @@
     assert index >= 0
     return index
 
-def adapt_bound(space, size, w_index):
-    index = adapt_lower_bound(space, size, w_index)
-    if index > size:
-        index = size
-    assert index >= 0
-    return index
-
-@specialize.arg(4)
-def unwrap_start_stop(space, size, w_start, w_end, upper_bound=False):
+def unwrap_start_stop(space, size, w_start, w_end):
     if space.is_none(w_start):
         start = 0
-    elif upper_bound:
-        start = adapt_bound(space, size, w_start)
     else:
         start = adapt_lower_bound(space, size, w_start)
 
     if space.is_none(w_end):
         end = size
-    elif upper_bound:
-        end = adapt_bound(space, size, w_end)
     else:
         end = adapt_lower_bound(space, size, w_end)
     return start, end
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -22,12 +22,10 @@
         #    return orig_obj
         return self._new(s[start:stop])
 
-    @specialize.arg(4)
-    def _convert_idx_params(self, space, w_start, w_end, upper_bound=False):
+    def _convert_idx_params(self, space, w_start, w_end):
         value = self._val(space)
         lenself = len(value)
-        start, end = unwrap_start_stop(space, lenself, w_start, w_end,
-                                       upper_bound=upper_bound)
+        start, end = unwrap_start_stop(space, lenself, w_start, w_end)
         return (value, start, end)
 
     def _multi_chr(self, c):
@@ -606,8 +604,7 @@
         return self._newlist_unwrapped(space, strs)
 
     def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
-        (value, start, end) = self._convert_idx_params(space, w_start, w_end,
-                                                       True)
+        (value, start, end) = self._convert_idx_params(space, w_start, w_end)
         if space.isinstance_w(w_prefix, space.w_tuple):
             for w_prefix in space.fixedview(w_prefix):
                 if self._startswith(space, value, w_prefix, start, end):
@@ -617,11 +614,17 @@
                                               end))
 
     def _startswith(self, space, value, w_prefix, start, end):
-        return startswith(value, self._op_val(space, w_prefix), start, end)
+        prefix = self._op_val(space, w_prefix)
+        if start > len(value):
+            return self._starts_ends_overflow(prefix)
+        return startswith(value, prefix, start, end)
+
+    def _starts_ends_overflow(self, prefix):
+        return False     # bug-to-bug compat: this is for strings and
+                         # bytearrays, but overridden for unicodes
 
     def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
-        (value, start, end) = self._convert_idx_params(space, w_start, w_end,
-                                                       True)
+        (value, start, end) = self._convert_idx_params(space, w_start, w_end)
         if space.isinstance_w(w_suffix, space.w_tuple):
             for w_suffix in space.fixedview(w_suffix):
                 if self._endswith(space, value, w_suffix, start, end):
@@ -631,7 +634,10 @@
                                             end))
 
     def _endswith(self, space, value, w_prefix, start, end):
-        return endswith(value, self._op_val(space, w_prefix), start, end)
+        prefix = self._op_val(space, w_prefix)
+        if start > len(value):
+            return self._starts_ends_overflow(prefix)
+        return endswith(value, prefix, start, end)
 
     def _strip(self, space, w_chars, left, right):
         "internal function called by str_xstrip methods"
diff --git a/pypy/objspace/std/test/test_bytearrayobject.py b/pypy/objspace/std/test/test_bytearrayobject.py
--- a/pypy/objspace/std/test/test_bytearrayobject.py
+++ b/pypy/objspace/std/test/test_bytearrayobject.py
@@ -195,6 +195,14 @@
         assert bytearray('hello').endswith(bytearray('lo'))
         assert bytearray('hello').endswith((bytearray('lo'), 'he'))
 
+    def test_startswith_too_large(self):
+        assert bytearray('ab').startswith(bytearray('b'), 1) is True
+        assert bytearray('ab').startswith(bytearray(''), 2) is True
+        assert bytearray('ab').startswith(bytearray(''), 3) is False
+        assert bytearray('ab').endswith(bytearray('b'), 1) is True
+        assert bytearray('ab').endswith(bytearray(''), 2) is True
+        assert bytearray('ab').endswith(bytearray(''), 3) is False
+
     def test_stringlike_conversions(self):
         # methods that should return bytearray (and not str)
         def check(result, expected):
diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py
--- a/pypy/objspace/std/test/test_bytesobject.py
+++ b/pypy/objspace/std/test/test_bytesobject.py
@@ -308,6 +308,14 @@
         assert 'abc'.startswith('bc', 1, 2) is False
         assert 'abc'.startswith('c', -1, 4) is True
 
+    def test_startswith_too_large(self):
+        assert 'ab'.startswith('b', 1) is True
+        assert 'ab'.startswith('', 2) is True
+        assert 'ab'.startswith('', 3) is False
+        assert 'ab'.endswith('b', 1) is True
+        assert 'ab'.endswith('', 2) is True
+        assert 'ab'.endswith('', 3) is False
+
     def test_startswith_tuples(self):
         assert 'hello'.startswith(('he', 'ha'))
         assert not 'hello'.startswith(('lo', 'llo'))
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -409,6 +409,14 @@
         assert u'abc'.startswith(u'bc', 1, 2) is False
         assert u'abc'.startswith(u'c', -1, 4) is True
 
+    def test_startswith_too_large(self):
+        assert u'ab'.startswith(u'b', 1) is True
+        assert u'ab'.startswith(u'', 2) is True
+        assert u'ab'.startswith(u'', 3) is True   # not False
+        assert u'ab'.endswith(u'b', 1) is True
+        assert u'ab'.endswith(u'', 2) is True
+        assert u'ab'.endswith(u'', 3) is True   # not False
+
     def test_startswith_tuples(self):
         assert u'hello'.startswith((u'he', u'ha'))
         assert not u'hello'.startswith((u'lo', u'llo'))
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -392,6 +392,9 @@
                 cased = True
         return space.newbool(cased)
 
+    def _starts_ends_overflow(self, prefix):
+        return len(prefix) == 0
+
 
 def wrapunicode(space, uni):
     return W_UnicodeObject(uni)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: Arguably buggy corner cases of str.startswith() vs unicode.startswith()

Reply via email to