[pypy-commit] pypy unicode-utf8: Add startswith/endswith in the hypothesis test. Fixes. Includes a fix

arigo Sat, 14 Oct 2017 02:44:09 -0700

Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92753:f029e18069d3
Date: 2017-10-14 10:47 +0200
http://bitbucket.org/pypy/pypy/changeset/f029e18069d3/


Log:    Add startswith/endswith in the hypothesis test. Fixes. Includes a
        fix that should also be in default...

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -80,6 +80,28 @@
                                     space.newint(start + len1))
         assert space.int_w(w_index) == rexpected
 
+        expected = u.startswith(v, start)
+        w_res = space.call_method(w_u, 'startswith', w_v,
+                                  space.newint(start))
+        assert w_res is space.newbool(expected)
+
+        expected = u.startswith(v, start, start + len1)
+        w_res = space.call_method(w_u, 'startswith', w_v,
+                                  space.newint(start),
+                                  space.newint(start + len1))
+        assert w_res is space.newbool(expected)
+
+        expected = u.endswith(v, start)
+        w_res = space.call_method(w_u, 'endswith', w_v,
+                                  space.newint(start))
+        assert w_res is space.newbool(expected)
+
+        expected = u.endswith(v, start, start + len1)
+        w_res = space.call_method(w_u, 'endswith', w_v,
+                                  space.newint(start),
+                                  space.newint(start + len1))
+        assert w_res is space.newbool(expected)
+
 
 class AppTestUnicodeStringStdOnly:
     def test_compares(self):
@@ -475,6 +497,9 @@
         assert u''.startswith(u'a') is False
         assert u'x'.startswith(u'xx') is False
         assert u'y'.startswith(u'xx') is False
+        assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True
+        assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False
+        assert u'\u1234'.startswith(u'', 1, 0) is True
 
     def test_startswith_more(self):
         assert u'ab'.startswith(u'a', 0) is True
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -599,8 +599,8 @@
 
     def _startswith(self, space, value, w_prefix, start, end):
         prefix = self.convert_arg_to_w_unicode(space, w_prefix)._utf8
-        if start > len(value):
-            return len(prefix) == 0
+        if len(prefix) == 0:
+            return True
         return startswith(value, prefix, start, end)
 
     def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
@@ -613,8 +613,8 @@
 
     def _endswith(self, space, value, w_prefix, start, end):
         prefix = self.convert_arg_to_w_unicode(space, w_prefix)._utf8
-        if start > len(value):
-            return len(prefix) == 0
+        if len(prefix) == 0:
+            return True
         return endswith(value, prefix, start, end)
 
     def descr_add(self, space, w_other):
@@ -925,26 +925,20 @@
             return space.newint(res)
 
     def _unwrap_and_compute_idx_params(self, space, w_start, w_end):
+        # unwrap start and stop indices, optimized for the case where
+        # start == 0 and end == self._length.  Note that 'start' and
+        # 'end' are measured in codepoints whereas 'start_index' and
+        # 'end_index' are measured in bytes.
         start, end = unwrap_start_stop(space, self._length, w_start, w_end)
-        # XXX for now just create index
         start_index = 0
         end_index = len(self._utf8)
-        if start > 0 or end != self._length:
-            storage = self._get_index_storage()
-            if start > 0:
-                # :-(
-                if start > self._length:
-                    start_index = start
-                else:
-                    start_index = rutf8.codepoint_position_at_index(
-                        self._utf8, storage, start)
-            if end != self._length:
-                # :-(
-                if end > self._length:
-                    end_index = end
-                else:
-                    end_index = rutf8.codepoint_position_at_index(
-                        self._utf8, storage, end)
+        if start > 0:
+            if start > self._length:
+                start_index = end_index
+            else:
+                start_index = self._index_to_byte(start)
+        if end < self._length:
+            end_index = self._index_to_byte(end)
         return (start_index, end_index)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: Add startswith/endswith in the hypothesis test. Fixes. Includes a fix

Reply via email to