[pypy-commit] pypy record-known-result: hints on the bounds of the results of some of the rutf8 functions

cfbolz Tue, 01 Oct 2019 12:13:32 -0700

Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: record-known-result
Changeset: r97698:7151aca5645a
Date: 2019-10-01 15:53 +0200
http://bitbucket.org/pypy/pypy/changeset/7151aca5645a/


Log:    hints on the bounds of the results of some of the rutf8 functions

diff --git a/dotviewer/graphclient.py b/dotviewer/graphclient.py
--- a/dotviewer/graphclient.py
+++ b/dotviewer/graphclient.py
@@ -95,8 +95,8 @@
     except EOFError:
         ioerror = ioerror or IOError("connection unexpectedly closed "
                                      "(graphserver crash?)")
-    if ioerror is not None:
-        raise ioerror
+    #if ioerror is not None:
+    #    raise ioerror
 
 def send_error(io, e):
     try:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -760,7 +760,7 @@
         assert stop >= 0
         byte_start = 0
         for i in range(start):
-            byte_start = next_codepoint_pos_dont_look_inside(self._utf8, byte_start)
+            byte_start = self.next_codepoint_pos_dont_look_inside(byte_start)
         byte_stop = len(self._utf8)
         for i in range(self._len() - stop):
             byte_stop = prev_codepoint_pos_dont_look_inside(self._utf8, byte_stop)
@@ -920,7 +920,7 @@
         if not self.is_ascii():
             storage = self._get_index_storage()
             jit.record_known_result(
-                end, rutf8.codepoint_position_at_index, self._utf8, storage, index + 1)
+                end, rutf8._codepoint_position_at_index, self._utf8, storage, index + 1)
         return W_UnicodeObject(self._utf8[start:end], 1)
 
     @jit.unroll_safe
@@ -952,6 +952,7 @@
         return self._length == len(self._utf8)
 
     def _index_to_byte(self, index):
+        assert 0 <= index < self._len()
         if self.is_ascii():
             assert index >= 0
             return index
@@ -974,6 +975,7 @@
         """ this returns index such that self._index_to_byte(index) == bytepos
         NB: this is slow! roughly logarithmic with a big constant
         """
+        assert 0 <= bytepos < len(self._utf8)
         if self.is_ascii():
             return bytepos
         return rutf8.codepoint_index_at_byte_position(
@@ -982,7 +984,10 @@
     def next_codepoint_pos_dont_look_inside(self, pos):
         if self.is_ascii():
             return pos + 1
-        return next_codepoint_pos_dont_look_inside(self._utf8, pos)
+        res = next_codepoint_pos_dont_look_inside(self._utf8, pos)
+        jit.record_exact_value(res >= 0, True)
+        jit.record_exact_value(res <= len(self._utf8), True)
+        return res
 
     def prev_codepoint_pos_dont_look_inside(self, pos):
         if self.is_ascii():
@@ -1013,7 +1018,6 @@
         if res_index < 0:
             return None
         res = self._byte_to_index(res_index)
-        jit.promote(res >= 0)  # always true!
         assert res >= 0
         return space.newint(res)
 
diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py
--- a/rpython/jit/metainterp/test/test_string.py
+++ b/rpython/jit/metainterp/test/test_string.py
@@ -1053,9 +1053,14 @@
                 s = pick(x)
                 # the following lines emulate unicode.find
                 byteindex = s.find(search[z])
-                assert byteindex >= 0
+                if byteindex < 0:
+                    return -1001
                 storage = rutf8.create_utf8_index_storage(s, len(s) - 1)
                 index = rutf8.codepoint_index_at_byte_position(s, storage, byteindex, len(s) - 1)
+                if index < 0:  # no guard
+                    return -1000
+                if index >= len(s) - 1:  # no guard
+                    return -1004
                 # then we use the resulting codepoint index in conjunction with
                 # the string to get at a byte index
                 b = rutf8.codepoint_position_at_index(s, storage, index)
@@ -1068,5 +1073,5 @@
         f(1)
         res = self.meta_interp(f, [1], backendopt=True)
         assert res == f(1)
-        self.check_simple_loop(int_sub=1)
+        self.check_simple_loop(int_sub=1, guard_false=1)
 
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -545,13 +545,20 @@
         break
     return storage
 
-@jit.elidable
 def codepoint_position_at_index(utf8, storage, index):
     """ Return byte index of a character inside utf8 encoded string, given
     storage of type UTF8_INDEX_STORAGE.  The index must be smaller than
     or equal to the utf8 length: if needed, check explicitly before calling
     this function.
     """
+    res = _codepoint_position_at_index(utf8, storage, index)
+    # tell the jit about the invariants of the result
+    jit.record_exact_value(res >= 0, True)
+    jit.record_exact_value(res < len(utf8), True)
+    return res
+
+@jit.elidable
+def _codepoint_position_at_index(utf8, storage, index):
     current = index >> 6
     ofs = ord(storage[current].ofs[(index >> 2) & 0x0F])
     bytepos = storage[current].baseindex + ofs
@@ -599,7 +606,10 @@
     is not tiny either.
     """
     res = _codepoint_index_at_byte_position(utf8, storage, bytepos, num_codepoints)
-    jit.record_known_result(bytepos, codepoint_position_at_index, utf8, storage, res)
+    jit.record_known_result(bytepos, _codepoint_position_at_index, utf8, storage, res)
+    # tell the JIT that there are no bounds checks needed on the resulting indices
+    jit.record_exact_value(res >= 0, True)
+    jit.record_exact_value(res < num_codepoints, True)
     return res
 
 @jit.elidable
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy record-known-result: hints on the bounds of the results of some of the rutf8 functions

Reply via email to