Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: record-known-result
Changeset: r97698:7151aca5645a
Date: 2019-10-01 15:53 +0200
http://bitbucket.org/pypy/pypy/changeset/7151aca5645a/
Log: hints on the bounds of the results of some of the rutf8 functions
diff --git a/dotviewer/graphclient.py b/dotviewer/graphclient.py
--- a/dotviewer/graphclient.py
+++ b/dotviewer/graphclient.py
@@ -95,8 +95,8 @@
except EOFError:
ioerror = ioerror or IOError("connection unexpectedly closed "
"(graphserver crash?)")
- if ioerror is not None:
- raise ioerror
+ #if ioerror is not None:
+ # raise ioerror
def send_error(io, e):
try:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -760,7 +760,7 @@
assert stop >= 0
byte_start = 0
for i in range(start):
- byte_start = next_codepoint_pos_dont_look_inside(self._utf8, byte_start)
+ byte_start = self.next_codepoint_pos_dont_look_inside(byte_start)
byte_stop = len(self._utf8)
for i in range(self._len() - stop):
byte_stop = prev_codepoint_pos_dont_look_inside(self._utf8, byte_stop)
@@ -920,7 +920,7 @@
if not self.is_ascii():
storage = self._get_index_storage()
jit.record_known_result(
- end, rutf8.codepoint_position_at_index, self._utf8, storage, index + 1)
+ end, rutf8._codepoint_position_at_index, self._utf8, storage, index + 1)
return W_UnicodeObject(self._utf8[start:end], 1)
@jit.unroll_safe
@@ -952,6 +952,7 @@
return self._length == len(self._utf8)
def _index_to_byte(self, index):
+ assert 0 <= index < self._len()
if self.is_ascii():
assert index >= 0
return index
@@ -974,6 +975,7 @@
""" this returns index such that self._index_to_byte(index) == bytepos
NB: this is slow! roughly logarithmic with a big constant
"""
+ assert 0 <= bytepos < len(self._utf8)
if self.is_ascii():
return bytepos
return rutf8.codepoint_index_at_byte_position(
@@ -982,7 +984,10 @@
def next_codepoint_pos_dont_look_inside(self, pos):
if self.is_ascii():
return pos + 1
- return next_codepoint_pos_dont_look_inside(self._utf8, pos)
+ res = next_codepoint_pos_dont_look_inside(self._utf8, pos)
+ jit.record_exact_value(res >= 0, True)
+ jit.record_exact_value(res <= len(self._utf8), True)
+ return res
def prev_codepoint_pos_dont_look_inside(self, pos):
if self.is_ascii():
@@ -1013,7 +1018,6 @@
if res_index < 0:
return None
res = self._byte_to_index(res_index)
- jit.promote(res >= 0) # always true!
assert res >= 0
return space.newint(res)
diff --git a/rpython/jit/metainterp/test/test_string.py b/rpython/jit/metainterp/test/test_string.py
--- a/rpython/jit/metainterp/test/test_string.py
+++ b/rpython/jit/metainterp/test/test_string.py
@@ -1053,9 +1053,14 @@
s = pick(x)
# the following lines emulate unicode.find
byteindex = s.find(search[z])
- assert byteindex >= 0
+ if byteindex < 0:
+ return -1001
storage = rutf8.create_utf8_index_storage(s, len(s) - 1)
index = rutf8.codepoint_index_at_byte_position(s, storage,
byteindex, len(s) - 1)
+ if index < 0: # no guard
+ return -1000
+ if index >= len(s) - 1: # no guard
+ return -1004
# then we use the resulting codepoint index in conjunction with
# the string to get at a byte index
b = rutf8.codepoint_position_at_index(s, storage, index)
@@ -1068,5 +1073,5 @@
f(1)
res = self.meta_interp(f, [1], backendopt=True)
assert res == f(1)
- self.check_simple_loop(int_sub=1)
+ self.check_simple_loop(int_sub=1, guard_false=1)
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -545,13 +545,20 @@
break
return storage
-@jit.elidable
def codepoint_position_at_index(utf8, storage, index):
""" Return byte index of a character inside utf8 encoded string, given
storage of type UTF8_INDEX_STORAGE. The index must be smaller than
or equal to the utf8 length: if needed, check explicitly before calling
this function.
"""
+ res = _codepoint_position_at_index(utf8, storage, index)
+ # tell the jit about the invariants of the result
+ jit.record_exact_value(res >= 0, True)
+ jit.record_exact_value(res < len(utf8), True)
+ return res
+
+@jit.elidable
+def _codepoint_position_at_index(utf8, storage, index):
current = index >> 6
ofs = ord(storage[current].ofs[(index >> 2) & 0x0F])
bytepos = storage[current].baseindex + ofs
@@ -599,7 +606,10 @@
is not tiny either.
"""
res = _codepoint_index_at_byte_position(utf8, storage, bytepos,
num_codepoints)
- jit.record_known_result(bytepos, codepoint_position_at_index, utf8, storage, res)
+ jit.record_known_result(bytepos, _codepoint_position_at_index, utf8, storage, res)
+ # tell the JIT that there are no bounds checks needed on the resulting indices
+ jit.record_exact_value(res >= 0, True)
+ jit.record_exact_value(res < num_codepoints, True)
return res
@jit.elidable
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit