Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch:
Changeset: r97258:e2fdef728670
Date: 2019-08-25 23:42 +0200
http://bitbucket.org/pypy/pypy/changeset/e2fdef728670/
Log: optimization for the ASCII case in unicode.(r)find and .(r)index:
there is no need to convert the result index back from a byte offset to a
codepoint index if the string is ASCII, because the two are identical.
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -886,6 +886,11 @@
return rutf8.codepoint_position_at_index(
self._utf8, self._get_index_storage(), index)
+ def _codepoints_in_utf8(self, start, end):
+ if self.is_ascii():
+ return end - start
+ return rutf8.codepoints_in_utf8(self._utf8, start, end)
+
@always_inline
def _unwrap_and_search(self, space, w_sub, w_start, w_end, forward=True):
w_sub = self.convert_arg_to_w_unicode(space, w_sub)
@@ -907,7 +912,7 @@
res_index = self._utf8.find(w_sub._utf8, start_index, end_index)
if res_index < 0:
return None
- skip = rutf8.codepoints_in_utf8(self._utf8, start_index, res_index)
+ skip = self._codepoints_in_utf8(start_index, res_index)
res = start + skip
assert res >= 0
return space.newint(res)
@@ -915,7 +920,7 @@
res_index = self._utf8.rfind(w_sub._utf8, start_index, end_index)
if res_index < 0:
return None
- skip = rutf8.codepoints_in_utf8(self._utf8, res_index, end_index)
+ skip = self._codepoints_in_utf8(res_index, end_index)
res = end - skip
assert res >= 0
return space.newint(res)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit