[pypy-commit] pypy unicode-utf8: partition/rpartition

fijal Thu, 26 Oct 2017 02:57:07 -0700

Author: fijal
Branch: unicode-utf8
Changeset: r92849:b80499557864
Date: 2017-10-26 11:55 +0200
http://bitbucket.org/pypy/pypy/changeset/b80499557864/


Log:    partition/rpartition

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -825,9 +825,11 @@
         if pos == -1:
             return space.newtuple([self, self._empty(), self._empty()])
         else:
+            lgt = rutf8.check_utf8(value, True, stop=pos)
             return space.newtuple(
-                [self._sliced(space, value, 0, pos, self), w_sub,
-                 self._sliced(space, value, pos + sublen, len(value), self)])
+                [W_UnicodeObject(value[0:pos], lgt), w_sub,
+                 W_UnicodeObject(value[pos + len(sub._utf8):len(value)],
+                    self._len() - lgt - sublen)])
 
     def descr_rpartition(self, space, w_sub):
         value = self._utf8
@@ -841,10 +843,11 @@
         if pos == -1:
             return space.newtuple([self._empty(), self._empty(), self])
         else:
+            lgt = rutf8.check_utf8(value, True, stop=pos)
             return space.newtuple(
-                [self._sliced(space, value, 0, pos, self), w_sub,
-                 self._sliced(space, value, pos + sublen, len(value), self)])
-
+                [W_UnicodeObject(value[0:pos], lgt), w_sub,
+                 W_UnicodeObject(value[pos + len(sub._utf8):len(value)],
+                    self._len() - lgt - sublen)])
 
     @unwrap_spec(count=int)
     def descr_replace(self, space, w_old, w_new, count=-1):
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -291,16 +291,19 @@
 
 
 #@jit.elidable
-def check_utf8(s, allow_surrogates):
+def check_utf8(s, allow_surrogates, start=0, stop=-1):
     """Check that 's' is a utf-8-encoded byte string.
     Returns the length (number of chars) or raise CheckError.
     If allow_surrogates is False, then also raise if we see any.
     Note also codepoints_in_utf8(), which also computes the length
     faster by assuming that 's' is valid utf-8.
     """
-    pos = 0
+    pos = start
     continuation_bytes = 0
-    end = len(s)
+    if stop < 0:
+        end = len(s)
+    else:
+        end = stop
     while pos < end:
         ordch1 = ord(s[pos])
         pos += 1
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: partition/rpartition

Reply via email to