Author: fijal
Branch: unicode-utf8
Changeset: r90333:b17610fd28e3
Date: 2017-02-23 20:32 +0100
http://bitbucket.org/pypy/pypy/changeset/b17610fd28e3/

Log:    split and rsplit (inefficient)

diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -608,7 +608,7 @@
                 eol = pos
             strs.append(value[sol:eol])
         if pos < length:
-            # XXX is this code reachable ever?
+            # XXX is this code reachable?
             strs.append(value[pos:length])
         return self._newlist_unwrapped(space, strs)
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -4,7 +4,7 @@
     compute_hash, compute_unique_id, import_from_mixin,
     enforceargs, newlist_hint)
 from rpython.rlib.buffer import StringBuffer
-from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
+from rpython.rlib.rstring import StringBuilder, split, rsplit
 from rpython.rlib.runicode import (
     make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
     unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii)
@@ -120,6 +120,8 @@
         return W_UnicodeObject.EMPTY
 
     def _len(self):
+        if self._length == -1:
+            self._length = self._compute_length()
         return self._length
 
     _val = utf8_w
@@ -438,13 +440,13 @@
                 return space.w_NotImplemented
             raise
         return W_UnicodeObject(self._utf8 + w_other._utf8,
-                               self._length + w_other._length)
+                               self._len() + w_other._len())
 
     @jit.look_inside_iff(lambda self, space, list_w, size:
                          jit.loop_unrolling_heuristic(list_w, size))
     def _str_join_many_items(self, space, list_w, size):
         value = self._utf8
-        lgt = self._length * (size - 1)
+        lgt = self._len() * (size - 1)
 
         prealloc_size = len(value) * (size - 1)
         unwrapped = newlist_hint(size)
@@ -502,10 +504,10 @@
         selfval = self._val(space)
         if len(selfval) == 0:
             return self._new(self._multi_chr(self._chr('0')) * width, width)
-        num_zeros = width - self._length
+        num_zeros = width - self._len()
         if num_zeros <= 0:
             # cannot return self, in case it is a subclass of str
-            return self._new(selfval, self._length)
+            return self._new(selfval, self._len())
         builder = self._builder(num_zeros + len(selfval))
         if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'):
             # copy sign to first position
@@ -517,6 +519,36 @@
         builder.append_slice(selfval, start, len(selfval))
         return self._new(builder.build(), width)
 
+    @unwrap_spec(maxsplit=int)
+    def descr_split(self, space, w_sep=None, maxsplit=-1):
+        # XXX maybe optimize?
+        res = []
+        value = self._val(space)
+        if space.is_none(w_sep):
+            res = split(value, maxsplit=maxsplit)
+            return space.newlist([W_UnicodeObject(s, -1) for s in res])
+
+        by = self._op_val(space, w_sep)
+        if len(by) == 0:
+            raise oefmt(space.w_ValueError, "empty separator")
+        res = split(value, by, maxsplit)
+
+        return space.newlist([W_UnicodeObject(s, -1) for s in res])
+
+    @unwrap_spec(maxsplit=int)
+    def descr_rsplit(self, space, w_sep=None, maxsplit=-1):
+        res = []
+        value = self._val(space)
+        if space.is_none(w_sep):
+            res = rsplit(value, maxsplit=maxsplit)
+            return space.newlist([W_UnicodeObject(s, -1) for s in res])
+
+        by = self._op_val(space, w_sep)
+        if len(by) == 0:
+            raise oefmt(space.w_ValueError, "empty separator")
+        res = rsplit(value, by, maxsplit)
+
+        return space.newlist([W_UnicodeObject(s, -1) for s in res])
 
 def wrapunicode(space, uni):
     return W_UnicodeObject(uni)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to