Author: fijal
Branch: unicode-utf8
Changeset: r90460:8f690010d092
Date: 2017-03-01 19:16 +0100
http://bitbucket.org/pypy/pypy/changeset/8f690010d092/
Log: fix fix fix
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -42,15 +42,6 @@
"""representation for debugging purposes"""
return "%s(%r)" % (self.__class__.__name__, self._utf8)
- def unwrap(self, space):
- # for testing
- return self._value
-
- def create_if_subclassed(self):
- if type(self) is W_UnicodeObject:
- return self
- return W_UnicodeObject(self._value)
-
def is_w(self, space, w_other):
if not isinstance(w_other, W_UnicodeObject):
return False
@@ -103,6 +94,7 @@
charbuf_w = str_w
def listview_unicode(self):
+ XXX # fix at some point
return _create_list_from_unicode(self._value)
def ord(self, space):
@@ -130,6 +122,8 @@
return rutf8.compute_length_utf8(self._utf8)
def _val(self, space):
+ import pdb
+ pdb.set_trace()
return self._utf8.decode('utf8')
@staticmethod
@@ -534,11 +528,10 @@
@unwrap_spec(maxsplit=int)
def descr_split(self, space, w_sep=None, maxsplit=-1):
- # XXX maybe optimize?
res = []
value = self._utf8
if space.is_none(w_sep):
- res = split(value, maxsplit=maxsplit)
+ res = split(value, maxsplit=maxsplit, isutf8=1)
return space.newlist_from_unicode(res)
by = self.convert_arg_to_w_unicode(space, w_sep)._utf8
@@ -582,6 +575,12 @@
return W_UnicodeObject(centered, self._len() + d)
+ def descr_contains(self, space, w_sub):
+ value = self._utf8
+ w_other = self.convert_arg_to_w_unicode(space, w_sub)
+ return space.newbool(value.find(w_other._utf8) >= 0)
+
+
def wrapunicode(space, uni):
return W_UnicodeObject(uni)
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -16,17 +16,31 @@
# -------------- public API for string functions -----------------------
-@specialize.argtype(0)
-def _isspace(char):
- if isinstance(char, str):
- return char.isspace()
+@specialize.ll_and_arg(2)
+def _isspace(s, pos, isutf8=0):
+ if isutf8:
+ from rpython.rlib import rutf8
+ char = rutf8.codepoint_at_pos(s, pos)
+ return unicodedb.isspace(char)
else:
- assert isinstance(char, unicode)
- return unicodedb.isspace(ord(char))
+ char = s[pos]
+ if isinstance(char, str):
+ return char.isspace()
+ else:
+ assert isinstance(char, unicode)
+ return unicodedb.isspace(ord(char))
+@specialize.arg(2)
+def _incr(s, pos, isutf8):
+ from rpython.rlib.rutf8 import next_codepoint_pos
-@specialize.argtype(0, 1)
-def split(value, by=None, maxsplit=-1):
+ if isutf8:
+ return next_codepoint_pos(s, pos)
+ else:
+ return pos + 1
+
+@specialize.ll_and_arg(3)
+def split(value, by=None, maxsplit=-1, isutf8=0):
if by is None:
length = len(value)
i = 0
@@ -34,9 +48,9 @@
while True:
# find the beginning of the next word
while i < length:
- if not _isspace(value[i]):
+ if not _isspace(value, i, isutf8):
break # found
- i += 1
+ i = _incr(value, i, isutf8)
else:
break # end of string, finished
@@ -44,16 +58,19 @@
if maxsplit == 0:
j = length # take all the rest of the string
else:
- j = i + 1
- while j < length and not _isspace(value[j]):
- j += 1
+ j = _incr(value, i, isutf8)
+ while j < length and not _isspace(value, j, isutf8):
+ j = _incr(value, j, isutf8)
maxsplit -= 1 # NB. if it's already < 0, it stays < 0
# the word is value[i:j]
res.append(value[i:j])
# continue to look from the character following the space after the word
- i = j + 1
+ if j < length:
+ i = _incr(value, j, isutf8)
+ else:
+ break
return res
if isinstance(value, unicode):
@@ -66,6 +83,8 @@
bylen = len(by)
if bylen == 0:
raise ValueError("empty separator")
+ # XXX measure if preallocating the result list to the correct
+ # size is faster, should be
start = 0
if bylen == 1:
@@ -102,8 +121,8 @@
return res
-@specialize.argtype(0, 1)
-def rsplit(value, by=None, maxsplit=-1):
+@specialize.ll_and_arg(3)
+def rsplit(value, by=None, maxsplit=-1, isutf8=0):
if by is None:
res = []
diff --git a/rpython/rtyper/rpbc.py b/rpython/rtyper/rpbc.py
--- a/rpython/rtyper/rpbc.py
+++ b/rpython/rtyper/rpbc.py
@@ -1134,6 +1134,8 @@
self.lowleveltype = self.r_im_self.lowleveltype
def convert_const(self, method):
+ if method is None:
+ return nullptr(self.lowleveltype.TO)
if getattr(method, 'im_func', None) is None:
raise TyperError("not a bound method: %r" % method)
return self.r_im_self.convert_const(method.im_self)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit