Author: fijal
Branch: unicode-utf8
Changeset: r90460:8f690010d092
Date: 2017-03-01 19:16 +0100
http://bitbucket.org/pypy/pypy/changeset/8f690010d092/
Log: fix fix fix diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -42,15 +42,6 @@ """representation for debugging purposes""" return "%s(%r)" % (self.__class__.__name__, self._utf8) - def unwrap(self, space): - # for testing - return self._value - - def create_if_subclassed(self): - if type(self) is W_UnicodeObject: - return self - return W_UnicodeObject(self._value) - def is_w(self, space, w_other): if not isinstance(w_other, W_UnicodeObject): return False @@ -103,6 +94,7 @@ charbuf_w = str_w def listview_unicode(self): + XXX # fix at some point return _create_list_from_unicode(self._value) def ord(self, space): @@ -130,6 +122,8 @@ return rutf8.compute_length_utf8(self._utf8) def _val(self, space): + import pdb + pdb.set_trace() return self._utf8.decode('utf8') @staticmethod @@ -534,11 +528,10 @@ @unwrap_spec(maxsplit=int) def descr_split(self, space, w_sep=None, maxsplit=-1): - # XXX maybe optimize? 
res = [] value = self._utf8 if space.is_none(w_sep): - res = split(value, maxsplit=maxsplit) + res = split(value, maxsplit=maxsplit, isutf8=1) return space.newlist_from_unicode(res) by = self.convert_arg_to_w_unicode(space, w_sep)._utf8 @@ -582,6 +575,12 @@ return W_UnicodeObject(centered, self._len() + d) + def descr_contains(self, space, w_sub): + value = self._utf8 + w_other = self.convert_arg_to_w_unicode(space, w_sub) + return space.newbool(value.find(w_other._utf8) >= 0) + + def wrapunicode(space, uni): return W_UnicodeObject(uni) diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -16,17 +16,31 @@ # -------------- public API for string functions ----------------------- -@specialize.argtype(0) -def _isspace(char): - if isinstance(char, str): - return char.isspace() +@specialize.ll_and_arg(2) +def _isspace(s, pos, isutf8=0): + if isutf8: + from rpython.rlib import rutf8 + char = rutf8.codepoint_at_pos(s, pos) + return unicodedb.isspace(char) else: - assert isinstance(char, unicode) - return unicodedb.isspace(ord(char)) + char = s[pos] + if isinstance(char, str): + return char.isspace() + else: + assert isinstance(char, unicode) + return unicodedb.isspace(ord(char)) +@specialize.arg(2) +def _incr(s, pos, isutf8): + from rpython.rlib.rutf8 import next_codepoint_pos -@specialize.argtype(0, 1) -def split(value, by=None, maxsplit=-1): + if isutf8: + return next_codepoint_pos(s, pos) + else: + return pos + 1 + +@specialize.ll_and_arg(3) +def split(value, by=None, maxsplit=-1, isutf8=0): if by is None: length = len(value) i = 0 @@ -34,9 +48,9 @@ while True: # find the beginning of the next word while i < length: - if not _isspace(value[i]): + if not _isspace(value, i, isutf8): break # found - i += 1 + i = _incr(value, i, isutf8) else: break # end of string, finished @@ -44,16 +58,19 @@ if maxsplit == 0: j = length # take all the rest of the string else: - j = i + 1 - while j < length and not 
_isspace(value[j]): - j += 1 + j = _incr(value, i, isutf8) + while j < length and not _isspace(value, j, isutf8): + j = _incr(value, j, isutf8) maxsplit -= 1 # NB. if it's already < 0, it stays < 0 # the word is value[i:j] res.append(value[i:j]) # continue to look from the character following the space after the word - i = j + 1 + if j < length: + i = _incr(value, j, isutf8) + else: + break return res if isinstance(value, unicode): @@ -66,6 +83,8 @@ bylen = len(by) if bylen == 0: raise ValueError("empty separator") + # XXX measure if preallocating the result list to the correct + # size is faster, should be start = 0 if bylen == 1: @@ -102,8 +121,8 @@ return res -@specialize.argtype(0, 1) -def rsplit(value, by=None, maxsplit=-1): +@specialize.ll_and_arg(3) +def rsplit(value, by=None, maxsplit=-1, isutf8=0): if by is None: res = [] diff --git a/rpython/rtyper/rpbc.py b/rpython/rtyper/rpbc.py --- a/rpython/rtyper/rpbc.py +++ b/rpython/rtyper/rpbc.py @@ -1134,6 +1134,8 @@ self.lowleveltype = self.r_im_self.lowleveltype def convert_const(self, method): + if method is None: + return nullptr(self.lowleveltype.TO) if getattr(method, 'im_func', None) is None: raise TyperError("not a bound method: %r" % method) return self.r_im_self.convert_const(method.im_self) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit