Author: aliles Branch: Changeset: r53539:ab3326dd1a85 Date: 2012-03-13 21:45 -0700 http://bitbucket.org/pypy/pypy/changeset/ab3326dd1a85/
Log: Pull shared logic between str, unicode and bytearray in base classes. diff --git a/pypy/objspace/std/abstractstring.py b/pypy/objspace/std/abstractstring.py --- a/pypy/objspace/std/abstractstring.py +++ b/pypy/objspace/std/abstractstring.py @@ -2,6 +2,60 @@ from pypy.rlib.objectmodel import specialize +class Mixin_BaseStringMethods(object): + __slots__ = () + + def isalnum(w_self, space): + return w_self._all_true(space, w_self._isalnum) + + def isalpha(w_self, space): + return w_self._all_true(space, w_self._isalpha) + + def isdigit(w_self, space): + return w_self._all_true(space, w_self._isdigit) + + def islower(w_self, space): + return w_self._none_false_one_true(space, + w_self._islower, w_self._isupper) + + def isspace(w_self, space): + return w_self._all_true(space, w_self._isspace) + + def isupper(w_self, space): + return w_self._none_false_one_true(space, + w_self._isupper, w_self._islower) + + def istitle(w_self, space): + return w_self._title(space) + + +class AbstractCharIterator(object): + + def __init__(self, sequence): + self.sequence = sequence + self.pos = 0 + + def __len__(self): + return len(self.sequence) + + def __iter__(self): + return self + + def next(self): + ch = self.nextchar() + if ch is None: + raise StopIteration + return ch + + # XXX deprecate nextchar() method + def nextchar(self): + if self.pos >= len(self): + return None + idx = self.pos + self.pos += 1 + return self.sequence[idx] + + class W_AbstractBaseStringObject(W_Object): __slots__ = () @@ -9,6 +63,11 @@ """ representation for debugging purposes """ return "%s(%r)" % (w_self.__class__.__name__, w_self.raw_value()) + def immutable_unique_id(w_self, space): + if w_self.user_overridden_class: + return None + return space.wrap(compute_unique_id(w_self.unwrap(space))) + def is_w(self, space, w_other): if not isinstance(w_other, W_AbstractBaseStringObject): return False @@ -18,32 +77,75 @@ return False return self.unwrap(space) is w_other.unwrap(space) - def immutable_unique_id(w_self, space): - if w_self.user_overridden_class: - return None - return space.wrap(compute_unique_id(w_self.unwrap(space))) + def iterator(w_self, space): + return AbstractCharIterator(w_self.unwrap(space)) + + def length(w_self, space): + return len(w_self.unwrap(space)) def raw_value(w_self): raise NotImplemented("method not implemented") - def unwrap(w_self, space): - raise NotImplemented("method not implemented") - def str_w(w_self, space): raise NotImplemented("method not implemented") def unicode_w(w_self, space): raise NotImplemented("method not implemented") + def unwrap(w_self, space): + raise NotImplemented("method not implemented") -@specialize.arg(2) -def is_generic(space, w_self, fun): - v = w_self._value - if len(v) == 0: - return space.w_False - if len(v) == 1: - return space.newbool(fun(v[0])) - for idx in range(len(v)): - if not fun(v[idx]): + @specialize.arg(2) + def _all_true(w_self, space, func): + """Test all elements of a list with func for True. + Returns True only if all elements test True.""" + size = w_self.length(space) + it = w_self.iterator(space) + if size == 0: return space.w_False - return space.w_True + if size == 1: + return space.newbool(func(it.nextchar())) + # not all it objects will support iterator protocol, eg rope + for pos in range(size): + ch = it.nextchar() + if not func(ch): + return space.w_False + return space.w_True + + @specialize.arg(2, 3) + def _none_false_one_true(w_self, space, pred, inverse): + """Test all elements against predicate and inverse. + Returns True only if all elements fail inverse and at least one + element passes predicate.""" + v = w_self.unwrap(space) + if len(v) == 1: + c = v[0] + return space.newbool(pred(c)) + status = False + for idx in range(len(v)): + if inverse(v[idx]): + return space.w_False + elif not status and pred(v[idx]): + status = True + return space.newbool(status) + + def _title(w_self, space): + input = w_self.unwrap(space) + cased = False + previous_is_cased = False + + for pos in range(0, len(input)): + ch = input[pos] + if w_self._isupper(ch): + if previous_is_cased: + return space.w_False + previous_is_cased = True + cased = True + elif w_self._islower(ch): + if not previous_is_cased: + return space.w_False + cased = True + else: + previous_is_cased = False + + return space.newbool(cased) diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -19,22 +19,39 @@ from pypy.interpreter import gateway from pypy.interpreter.argument import Signature from pypy.interpreter.buffer import RWBuffer +from pypy.objspace.std.abstractstring import \ + W_AbstractBaseStringObject, Mixin_BaseStringMethods from pypy.objspace.std.bytearraytype import ( makebytearraydata_w, getbytevalue, new_bytearray ) -from pypy.tool.sourcetools import func_with_new_name -class W_BytearrayObject(W_Object): +class Mixin_BytearrayMethods(Mixin_BaseStringMethods): + __slots__ = () + + +class W_AbstractBytearrayObject(stringobject.W_AbstractStringObject): + __slots__ = () + + +class W_BytearrayObject(W_AbstractBytearrayObject, Mixin_BytearrayMethods): from pypy.objspace.std.bytearraytype import bytearray_typedef as typedef def __init__(w_self, data): w_self.data = data - def __repr__(w_self): - """ representation for debugging purposes """ - return "%s(%s)" % (w_self.__class__.__name__, ''.join(w_self.data)) + def raw_value(w_self): + return w_self.data + + def str_w(w_self, space): + return w_self.data + + def unicode_w(w_self, space): + # XXX should this use the default encoding? + from pypy.objspace.std.unicodetype import plain_str2unicode + return plain_str2unicode(space, w_self.data) + registerimplementation(W_BytearrayObject) @@ -279,6 +296,27 @@ def str__Bytearray(space, w_bytearray): return space.wrap(''.join(w_bytearray.data)) +def str_isalnum__Bytearray(space, w_self): + return w_self.isalnum(space) + +def str_isalpha__Bytearray(space, w_self): + return w_self.isalpha(space) + +def str_isdigit__Bytearray(space, w_self): + return w_self.isdigit(space) + +def str_islower__Bytearray(space, w_self): + return w_self.islower(space) + +def str_isspace__Bytearray(space, w_self): + return w_self.isspace(space) + +def str_istitle__Bytearray(space, w_self): + return w_self.istitle(space) + +def str_isupper__Bytearray(space, w_self): + return w_self.isupper(space) + def str_count__Bytearray_Int_ANY_ANY(space, w_bytearray, w_char, w_start, w_stop): char = w_char.intval bytearray = w_bytearray.data @@ -372,34 +410,6 @@ w_str = str__Bytearray(space, w_bytearray) return stringobject.str_decode__String_ANY_ANY(space, w_str, w_encoding, w_errors) -def str_islower__Bytearray(space, w_bytearray): - w_str = str__Bytearray(space, w_bytearray) - return stringobject.str_islower__String(space, w_str) - -def str_isupper__Bytearray(space, w_bytearray): - w_str = str__Bytearray(space, w_bytearray) - return stringobject.str_isupper__String(space, w_str) - -def str_isalpha__Bytearray(space, w_bytearray): - w_str = str__Bytearray(space, w_bytearray) - return stringobject.str_isalpha__String(space, w_str) - -def str_isalnum__Bytearray(space, w_bytearray): - w_str = str__Bytearray(space, w_bytearray) - return stringobject.str_isalnum__String(space, w_str) - -def str_isdigit__Bytearray(space, w_bytearray): - w_str = str__Bytearray(space, w_bytearray) - return stringobject.str_isdigit__String(space, w_str) - -def str_istitle__Bytearray(space, w_bytearray): - w_str = str__Bytearray(space, w_bytearray) - return stringobject.str_istitle__String(space, w_str) - -def str_isspace__Bytearray(space, w_bytearray): - w_str = str__Bytearray(space, w_bytearray) - return stringobject.str_isspace__String(space, w_str) - def bytearray_insert__Bytearray_Int_ANY(space, w_bytearray, w_idx, w_other): where = space.int_w(w_idx) length = len(w_bytearray.data) diff --git a/pypy/objspace/std/ropeobject.py b/pypy/objspace/std/ropeobject.py --- a/pypy/objspace/std/ropeobject.py +++ b/pypy/objspace/std/ropeobject.py @@ -17,9 +17,10 @@ from pypy.objspace.std.stringobject import ( mod__String_ANY as mod__Rope_ANY, str_format__String as str_format__Rope, - _upper, _lower, DEFAULT_NOOP_TABLE) + DEFAULT_NOOP_TABLE) -class W_RopeObject(stringobject.W_AbstractStringObject): +class W_RopeObject(stringobject.W_AbstractStringObject, + stringobject.Mixin_StringMethods): from pypy.objspace.std.stringtype import str_typedef as typedef _immutable_fields_ = ['_node'] @@ -28,6 +29,12 @@ assert node.is_bytestring() w_self._node = node + def iterator(w_self, space): + return rope.ItemIterator(w_self._node) + + def length(w_self, space): + return w_self._node.length() + def raw_value(w_self): return w_self._node @@ -67,92 +74,34 @@ registerimplementation(W_RopeIterObject) -def _is_generic(space, w_self, fun): - l = w_self._node.length() - if l == 0: - return space.w_False - iter = rope.ItemIterator(w_self._node) - for i in range(l): - if not fun(iter.nextchar()): - return space.w_False - return space.w_True -_is_generic._annspecialcase_ = "specialize:arg(2)" - -_isspace = lambda c: c.isspace() -_isdigit = lambda c: c.isdigit() -_isalpha = lambda c: c.isalpha() -_isalnum = lambda c: c.isalnum() - def str_isspace__Rope(space, w_self): - return _is_generic(space, w_self, _isspace) + return w_self.isspace(space) def str_isdigit__Rope(space, w_self): - return _is_generic(space, w_self, _isdigit) + return w_self.isdigit(space) def str_isalpha__Rope(space, w_self): - return _is_generic(space, w_self, _isalpha) + return w_self.isalpha(space) def str_isalnum__Rope(space, w_self): - return _is_generic(space, w_self, _isalnum) + return w_self.isalnum(space) def str_isupper__Rope(space, w_self): """Return True if all cased characters in S are uppercase and there is at least one cased character in S, False otherwise.""" - l = w_self._node.length() - - if l == 0: - return space.w_False - cased = False - iter = rope.ItemIterator(w_self._node) - for idx in range(l): - c = iter.nextchar() - if c.islower(): - return space.w_False - elif not cased and c.isupper(): - cased = True - return space.newbool(cased) + return w_self.isupper(space) def str_islower__Rope(space, w_self): """Return True if all cased characters in S are lowercase and there is at least one cased character in S, False otherwise.""" - l = w_self._node.length() - - if l == 0: - return space.w_False - cased = False - iter = rope.ItemIterator(w_self._node) - for idx in range(l): - c = iter.nextchar() - if c.isupper(): - return space.w_False - elif not cased and c.islower(): - cased = True - return space.newbool(cased) + return w_self.islower(space) def str_istitle__Rope(space, w_self): """Return True if S is a titlecased string and there is at least one character in S, i.e. uppercase characters may only follow uncased characters and lowercase characters only cased ones. Return False otherwise.""" - cased = False - previous_is_cased = False - - iter = rope.ItemIterator(w_self._node) - for pos in range(0, w_self._node.length()): - ch = iter.nextchar() - if ch.isupper(): - if previous_is_cased: - return space.w_False - previous_is_cased = True - cased = True - elif ch.islower(): - if not previous_is_cased: - return space.w_False - cased = True - else: - previous_is_cased = False - - return space.newbool(cased) + return w_self.istitle(space) def _local_transform(node, transform): l = node.length() @@ -166,24 +115,13 @@ _local_transform._annspecialcase_ = "specialize:arg(1)" def str_upper__Rope(space, w_self): - return _local_transform(w_self._node, _upper) + return _local_transform(w_self._node, w_self._upper) def str_lower__Rope(space, w_self): - return _local_transform(w_self._node, _lower) - -def _swapcase(ch): - if ch.isupper(): - o = ord(ch) + 32 - return chr(o) - elif ch.islower(): - o = ord(ch) - 32 - return chr(o) - else: - return ch + return _local_transform(w_self._node, w_self._lower) def str_swapcase__Rope(space, w_self): - return _local_transform(w_self._node, _swapcase) - + return _local_transform(w_self._node, w_self._swapcase) def str_capitalize__Rope(space, w_self): node = w_self._node @@ -221,9 +159,9 @@ for pos in range(0, length): ch = iter.nextchar() if not prev_letter.isalpha(): - buffer[pos] = _upper(ch) + buffer[pos] = w_self._upper(ch) else: - buffer[pos] = _lower(ch) + buffer[pos] = w_self._lower(ch) prev_letter = buffer[pos] diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py --- a/pypy/objspace/std/ropeunicodeobject.py +++ b/pypy/objspace/std/ropeunicodeobject.py @@ -14,7 +14,6 @@ from pypy.objspace.std.tupleobject import W_TupleObject from pypy.rlib.rarithmetic import intmask, ovfcheck from pypy.module.unicodedata import unicodedb -from pypy.tool.sourcetools import func_with_new_name from pypy.objspace.std.formatting import mod_format @@ -84,6 +83,12 @@ def __init__(w_self, node): w_self._node = node + def iterator(w_self, space): + return rope.ItemIterator(w_self._node) + + def length(w_self, space): + return w_self._node.length() + def raw_value(w_self): return w_self._node @@ -102,9 +107,6 @@ registerimplementation(W_RopeUnicodeObject) -def _isspace(uchar_ord): - return unicodedb.isspace(uchar_ord) - def ropeunicode_w(space, w_str): if isinstance(w_str, W_RopeUnicodeObject): return w_str._node @@ -314,81 +316,38 @@ def mul__ANY_RopeUnicode(space, w_times, w_uni): return mul__RopeUnicode_ANY(space, w_uni, w_times) +def unicode_isspace__RopeUnicode(space, w_self): + return w_self.isspace(space) -def make_generic(funcname): - def func(space, w_self): - node = w_self._node - if node.length() == 0: - return space.w_False - iter = rope.ItemIterator(node) - for idx in range(node.length()): - if not getattr(unicodedb, funcname)(iter.nextint()): - return space.w_False - return space.w_True - return func_with_new_name(func, "unicode_%s__RopeUnicode" % (funcname, )) +def unicode_isalpha__RopeUnicode(space, w_self): + return w_self.isalpha(space) -unicode_isspace__RopeUnicode = make_generic("isspace") -unicode_isalpha__RopeUnicode = make_generic("isalpha") -unicode_isalnum__RopeUnicode = make_generic("isalnum") -unicode_isdecimal__RopeUnicode = make_generic("isdecimal") -unicode_isdigit__RopeUnicode = make_generic("isdigit") -unicode_isnumeric__RopeUnicode = make_generic("isnumeric") +def unicode_isalnum__RopeUnicode(space, w_self): + return w_self.isalnum(space) -def unicode_islower__RopeUnicode(space, w_unicode): - cased = False - iter = rope.ItemIterator(w_unicode._node) - while 1: - try: - ch = iter.nextint() - except StopIteration: - return space.newbool(cased) - if (unicodedb.isupper(ch) or - unicodedb.istitle(ch)): - return space.w_False - if not cased and unicodedb.islower(ch): - cased = True +def unicode_isdecimal__RopeUnicode(space, w_self): + return w_self.isdecimal(space) -def unicode_isupper__RopeUnicode(space, w_unicode): - cased = False - iter = rope.ItemIterator(w_unicode._node) - while 1: - try: - ch = iter.nextint() - except StopIteration: - return space.newbool(cased) - if (unicodedb.islower(ch) or - unicodedb.istitle(ch)): - return space.w_False - if not cased and unicodedb.isupper(ch): - cased = True +def unicode_isdigit__RopeUnicode(space, w_self): + return w_self.isdigit(space) -def unicode_istitle__RopeUnicode(space, w_unicode): - cased = False - previous_is_cased = False - iter = rope.ItemIterator(w_unicode._node) - while 1: - try: - ch = iter.nextint() - except StopIteration: - return space.newbool(cased) - if (unicodedb.isupper(ch) or - unicodedb.istitle(ch)): - if previous_is_cased: - return space.w_False - previous_is_cased = cased = True - elif unicodedb.islower(ch): - if not previous_is_cased: - return space.w_False - previous_is_cased = cased = True - else: - previous_is_cased = False +def unicode_isnumeric__RopeUnicode(space, w_self): + return w_self.isnumeric(space) +def unicode_islower__RopeUnicode(space, w_self): + return w_self.islower(space) + +def unicode_isupper__RopeUnicode(space, w_self): + return w_self.isupper(space) + +def unicode_istitle__RopeUnicode(space, w_self): + return w_self.istitle(space) def _contains(i, uni): return unichr(i) in uni def unicode_strip__RopeUnicode_None(space, w_self, w_chars): - return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _isspace)) + return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, unicodedb.isspace)) def unicode_strip__RopeUnicode_RopeUnicode(space, w_self, w_chars): return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _contains, w_chars._node.flatten_unicode())) @@ -398,7 +357,7 @@ unicode_from_string(space, w_chars)) def unicode_lstrip__RopeUnicode_None(space, w_self, w_chars): - return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _isspace)) + return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, unicodedb.isspace)) def unicode_lstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars): return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _contains, w_chars._node.flatten_unicode())) @@ -407,7 +366,7 @@ unicode_from_string(space, w_chars)) def unicode_rstrip__RopeUnicode_None(space, w_self, w_chars): - return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _isspace)) + return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, unicodedb.isspace)) def unicode_rstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars): return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _contains, w_chars._node.flatten_unicode())) @@ -653,7 +612,7 @@ selfnode = w_self._node maxsplit = space.int_w(w_maxsplit) res_w = [W_RopeUnicodeObject(node) - for node in rope.split_chars(selfnode, maxsplit, _isspace)] + for node in rope.split_chars(selfnode, maxsplit, unicodedb.isspace)] return space.newlist(res_w) def unicode_split__RopeUnicode_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit): @@ -672,7 +631,7 @@ selfnode = w_self._node maxsplit = space.int_w(w_maxsplit) res_w = [W_RopeUnicodeObject(node) - for node in rope.rsplit_chars(selfnode, maxsplit, _isspace)] + for node in rope.rsplit_chars(selfnode, maxsplit, unicodedb.isspace)] return space.newlist(res_w) diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py --- a/pypy/objspace/std/stringobject.py +++ b/pypy/objspace/std/stringobject.py @@ -15,20 +15,67 @@ from pypy.rlib.rstring import StringBuilder, split from pypy.interpreter.buffer import StringBuffer -from pypy.objspace.std.abstractstring import W_AbstractBaseStringObject, \ - is_generic +from pypy.objspace.std.abstractstring import \ + W_AbstractBaseStringObject, Mixin_BaseStringMethods from pypy.objspace.std.formatting import mod_format from pypy.objspace.std.stringtype import sliced, wrapstr, wrapchar, \ stringendswith, stringstartswith, joined2 + +class Mixin_StringMethods(Mixin_BaseStringMethods): + __slots__ = () + + class W_AbstractStringObject(W_AbstractBaseStringObject): __slots__ = () def unwrap(w_self, space): return w_self.str_w(space) + def _isalnum(self, ch): + return ch.isalnum() -class W_StringObject(W_AbstractStringObject): + def _isalpha(self, ch): + return ch.isalpha() + + def _isdigit(self, ch): + return ch.isdigit() + + def _islower(self, ch): + return ch.islower() + + def _isspace(self, ch): + return ch.isspace() + + def _isupper(self, ch): + return ch.isupper() + + def _lower(self, ch): + if ch.isupper(): + o = ord(ch) + 32 + return chr(o) + else: + return ch + + def _upper(self, ch): + if ch.islower(): + o = ord(ch) - 32 + return chr(o) + else: + return ch + + def _swapcase(self, ch): + if ch.isupper(): + o = ord(ch) + 32 + return chr(o) + elif ch.islower(): + o = ord(ch) - 32 + return chr(o) + else: + return ch + + +class W_StringObject(W_AbstractStringObject, Mixin_StringMethods): from pypy.objspace.std.stringtype import str_typedef as typedef _immutable_fields_ = ['_value'] @@ -46,96 +93,41 @@ from pypy.objspace.std.unicodetype import plain_str2unicode return plain_str2unicode(space, w_self._value) + registerimplementation(W_StringObject) W_StringObject.EMPTY = W_StringObject('') W_StringObject.PREBUILT = [W_StringObject(chr(i)) for i in range(256)] del i -def _upper(ch): - if ch.islower(): - o = ord(ch) - 32 - return chr(o) - else: - return ch +def str_isalnum__String(space, w_self): + return w_self.isalnum(space) -def _lower(ch): - if ch.isupper(): - o = ord(ch) + 32 - return chr(o) - else: - return ch +def str_isalpha__String(space, w_self): + return w_self.isalpha(space) + +def str_isdigit__String(space, w_self): + return w_self.isdigit(space) def str_isspace__String(space, w_self): - isspace = lambda c: c.isspace() - return is_generic(space, w_self, isspace) + return w_self.isspace(space) -def str_isdigit__String(space, w_self): - isdigit = lambda c: c.isdigit() - return is_generic(space, w_self, isdigit) - -def str_isalpha__String(space, w_self): - isalpha = lambda c: c.isalpha() - return is_generic(space, w_self, isalpha) - -def str_isalnum__String(space, w_self): - isalnum = lambda c: c.isalnum() - return is_generic(space, w_self, isalnum) +def str_islower__String(space, w_self): + """Return True if all cased characters in S are lowercase and there is +at least one cased character in S, False otherwise.""" + return w_self.islower(space) def str_isupper__String(space, w_self): """Return True if all cased characters in S are uppercase and there is at least one cased character in S, False otherwise.""" - v = w_self._value - if len(v) == 1: - c = v[0] - return space.newbool(c.isupper()) - cased = False - for idx in range(len(v)): - if v[idx].islower(): - return space.w_False - elif not cased and v[idx].isupper(): - cased = True - return space.newbool(cased) - -def str_islower__String(space, w_self): - """Return True if all cased characters in S are lowercase and there is -at least one cased character in S, False otherwise.""" - v = w_self._value - if len(v) == 1: - c = v[0] - return space.newbool(c.islower()) - cased = False - for idx in range(len(v)): - if v[idx].isupper(): - return space.w_False - elif not cased and v[idx].islower(): - cased = True - return space.newbool(cased) + return w_self.isupper(space) def str_istitle__String(space, w_self): """Return True if S is a titlecased string and there is at least one character in S, i.e. uppercase characters may only follow uncased characters and lowercase characters only cased ones. Return False otherwise.""" - input = w_self._value - cased = False - previous_is_cased = False - - for pos in range(0, len(input)): - ch = input[pos] - if ch.isupper(): - if previous_is_cased: - return space.w_False - previous_is_cased = True - cased = True - elif ch.islower(): - if not previous_is_cased: - return space.w_False - cased = True - else: - previous_is_cased = False - - return space.newbool(cased) + return w_self.istitle(space) def str_upper__String(space, w_self): self = w_self._value @@ -150,18 +142,10 @@ builder = StringBuilder(len(self)) for i in range(len(self)): ch = self[i] - if ch.isupper(): - o = ord(ch) + 32 - builder.append(chr(o)) - elif ch.islower(): - o = ord(ch) - 32 - builder.append(chr(o)) - else: - builder.append(ch) + builder.append(w_self._swapcase(ch)) return space.wrap(builder.build()) - def str_capitalize__String(space, w_self): input = w_self._value builder = StringBuilder(len(input)) @@ -191,10 +175,10 @@ for pos in range(len(input)): ch = input[pos] if not prev_letter.isalpha(): - ch = _upper(ch) + ch = w_self._upper(ch) builder.append(ch) else: - ch = _lower(ch) + ch = w_self._lower(ch) builder.append(ch) prev_letter = ch diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -15,18 +15,61 @@ from pypy.rlib.rstring import UnicodeBuilder from pypy.rlib.runicode import unicode_encode_unicode_escape from pypy.module.unicodedata import unicodedb -from pypy.tool.sourcetools import func_with_new_name -from pypy.objspace.std.abstractstring import W_AbstractBaseStringObject +from pypy.objspace.std.abstractstring import \ + W_AbstractBaseStringObject, Mixin_BaseStringMethods from pypy.objspace.std.formatting import mod_format from pypy.objspace.std.stringtype import stringstartswith, stringendswith -class W_AbstractUnicodeObject(W_AbstractBaseStringObject): + +class Mixin_UnicodeMethods(Mixin_BaseStringMethods): + __slows__ = () + + def isdecimal(w_self, space): + return w_self._all_true(space, w_self._isdecimal) + + +class W_AbstractUnicodeObject(W_AbstractBaseStringObject, Mixin_UnicodeMethods): __slots__ = () def unwrap(w_self, space): return w_self.unicode_w(space) + def _isalnum(self, ch): + return unicodedb.isalnum(ord(ch)) + + def _isalpha(self, ch): + return unicodedb.isalpha(ord(ch)) + + def _isdigit(self, ch): + return unicodedb.isdigit(ord(ch)) + + def _isdecimal(self, ch): + return unicodedb.isdecimal(ord(ch)) + + def _islower(self, ch): + return unicodedb.islower(ord(ch)) + + def _isspace(self, ch): + return unicodedb.isspace(ord(ch)) + + def _isupper(self, ch): + return unicodedb.isupper(ord(ch)) + + def _lower(self, ch): + return unichr(unicodedb.tolower(ord(ch))) + + def _upper(self, ch): + return unichr(unicodedb.toupper(ord(ch))) + + def _swapcase(self, ch): + if unicodedb.islower(ch): + return unichr(unicodedb.toupper(ord(ch))) + elif unicodedb.isupper(ch): + return unichr(unicodedb.tolower(ord(ch))) + else: + return ch + class W_UnicodeObject(W_AbstractUnicodeObject): from pypy.objspace.std.unicodetype import unicode_typedef as typedef @@ -50,6 +93,7 @@ return w_self return W_UnicodeObject(w_self._value) + W_UnicodeObject.EMPTY = W_UnicodeObject(u'') registerimplementation(W_UnicodeObject) @@ -60,6 +104,7 @@ raise operationerrfmt(space.w_TypeError, "expected unicode, got '%s'", space.type(w_unistr).getname(space)) + # XXX remove direct use of _value unistr = w_unistr._value result = ['\0'] * len(unistr) digits = [ '0', '1', '2', '3', '4', @@ -287,63 +332,32 @@ def mul__ANY_Unicode(space, w_times, w_uni): return mul__Unicode_ANY(space, w_uni, w_times) -def _isspace(uchar): - return unicodedb.isspace(ord(uchar)) - def unicode_isspace__Unicode(space, w_self): - return is_generic(space, w_self, unicodedb.isspace) + return w_self.isspace(space) def unicode_isalpha__Unicode(space, w_self): - return is_generic(space, w_self, unicodedb.isalpha) + return w_self.isalpha(space) def unicode_isalnum__Unicode(space, w_self): - return is_generic(space, w_self, unicodedb.isalnum) + return w_self.isalnum(space) def unicode_isdecimal__Unicode(space, w_self): - return is_generic(space, w_self, unicodedb.isdecimal) + return w_self.isdecimal(space) def unicode_isdigit__Unicode(space, w_self): - return is_generic(space, w_self, unicodedb.isdigit) + return w_self.isdigit(space) def unicode_isnumeric__Unicode(space, w_self): - return is_generic(space, w_self, unicodedb.isnumeric) + return w_self.isnumeric(space) -def unicode_islower__Unicode(space, w_unicode): - cased = False - for uchar in w_unicode._value: - if (unicodedb.isupper(ord(uchar)) or - unicodedb.istitle(ord(uchar))): - return space.w_False - if not cased and unicodedb.islower(ord(uchar)): - cased = True - return space.newbool(cased) +def unicode_islower__Unicode(space, w_self): + return w_self.islower(space) -def unicode_isupper__Unicode(space, w_unicode): - cased = False - for uchar in w_unicode._value: - if (unicodedb.islower(ord(uchar)) or - unicodedb.istitle(ord(uchar))): - return space.w_False - if not cased and unicodedb.isupper(ord(uchar)): - cased = True - return space.newbool(cased) +def unicode_isupper__Unicode(space, w_self): + return w_self.isupper(space) -def unicode_istitle__Unicode(space, w_unicode): - cased = False - previous_is_cased = False - for uchar in w_unicode._value: - if (unicodedb.isupper(ord(uchar)) or - unicodedb.istitle(ord(uchar))): - if previous_is_cased: - return space.w_False - previous_is_cased = cased = True - elif unicodedb.islower(ord(uchar)): - if not previous_is_cased: - return space.w_False - previous_is_cased = cased = True - else: - previous_is_cased = False - return space.newbool(cased) +def unicode_istitle__Unicode(space, w_self): + return w_self.istitle(space) def _strip(space, w_self, w_chars, left, right): "internal function called by str_xstrip methods" @@ -373,11 +387,11 @@ rpos = len(u_self) if left: - while lpos < rpos and _isspace(u_self[lpos]): + while lpos < rpos and w_self._isspace(u_self[lpos]): lpos += 1 if right: - while rpos > lpos and _isspace(u_self[rpos - 1]): + while rpos > lpos and w_self._isspace(u_self[rpos - 1]): rpos -= 1 assert rpos >= 0 @@ -651,7 +665,7 @@ while True: # find the beginning of the next word while i < length: - if not _isspace(value[i]): + if not w_self._isspace(value[i]): break # found i += 1 else: @@ -662,7 +676,7 @@ j = length # take all the rest of the string else: j = i + 1 - while j < length and not _isspace(value[j]): + while j < length and not w_self._isspace(value[j]): j += 1 maxsplit -= 1 # NB. if it's already < 0, it stays < 0 @@ -694,7 +708,7 @@ while True: # starting from the end, find the end of the next word while i >= 0: - if not _isspace(value[i]): + if not w_self._isspace(value[i]): break # found i -= 1 else: @@ -706,7 +720,7 @@ j = -1 # take all the rest of the string else: j = i - 1 - while j >= 0 and not _isspace(value[j]): + while j >= 0 and not w_self._isspace(value[j]): j -= 1 maxsplit -= 1 # NB. if it's already < 0, it stays < 0 _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit