Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: py3k
Changeset: r49979:5a8c73ef4e8f
Date: 2011-11-29 23:25 +0100
http://bitbucket.org/pypy/pypy/changeset/5a8c73ef4e8f/
Log: (chronitis) Add bytes.maketrans, and allow buffer-compatible objects to be used as arguments of most bytes methods diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py --- a/pypy/objspace/std/stringobject.py +++ b/pypy/objspace/std/stringobject.py @@ -250,10 +250,10 @@ return space.newlist(res_w) -def str_split__String_String_ANY(space, w_self, w_by, w_maxsplit=-1): +def str_split__String_ANY_ANY(space, w_self, w_by, w_maxsplit=-1): maxsplit = space.int_w(w_maxsplit) value = w_self._value - by = w_by._value + by = space.bufferstr_w(w_by) bylen = len(by) if bylen == 0: raise OperationError(space.w_ValueError, space.wrap("empty separator")) @@ -324,12 +324,19 @@ def make_rsplit_with_delim(funcname, sliced): from pypy.tool.sourcetools import func_with_new_name + if 'Unicode' in funcname: + def unwrap_sep(space, w_by): + return w_by.value + else: + def unwrap_sep(space, w_by): + return space.bufferstr_w(w_by) + def fn(space, w_self, w_by, w_maxsplit=-1): maxsplit = space.int_w(w_maxsplit) res_w = [] value = w_self._value end = len(value) - by = w_by._value + by = unwrap_sep(space, w_by) bylen = len(by) if bylen == 0: raise OperationError(space.w_ValueError, space.wrap("empty separator")) @@ -348,8 +355,8 @@ return func_with_new_name(fn, funcname) -str_rsplit__String_String_ANY = make_rsplit_with_delim('str_rsplit__String_String_ANY', - sliced) +str_rsplit__String_ANY_ANY = make_rsplit_with_delim( + 'str_rsplit__String_ANY_ANY', sliced) def str_join__String_ANY(space, w_self, w_list): list_w = space.listview(w_list) @@ -372,18 +379,22 @@ reslen = len(self) * (size - 1) for i in range(size): w_s = list_w[i] - if not space.isinstance_w(w_s, space.w_bytes): + try: + item = space.bufferstr_w(w_s) + except OperationError, e: + if not e.match(space, space.w_TypeError): + raise raise operationerrfmt( space.w_TypeError, "sequence item %d: expected bytes, %s " "found", i, space.type(w_s).getname(space)) - reslen += len(space.bytes_w(w_s)) + 
reslen += len(item) sb = StringBuilder(reslen) for i in range(size): if self and i != 0: sb.append(self) - sb.append(space.bytes_w(list_w[i])) + sb.append(space.bufferstr_w(list_w[i])) return space.wrapbytes(sb.build()) def str_rjust__String_ANY_ANY(space, w_self, w_arg, w_fillchar): @@ -426,6 +437,11 @@ space, lenself, w_start, w_end, upper_bound=upper_bound) return (self, start, end) +def contains__String_ANY(space, w_self, w_sub): + self = w_self._value + sub = space.bufferstr_w(w_sub) + return space.newbool(self.find(sub) >= 0) + def contains__String_String(space, w_self, w_sub): self = w_self._value sub = w_sub._value @@ -436,13 +452,23 @@ char = w_char.intval return space.newbool(self.find(chr(char)) >= 0) +def str_find__String_ANY_ANY_ANY(space, w_self, w_sub, w_start, w_end): + (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end) + res = self.find(space.bufferstr_w(w_sub), start, end) + return space.wrap(res) + def str_find__String_String_ANY_ANY(space, w_self, w_sub, w_start, w_end): - (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end) + (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end) res = self.find(w_sub._value, start, end) return space.wrap(res) +def str_rfind__String_ANY_ANY_ANY(space, w_self, w_sub, w_start, w_end): + (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end) + res = self.rfind(space.bufferstr_w(w_sub), start, end) + return space.wrap(res) + def str_rfind__String_String_ANY_ANY(space, w_self, w_sub, w_start, w_end): - (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end) + (self, start, end) = _convert_idx_params(space, w_self, w_start, w_end) res = self.rfind(w_sub._value, start, end) return space.wrap(res) @@ -554,7 +580,7 @@ def _strip(space, w_self, w_chars, left, right): "internal function called by str_xstrip methods" u_self = w_self._value - u_chars = w_chars._value + u_chars = space.bufferstr_w(w_chars) lpos = 0 rpos = len(u_self) @@ 
-590,20 +616,20 @@ assert rpos >= lpos # annotator hint, don't remove return sliced(space, u_self, lpos, rpos, w_self) -def str_strip__String_String(space, w_self, w_chars): +def str_strip__String_ANY(space, w_self, w_chars): return _strip(space, w_self, w_chars, left=1, right=1) def str_strip__String_None(space, w_self, w_chars): return _strip_none(space, w_self, left=1, right=1) -def str_rstrip__String_String(space, w_self, w_chars): +def str_rstrip__String_ANY(space, w_self, w_chars): return _strip(space, w_self, w_chars, left=0, right=1) def str_rstrip__String_None(space, w_self, w_chars): return _strip_none(space, w_self, left=0, right=1) -def str_lstrip__String_String(space, w_self, w_chars): +def str_lstrip__String_ANY(space, w_self, w_chars): return _strip(space, w_self, w_chars, left=1, right=0) def str_lstrip__String_None(space, w_self, w_chars): @@ -633,6 +659,12 @@ u_self, u_start, u_end = _convert_idx_params(space, w_self, w_start, w_end) return wrapint(space, u_self.count(w_arg._value, u_start, u_end)) +def str_endswith__String_ANY_ANY_ANY(space, w_self, w_suffix, w_start, w_end): + (u_self, start, end) = _convert_idx_params(space, w_self, w_start, + w_end, True) + return space.newbool(stringendswith(u_self, space.bufferstr_w(w_suffix), + start, end)) + def str_endswith__String_String_ANY_ANY(space, w_self, w_suffix, w_start, w_end): (u_self, start, end) = _convert_idx_params(space, w_self, w_start, w_end, True) @@ -642,15 +674,17 @@ (u_self, start, end) = _convert_idx_params(space, w_self, w_start, w_end, True) for w_suffix in space.fixedview(w_suffixes): - if space.isinstance_w(w_suffix, space.w_unicode): - w_u = space.call_function(space.w_unicode, w_self) - return space.call_method(w_u, "endswith", w_suffixes, w_start, - w_end) - suffix = space.bytes_w(w_suffix) + suffix = space.bufferstr_w(w_suffix) if stringendswith(u_self, suffix, start, end): return space.w_True return space.w_False +def str_startswith__String_ANY_ANY_ANY(space, w_self, 
w_prefix, w_start, w_end): + (u_self, start, end) = _convert_idx_params(space, w_self, w_start, + w_end, True) + return space.newbool(stringstartswith(u_self, space.bufferstr_w(w_prefix), + start, end)) + def str_startswith__String_String_ANY_ANY(space, w_self, w_prefix, w_start, w_end): (u_self, start, end) = _convert_idx_params(space, w_self, w_start, w_end, True) @@ -660,11 +694,7 @@ (u_self, start, end) = _convert_idx_params(space, w_self, w_start, w_end, True) for w_prefix in space.fixedview(w_prefixes): - if space.isinstance_w(w_prefix, space.w_unicode): - w_u = space.call_function(space.w_unicode, w_self) - return space.call_method(w_u, "startswith", w_prefixes, w_start, - w_end) - prefix = space.bytes_w(w_prefix) + prefix = space.bufferstr_w(w_prefix) if stringstartswith(u_self, prefix, start, end): return space.w_True return space.w_False diff --git a/pypy/objspace/std/stringtype.py b/pypy/objspace/std/stringtype.py --- a/pypy/objspace/std/stringtype.py +++ b/pypy/objspace/std/stringtype.py @@ -365,6 +365,38 @@ W_StringObject.__init__(w_obj, chars) return w_obj +def descr_maketrans(space, w_type, w_from, w_to): + """bytes.maketrans(frm, to) -> translation table + + Return a translation table (a bytes object of length 256) suitable + for use in the bytes or bytearray translate method where each byte + in frm is mapped to the byte at the same position in to. 
+ The bytes objects frm and to must be of the same length.""" + base_table = [chr(i) for i in range(256)] + list_from = makebytesdata_w(space, w_from) + list_to = makebytesdata_w(space, w_to) + + if len(list_from) != len(list_to): + raise OperationError(space.w_ValueError, space.wrap( + "maketrans arguments must have same length")) + + for i in range(len(list_from)): + pos_from = ord(list_from[i]) + char_to = list_to[i] + base_table[pos_from] = char_to + + chars = ''.join(base_table) + if space.config.objspace.std.withrope: + from pypy.objspace.std.ropeobject import rope, W_RopeObject + w_obj = space.allocate_instance(W_RopeObject, w_type) + W_RopeObject.__init__(w_obj, rope.LiteralStringNode(chars)) + return w_obj + else: + from pypy.objspace.std.stringobject import W_StringObject + w_obj = space.allocate_instance(W_StringObject, w_type) + W_StringObject.__init__(w_obj, chars) + return w_obj + # ____________________________________________________________ str_typedef = StdTypeDef("bytes", @@ -378,7 +410,8 @@ ' - a text string encoded using the specified encoding\n' ' - a bytes or a buffer object\n' ' - any object implementing the buffer API.', - fromhex = gateway.interp2app(descr_fromhex, as_classmethod=True) + fromhex = gateway.interp2app(descr_fromhex, as_classmethod=True), + maketrans = gateway.interp2app(descr_maketrans, as_classmethod=True), ) str_typedef.registermethods(globals()) diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py --- a/pypy/objspace/std/test/test_stringobject.py +++ b/pypy/objspace/std/test/test_stringobject.py @@ -728,6 +728,34 @@ x = b"A" * (2**16) raises(OverflowError, x.replace, b'', x) + def test_compatibility(self): + #a whole bunch of methods should accept bytearray/memoryview without complaining... 
+ #I don't know how slavishly we should follow the cpython spec here, since it appears + #quite arbitrary in which methods accept only bytes as secondary arguments or + #anything with the buffer protocol + + b = b'hello world' + b2 = b'ello' + #not testing result, just lack of TypeError + for bb in (b2, bytearray(b2), memoryview(b2)): + assert b.split(bb) + assert b.rsplit(bb) + assert b.split(bb[:1]) + assert b.rsplit(bb[:1]) + assert b.join((bb, bb)) # cpython accepts bytes and + # bytearray only, not buffer + assert bb in b + assert b.find(bb) + assert b.rfind(bb) + assert b.strip(bb) + assert b.rstrip(bb) + assert b.lstrip(bb) + assert not b.startswith(bb) + assert not b.startswith((bb, bb)) + assert not b.endswith(bb) + assert not b.endswith((bb, bb)) + assert b.maketrans(bb, bb) + class AppTestPrebuilt(AppTestStringObject): def setup_class(cls): cls.space = gettestobjspace(**{"objspace.std.withprebuiltchar": True}) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit