[pypy-commit] pypy py3k-refactor-str-types: reintegrate our bytes/bytearray

pjenvey Fri, 24 Jan 2014 12:20:39 -0800

Author: Philip Jenvey <[email protected]>
Branch: py3k-refactor-str-types
Changeset: r68912:eeae6a72a1be
Date: 2014-01-24 11:49 -0800
http://bitbucket.org/pypy/pypy/changeset/eeae6a72a1be/


Log:    reintegrate our bytes/bytearray

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -233,9 +233,8 @@
         raise operationerrfmt(space.w_TypeError, msg, w_result)
 
     def ord(self, space):
-        typename = space.type(self).getname(space)
-        msg = "ord() expected string of length 1, but %s found"
-        raise operationerrfmt(space.w_TypeError, msg, typename)
+        msg = "ord() expected string of length 1, but %T found"
+        raise operationerrfmt(space.w_TypeError, msg, self)
 
     def __spacebind__(self, space):
         return self
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -3,15 +3,14 @@
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.buffer import RWBuffer
 from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.objspace.std.bytearraytype import new_bytearray
-from pypy.objspace.std.stringtype import getbytevalue, makebytesdata_w
+from pypy.objspace.std.bytesobject import (
+    getbytevalue, makebytesdata_w, newbytesdata_w)
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
-from pypy.interpreter.signature import Signature
 from pypy.objspace.std.sliceobject import W_SliceObject
 from pypy.objspace.std.stdtypedef import StdTypeDef
 from pypy.objspace.std.stringmethods import StringMethods
 from pypy.objspace.std.util import get_positive_index
-from rpython.rlib.objectmodel import newlist_hint, resizelist_hint, import_from_mixin
+from rpython.rlib.objectmodel import import_from_mixin
 from rpython.rlib.rstring import StringBuilder
 
 
@@ -101,10 +100,8 @@
         return False
 
     def _join_check_item(self, space, w_obj):
-        if (space.isinstance_w(w_obj, space.w_str) or
-            space.isinstance_w(w_obj, space.w_bytearray)):
-            return 0
-        return 1
+        return not (space.isinstance_w(w_obj, space.w_bytes) or
+                    space.isinstance_w(w_obj, space.w_bytearray))
 
     def ord(self, space):
         if len(self.data) != 1:
@@ -134,74 +131,19 @@
         "Create a bytearray object from a string of hexadecimal numbers.\n"
         "Spaces between two numbers are accepted.\n"
         "Example: bytearray.fromhex('B9 01EF') -> 
bytearray(b'\\xb9\\x01\\xef')."
-        hexstring = space.str_w(w_hexstring)
-        hexstring = hexstring.lower()
-        data = []
-        length = len(hexstring)
-        i = -2
-        while True:
-            i += 2
-            while i < length and hexstring[i] == ' ':
-                i += 1
-            if i >= length:
-                break
-            if i+1 == length:
-                raise OperationError(space.w_ValueError, space.wrap(
-                    "non-hexadecimal number found in fromhex() arg at position %d" % i))
-
-            top = _hex_digit_to_int(hexstring[i])
-            if top == -1:
-                raise OperationError(space.w_ValueError, space.wrap(
-                    "non-hexadecimal number found in fromhex() arg at position %d" % i))
-            bot = _hex_digit_to_int(hexstring[i+1])
-            if bot == -1:
-                raise OperationError(space.w_ValueError, space.wrap(
-                    "non-hexadecimal number found in fromhex() arg at position %d" % (i+1,)))
-            data.append(chr(top*16 + bot))
-
+        if not space.is_w(space.type(w_hexstring), space.w_unicode):
+            raise operationerrfmt(space.w_TypeError, "must be str, not %T",
+                                  w_hexstring)
+        hexstring = space.unicode_w(w_hexstring)
+        data = _hexstring_to_array(space, hexstring)
         # in CPython bytearray.fromhex is a staticmethod, so
         # we ignore w_type and always return a bytearray
         return new_bytearray(space, space.w_bytearray, data)
 
-    def descr_init(self, space, __args__):
-        # this is on the silly side
-        w_source, w_encoding, w_errors = __args__.parse_obj(
-                None, 'bytearray', init_signature, init_defaults)
-
-        if w_source is None:
-            w_source = space.wrap('')
-        if w_encoding is None:
-            w_encoding = space.w_None
-        if w_errors is None:
-            w_errors = space.w_None
-
-        # Unicode argument
-        if not space.is_w(w_encoding, space.w_None):
-            from pypy.objspace.std.unicodeobject import (
-                _get_encoding_and_errors, encode_object
-            )
-            encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
-
-            # if w_source is an integer this correctly raises a TypeError
-            # the CPython error message is: "encoding or errors without a string argument"
-            # ours is: "expected unicode, got int object"
-            w_source = encode_object(space, w_source, encoding, errors)
-
-        # Is it an int?
-        try:
-            count = space.int_w(w_source)
-        except OperationError, e:
-            if not e.match(space, space.w_TypeError):
-                raise
-        else:
-            if count < 0:
-                raise OperationError(space.w_ValueError,
-                                     space.wrap("bytearray negative count"))
-            self.data = ['\0'] * count
-            return
-
-        data = makebytearraydata_w(space, w_source)
-        self.data = data
+    @unwrap_spec(encoding='str_or_None', errors='str_or_None')
+    def descr_init(self, space, w_source=None, encoding=None, errors=None):
+        assert isinstance(self, W_BytearrayObject)
+        self.data = newbytesdata_w(space, w_source, encoding, errors)
 
     def descr_repr(self, space):
         s = self.data
@@ -236,7 +178,10 @@
         return space.wrap(buf.build())
 
     def descr_str(self, space):
-        return space.wrap(''.join(self.data))
+        if space.sys.get_flag('bytes_warning'):
+            space.warn(space.wrap("str() on a bytearray instance"),
+                       space.w_BytesWarning)
+        return self.descr_repr(space)
 
     def descr_eq(self, space, w_other):
         try:
@@ -310,7 +255,7 @@
         if isinstance(w_index, W_SliceObject):
             oldsize = len(self.data)
             start, stop, step, slicelength = w_index.indices4(space, oldsize)
-            sequence2 = makebytearraydata_w(space, w_other)
+            sequence2 = makebytesdata_w(space, w_other)
             _setitem_slice_helper(space, self.data, start, step,
                                   slicelength, sequence2, empty_elem='\x00')
         else:
@@ -341,7 +286,7 @@
         if isinstance(w_other, W_BytearrayObject):
             self.data += w_other.data
         else:
-            self.data += makebytearraydata_w(space, w_other)
+            self.data += makebytesdata_w(space, w_other)
         return self
 
     def descr_insert(self, space, w_idx, w_other):
@@ -376,64 +321,47 @@
     def descr_reverse(self, space):
         self.data.reverse()
 
-def getbytevalue(space, w_value):
-    if space.isinstance_w(w_value, space.w_str):
-        string = space.str_w(w_value)
-        if len(string) != 1:
-            raise OperationError(space.w_ValueError, space.wrap(
-                "string must be of size 1"))
-        return string[0]
-
-    value = space.getindex_w(w_value, None)
-    if not 0 <= value < 256:
-        # this includes the OverflowError in case the long is too large
-        raise OperationError(space.w_ValueError, space.wrap(
-            "byte must be in range(0, 256)"))
-    return chr(value)
-
 def new_bytearray(space, w_bytearraytype, data):
     w_obj = space.allocate_instance(W_BytearrayObject, w_bytearraytype)
     W_BytearrayObject.__init__(w_obj, data)
     return w_obj
 
 
-def makebytearraydata_w(space, w_source):
-    # String-like argument
-    try:
-        string = space.bufferstr_new_w(w_source)
-    except OperationError, e:
-        if not e.match(space, space.w_TypeError):
-            raise
-    else:
-        return [c for c in string]
-
-    # sequence of bytes
-    w_iter = space.iter(w_source)
-    length_hint = space.length_hint(w_source, 0)
-    data = newlist_hint(length_hint)
-    extended = 0
-    while True:
-        try:
-            w_item = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise
-            break
-        value = getbytevalue(space, w_item)
-        data.append(value)
-        extended += 1
-    if extended < length_hint:
-        resizelist_hint(data, extended)
-    return data
-
 def _hex_digit_to_int(d):
     val = ord(d)
     if 47 < val < 58:
         return val - 48
+    if 64 < val < 71:
+        return val - 55
     if 96 < val < 103:
         return val - 87
     return -1
 
+def _hexstring_to_array(space, s):
+    data = []
+    length = len(s)
+    i = -2
+    while True:
+        i += 2
+        while i < length and s[i] == ' ':
+            i += 1
+        if i >= length:
+            break
+        if i + 1 == length:
+            raise OperationError(space.w_ValueError, space.wrap(
+                "non-hexadecimal number found in fromhex() arg at position %d" % i))
+
+        top = _hex_digit_to_int(s[i])
+        if top == -1:
+            raise OperationError(space.w_ValueError, space.wrap(
+                "non-hexadecimal number found in fromhex() arg at position %d" % i))
+        bot = _hex_digit_to_int(s[i+1])
+        if bot == -1:
+            raise OperationError(space.w_ValueError, space.wrap(
+                "non-hexadecimal number found in fromhex() arg at position %d" % (i+1,)))
+        data.append(chr(top*16 + bot))
+    return data
+
 
 class BytearrayDocstrings:
     """bytearray(iterable_of_ints) -> bytearray
@@ -867,6 +795,8 @@
                             doc=BytearrayDocstrings.__reduce__.__doc__),
     fromhex = interp2app(W_BytearrayObject.descr_fromhex, as_classmethod=True,
                          doc=BytearrayDocstrings.fromhex.__doc__),
+    maketrans = interp2app(W_BytearrayObject.descr_maketrans,
+                           as_classmethod=True),
 
     __repr__ = interp2app(W_BytearrayObject.descr_repr,
                           doc=BytearrayDocstrings.__repr__.__doc__),
@@ -1001,9 +931,6 @@
                          doc=BytearrayDocstrings.reverse.__doc__),
 )
 
-init_signature = Signature(['source', 'encoding', 'errors'], None, None)
-init_defaults = [None, None, None]
-
 
 # XXX consider moving to W_BytearrayObject or remove
 def str_join__Bytearray_ANY(space, w_self, w_list):
@@ -1014,7 +941,7 @@
     newdata = []
     for i in range(len(list_w)):
         w_s = list_w[i]
-        if not (space.isinstance_w(w_s, space.w_str) or
+        if not (space.isinstance_w(w_s, space.w_bytes) or
                 space.isinstance_w(w_s, space.w_bytearray)):
             msg = "sequence item %d: expected string, %T found"
             raise operationerrfmt(space.w_TypeError, msg, i, w_s)
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -4,15 +4,13 @@
 from pypy.interpreter.buffer import StringBuffer
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault, interpindirect2app
-from pypy.objspace.std import newformat
-from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stdtypedef import StdTypeDef
 from pypy.objspace.std.stringmethods import StringMethods
-from pypy.objspace.std.unicodeobject import (
-    decode_object, unicode_from_encoded_object, _get_encoding_and_errors)
 from rpython.rlib.jit import we_are_jitted
-from rpython.rlib.objectmodel import compute_hash, compute_unique_id, import_from_mixin
-from rpython.rlib.rstring import StringBuilder, replace
+from rpython.rlib.objectmodel import (
+    compute_hash, compute_unique_id, import_from_mixin, newlist_hint,
+    resizelist_hint)
+from rpython.rlib.rstring import StringBuilder
 
 
 class W_AbstractBytesObject(W_Root):
@@ -41,12 +39,6 @@
     def descr_eq(self, space, w_other):
         """x.__eq__(y) <==> x==y"""
 
-    def descr__format__(self, space, w_format_spec):
-        """S.__format__(format_spec) -> string
-
-        Return a formatted version of S as described by format_spec.
-        """
-
     def descr_ge(self, space, w_other):
         """x.__ge__(y) <==> x>=y"""
 
@@ -56,12 +48,6 @@
     def descr_getnewargs(self, space):
         ""
 
-    def descr_getslice(self, space, w_start, w_stop):
-        """x.__getslice__(i, j) <==> x[i:j]
-
-        Use of negative indices is not supported.
-        """
-
     def descr_gt(self, space, w_other):
         """x.__gt__(y) <==> x>y"""
 
@@ -77,9 +63,6 @@
     def descr_lt(self, space, w_other):
         """x.__lt__(y) <==> x<y"""
 
-    def descr_mod(self, space, w_values):
-        """x.__mod__(y) <==> x%y"""
-
     def descr_mul(self, space, w_times):
         """x.__mul__(n) <==> x*n"""
 
@@ -132,17 +115,6 @@
         able to handle UnicodeDecodeErrors.
         """
 
-    def descr_encode(self, space, w_encoding=None, w_errors=None):
-        """S.encode(encoding=None, errors='strict') -> object
-
-        Encode S using the codec registered for encoding. encoding defaults
-        to the default encoding. errors may be given to set a different error
-        handling scheme. Default is 'strict' meaning that encoding errors raise
-        a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
-        'xmlcharrefreplace' as well as any other name registered with
-        codecs.register_error that is able to handle UnicodeEncodeErrors.
-        """
-
     def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
         """S.endswith(suffix[, start[, end]]) -> bool
 
@@ -170,13 +142,6 @@
         Return -1 on failure.
         """
 
-    def descr_format(self, space, __args__):
-        """S.format(*args, **kwargs) -> string
-
-        Return a formatted version of S, using substitutions from args and kwargs.
-        The substitutions are identified by braces ('{' and '}').
-        """
-
     def descr_index(self, space, w_sub, w_start=None, w_end=None):
         """S.index(sub[, start[, end]]) -> int
 
@@ -511,47 +476,46 @@
         return space.newlist_str(lst)
 
     @staticmethod
-    @unwrap_spec(w_object = WrappedDefault(""))
-    def descr_new(space, w_stringtype, w_object):
-        # NB. the default value of w_object is really a *wrapped* empty string:
-        #     there is gateway magic at work
-        w_obj = space.str(w_object)
-        if space.is_w(w_stringtype, space.w_str):
-            return w_obj  # XXX might be reworked when space.str() typechecks
-        value = space.str_w(w_obj)
+    @unwrap_spec(encoding='str_or_None', errors='str_or_None')
+    def descr_new(space, w_stringtype, w_source=None, encoding=None,
+                  errors=None):
+        if (w_source and space.is_w(space.type(w_source), space.w_bytes) and
+            space.is_w(w_stringtype, space.w_bytes)):
+            return w_source
+        value = ''.join(newbytesdata_w(space, w_source, encoding, errors))
         w_obj = space.allocate_instance(W_BytesObject, w_stringtype)
         W_BytesObject.__init__(w_obj, value)
         return w_obj
 
+    @staticmethod
+    def descr_fromhex(space, w_type, w_hexstring):
+        r"""bytes.fromhex(string) -> bytes
+
+        Create a bytes object from a string of hexadecimal numbers.
+        Spaces between two numbers are accepted.
+        Example: bytes.fromhex('B9 01EF') -> b'\xb9\x01\xef'.
+        """
+        if not space.is_w(space.type(w_hexstring), space.w_unicode):
+            raise operationerrfmt(space.w_TypeError, "must be str, not %T",
+                                  w_hexstring)
+        from pypy.objspace.std.bytearrayobject import _hexstring_to_array
+        hexstring = space.unicode_w(w_hexstring)
+        bytes = ''.join(_hexstring_to_array(space, hexstring))
+        return W_BytesObject(bytes)
+
     def descr_repr(self, space):
-        s = self._value
-        quote = "'"
-        if quote in s and '"' not in s:
-            quote = '"'
-        return space.wrap(string_escape_encode(s, quote))
+        return space.wrap(string_escape_encode(self._value, True))
 
     def descr_str(self, space):
-        if type(self) is W_BytesObject:
-            return self
-        return wrapstr(space, self._value)
+        if space.sys.get_flag('bytes_warning'):
+            space.warn(space.wrap("str() on a bytes instance"),
+                       space.w_BytesWarning)
+        return self.descr_repr(space)
 
     def descr_hash(self, space):
         x = compute_hash(self._value)
         return space.wrap(x)
 
-    def descr_format(self, space, __args__):
-        return newformat.format_method(space, self, __args__, is_unicode=False)
-
-    def descr__format__(self, space, w_format_spec):
-        if not space.isinstance_w(w_format_spec, space.w_str):
-            w_format_spec = space.str(w_format_spec)
-        spec = space.str_w(w_format_spec)
-        formatter = newformat.str_formatter(space, spec)
-        return formatter.format_string(self._value)
-
-    def descr_mod(self, space, w_values):
-        return mod_format(space, self, w_values, do_unicode=False)
-
     def descr_buffer(self, space):
         return space.wrap(StringBuffer(self._value))
 
@@ -613,10 +577,7 @@
 
     _StringMethods_descr_add = descr_add
     def descr_add(self, space, w_other):
-        if space.isinstance_w(w_other, space.w_unicode):
-            self_as_unicode = unicode_from_encoded_object(space, self, None, None)
-            return space.add(self_as_unicode, w_other)
-        elif space.isinstance_w(w_other, space.w_bytearray):
+        if space.isinstance_w(w_other, space.w_bytearray):
             # XXX: eliminate double-copy
             from .bytearrayobject import W_BytearrayObject, _make_data
             self_as_bytearray = W_BytearrayObject(_make_data(self._value))
@@ -635,51 +596,23 @@
             return W_StringBufferObject(builder)
         return self._StringMethods_descr_add(space, w_other)
 
-    _StringMethods__startswith = _startswith
-    def _startswith(self, space, value, w_prefix, start, end):
-        if space.isinstance_w(w_prefix, space.w_unicode):
-            self_as_unicode = unicode_from_encoded_object(space, self, None, None)
-            return self_as_unicode._startswith(space, self_as_unicode._value, w_prefix, start, end)
-        return self._StringMethods__startswith(space, value, w_prefix, start, end)
-
-    _StringMethods__endswith = _endswith
-    def _endswith(self, space, value, w_suffix, start, end):
-        if space.isinstance_w(w_suffix, space.w_unicode):
-            self_as_unicode = unicode_from_encoded_object(space, self, None, None)
-            return self_as_unicode._endswith(space, self_as_unicode._value, w_suffix, start, end)
-        return self._StringMethods__endswith(space, value, w_suffix, start, end)
-
     _StringMethods_descr_contains = descr_contains
     def descr_contains(self, space, w_sub):
-        if space.isinstance_w(w_sub, space.w_unicode):
-            from pypy.objspace.std.unicodeobject import W_UnicodeObject
-            assert isinstance(w_sub, W_UnicodeObject)
-            self_as_unicode = unicode_from_encoded_object(space, self, None, None)
-            return space.newbool(self_as_unicode._value.find(w_sub._value) >= 0)
+        if space.isinstance_w(w_sub, space.w_int):
+            try:
+                char = space.int_w(w_sub)
+            except OperationError as e:
+                if e.match(space, space.w_OverflowError):
+                    char = 256 # arbitrary value which will trigger the ValueError
+                               # condition below
+                else:
+                    raise
+            if not 0 <= char < 256:
+                raise operationerrfmt(space.w_ValueError,
+                                      "character must be in range(256)")
+            return space.newbool(self._value.find(chr(char)) >= 0)
         return self._StringMethods_descr_contains(space, w_sub)
 
-    _StringMethods_descr_replace = descr_replace
-    @unwrap_spec(count=int)
-    def descr_replace(self, space, w_old, w_new, count=-1):
-        old_is_unicode = space.isinstance_w(w_old, space.w_unicode)
-        new_is_unicode = space.isinstance_w(w_new, space.w_unicode)
-        if old_is_unicode or new_is_unicode:
-            self_as_uni = unicode_from_encoded_object(space, self, None, None)
-            if not old_is_unicode:
-                w_old = unicode_from_encoded_object(space, w_old, None, None)
-            if not new_is_unicode:
-                w_new = unicode_from_encoded_object(space, w_new, None, None)
-            input = self_as_uni._val(space)
-            sub = self_as_uni._op_val(space, w_old)
-            by = self_as_uni._op_val(space, w_new)
-            try:
-                res = replace(input, sub, by, count)
-            except OverflowError:
-                raise OperationError(space.w_OverflowError,
-                                     space.wrap("replace string is too long"))
-            return self_as_uni._new(res)
-        return self._StringMethods_descr_replace(space, w_old, w_new, count)
-
     def descr_lower(self, space):
         return W_BytesObject(self._value.lower())
 
@@ -687,32 +620,16 @@
         return W_BytesObject(self._value.upper())
 
     def _join_return_one(self, space, w_obj):
-        return (space.is_w(space.type(w_obj), space.w_str) or
-                space.is_w(space.type(w_obj), space.w_unicode))
+        return space.is_w(space.type(w_obj), space.w_str)
 
     def _join_check_item(self, space, w_obj):
-        if space.isinstance_w(w_obj, space.w_str):
-            return 0
-        if space.isinstance_w(w_obj, space.w_unicode):
-            return 2
-        return 1
-
-    def _join_autoconvert(self, space, list_w):
-        # we need to rebuild w_list here, because the original
-        # w_list might be an iterable which we already consumed
-        w_list = space.newlist(list_w)
-        w_u = space.call_function(space.w_unicode, self)
-        return space.call_method(w_u, "join", w_list)
-
-    def descr_formatter_parser(self, space):
-        from pypy.objspace.std.newformat import str_template_formatter
-        tformat = str_template_formatter(space, space.str_w(self))
-        return tformat.formatter_parser()
-
-    def descr_formatter_field_name_split(self, space):
-        from pypy.objspace.std.newformat import str_template_formatter
-        tformat = str_template_formatter(space, space.str_w(self))
-        return tformat.formatter_field_name_split()
+        try:
+            self._op_val(space, w_obj)
+        except OperationError as e:
+            if not e.match(space, space.w_TypeError):
+                raise
+            return True
+        return False
 
 
 def _create_list_from_string(value):
@@ -748,13 +665,103 @@
         return W_BytesObject(c)
 
 
+def getbytevalue(space, w_value):
+    value = space.getindex_w(w_value, None)
+    if not 0 <= value < 256:
+        # this includes the OverflowError in case the long is too large
+        raise OperationError(space.w_ValueError, space.wrap(
+            "byte must be in range(0, 256)"))
+    return chr(value)
+
+def newbytesdata_w(space, w_source, encoding, errors):
+    # None value
+    if w_source is None:
+        if encoding is not None or errors is not None:
+            raise OperationError(space.w_TypeError, space.wrap(
+                    "encoding or errors without string argument"))
+        return []
+    # Is it an int?
+    try:
+        count = space.int_w(w_source)
+    except OperationError, e:
+        if not e.match(space, space.w_TypeError):
+            raise
+    else:
+        if count < 0:
+            raise OperationError(space.w_ValueError,
+                                 space.wrap("negative count"))
+        if encoding is not None or errors is not None:
+            raise OperationError(space.w_TypeError, space.wrap(
+                    "encoding or errors without string argument"))
+        return ['\0'] * count
+    # Unicode with encoding
+    if space.isinstance_w(w_source, space.w_unicode):
+        if encoding is None:
+            raise OperationError(space.w_TypeError, space.wrap(
+                    "string argument without an encoding"))
+        from pypy.objspace.std.unicodeobject import encode_object
+        w_source = encode_object(space, w_source, encoding, errors)
+        # and continue with the encoded string
+
+    return makebytesdata_w(space, w_source)
+
+def makebytesdata_w(space, w_source):
+    w_bytes_method = space.lookup(w_source, "__bytes__")
+    if w_bytes_method is not None:
+        w_bytes = space.get_and_call_function(w_bytes_method, w_source)
+        if not space.isinstance_w(w_bytes, space.w_bytes):
+            msg = "__bytes__ returned non-bytes (type '%T')"
+            raise operationerrfmt(space.w_TypeError, msg, w_bytes)
+        return [c for c in space.bytes_w(w_bytes)]
+
+    # String-like argument
+    try:
+        string = space.bufferstr_new_w(w_source)
+    except OperationError, e:
+        if not e.match(space, space.w_TypeError):
+            raise
+    else:
+        return [c for c in string]
+
+    if space.isinstance_w(w_source, space.w_unicode):
+        raise OperationError(
+            space.w_TypeError,
+            space.wrap("cannot convert unicode object to bytes"))
+
+    # sequence of bytes
+    w_iter = space.iter(w_source)
+    length_hint = space.length_hint(w_source, 0)
+    data = newlist_hint(length_hint)
+    extended = 0
+    while True:
+        try:
+            w_item = space.next(w_iter)
+        except OperationError, e:
+            if not e.match(space, space.w_StopIteration):
+                raise
+            break
+        value = getbytevalue(space, w_item)
+        data.append(value)
+        extended += 1
+    if extended < length_hint:
+        resizelist_hint(data, extended)
+    return data
+
+
 W_BytesObject.typedef = StdTypeDef(
     "bytes",
     __new__ = interp2app(W_BytesObject.descr_new),
-    __doc__ = """str(object='') -> string
+    __doc__ = """bytes(iterable_of_ints) -> bytes
+    bytes(string, encoding[, errors]) -> bytes
+    bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
+    bytes(int) -> bytes object of size given by the parameter initialized with null bytes
+    bytes() -> empty bytes object
 
-    Return a nice string representation of the object.
-    If the argument is a string, the return value is the same object.
+    Construct an immutable array of bytes from:
+      - an iterable yielding integers in range(256)
+      - a text string encoded using the specified encoding
+      - any object implementing the buffer API.
+      - an integer
     """,
 
     __repr__ = interpindirect2app(W_AbstractBytesObject.descr_repr),
@@ -776,13 +783,11 @@
     __rmul__ = interpindirect2app(W_AbstractBytesObject.descr_rmul),
 
     __getitem__ = interpindirect2app(W_AbstractBytesObject.descr_getitem),
-    __getslice__ = interpindirect2app(W_AbstractBytesObject.descr_getslice),
 
     capitalize = interpindirect2app(W_AbstractBytesObject.descr_capitalize),
     center = interpindirect2app(W_AbstractBytesObject.descr_center),
     count = interpindirect2app(W_AbstractBytesObject.descr_count),
     decode = interpindirect2app(W_AbstractBytesObject.descr_decode),
-    encode = interpindirect2app(W_AbstractBytesObject.descr_encode),
     expandtabs = interpindirect2app(W_AbstractBytesObject.descr_expandtabs),
     find = interpindirect2app(W_AbstractBytesObject.descr_find),
     rfind = interpindirect2app(W_AbstractBytesObject.descr_rfind),
@@ -816,14 +821,11 @@
     upper = interpindirect2app(W_AbstractBytesObject.descr_upper),
     zfill = interpindirect2app(W_AbstractBytesObject.descr_zfill),
 
-    format = interpindirect2app(W_BytesObject.descr_format),
-    __format__ = interpindirect2app(W_BytesObject.descr__format__),
-    __mod__ = interpindirect2app(W_BytesObject.descr_mod),
     __buffer__ = interpindirect2app(W_AbstractBytesObject.descr_buffer),
     __getnewargs__ = interpindirect2app(W_AbstractBytesObject.descr_getnewargs),
-    _formatter_parser = interp2app(W_BytesObject.descr_formatter_parser),
-    _formatter_field_name_split =
-        interp2app(W_BytesObject.descr_formatter_field_name_split),
+
+    fromhex = interp2app(W_BytesObject.descr_fromhex, as_classmethod=True),
+    maketrans = interp2app(W_BytesObject.descr_maketrans, as_classmethod=True),
 )
 
 
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -1,7 +1,7 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
 from pypy.objspace.std import slicetype
-from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
+from pypy.objspace.std.sliceobject import W_SliceObject
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rarithmetic import ovfcheck
@@ -24,6 +24,32 @@
                 space, lenself, w_start, w_end, upper_bound=upper_bound)
         return (value, start, end)
 
+    @staticmethod
+    def descr_maketrans(space, w_type, w_from, w_to):
+        """B.maketrans(frm, to) -> translation table
+
+        Return a translation table (a bytes object of length 256) suitable
+        for use in the bytes or bytearray translate method where each byte
+        in frm is mapped to the byte at the same position in to.
+        The bytes objects frm and to must be of the same length.
+        """
+        from pypy.objspace.std.bytesobject import makebytesdata_w, wrapstr
+
+        base_table = [chr(i) for i in range(256)]
+        list_from = makebytesdata_w(space, w_from)
+        list_to = makebytesdata_w(space, w_to)
+
+        if len(list_from) != len(list_to):
+            raise operationerrfmt(space.w_ValueError,
+                                  "maketrans arguments must have same length")
+
+        for i in range(len(list_from)):
+            pos_from = ord(list_from[i])
+            char_to = list_to[i]
+            base_table[pos_from] = char_to
+
+        return wrapstr(space, ''.join(base_table))
+
     def descr_len(self, space):
         return space.wrap(self._len())
 
@@ -90,21 +116,13 @@
         if index < 0 or index >= selflen:
             raise OperationError(space.w_IndexError,
                                  space.wrap("string index out of range"))
+        from pypy.objspace.std.bytesobject import W_BytesObject
         from pypy.objspace.std.bytearrayobject import W_BytearrayObject
-        if isinstance(self, W_BytearrayObject):
+        if isinstance(self, W_BytesObject) or isinstance(self, W_BytearrayObject):
             return space.wrap(ord(selfvalue[index]))
         #return wrapchar(space, selfvalue[index])
         return self._new(selfvalue[index])
 
-    def descr_getslice(self, space, w_start, w_stop):
-        selfvalue = self._val(space)
-        start, stop = normalize_simple_slice(space, len(selfvalue), w_start,
-                                             w_stop)
-        if start == stop:
-            return self._empty()
-        else:
-            return self._sliced(space, selfvalue, start, stop, self)
-
     def descr_capitalize(self, space):
         value = self._val(space)
         if len(value) == 0:
@@ -139,19 +157,11 @@
         return space.newint(value.count(self._op_val(space, w_sub), start, end))
 
     def descr_decode(self, space, w_encoding=None, w_errors=None):
-        from pypy.objspace.std.unicodeobject import _get_encoding_and_errors, \
-            unicode_from_string, decode_object
+        from pypy.objspace.std.unicodeobject import (
+            _get_encoding_and_errors, decode_object)
         encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
-        if encoding is None and errors is None:
-            return unicode_from_string(space, self)
         return decode_object(space, self, encoding, errors)
 
-    def descr_encode(self, space, w_encoding=None, w_errors=None):
-        from pypy.objspace.std.unicodeobject import _get_encoding_and_errors, \
-            encode_object
-        encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
-        return encode_object(space, self, encoding, errors)
-
     @unwrap_spec(tabsize=int)
     def descr_expandtabs(self, space, tabsize=8):
         value = self._val(space)
@@ -175,6 +185,9 @@
     def _tabindent(self, token, tabsize):
         "calculates distance behind the token to the next tabstop"
 
+        if tabsize <= 0:
+            return tabsize
+
         distance = tabsize
         if token:
             distance = 0
@@ -305,16 +318,9 @@
         return space.newbool(cased)
 
     def descr_join(self, space, w_list):
-        from pypy.objspace.std.bytesobject import W_BytesObject
         from pypy.objspace.std.unicodeobject import W_UnicodeObject
 
-        if isinstance(self, W_BytesObject):
-            l = space.listview_str(w_list)
-            if l is not None:
-                if len(l) == 1:
-                    return space.wrap(l[0])
-                return space.wrap(self._val(space).join(l))
-        elif isinstance(self, W_UnicodeObject):
+        if isinstance(self, W_UnicodeObject):
             l = space.listview_unicode(w_list)
             if l is not None:
                 if len(l) == 1:
@@ -343,14 +349,11 @@
         prealloc_size = len(value) * (size - 1)
         for i in range(size):
             w_s = list_w[i]
-            check_item = self._join_check_item(space, w_s)
-            if check_item == 1:
+            if self._join_check_item(space, w_s):
                 raise operationerrfmt(
                     space.w_TypeError,
-                    "sequence item %d: expected string, %s "
-                    "found", i, space.type(w_s).getname(space))
-            elif check_item == 2:
-                return self._join_autoconvert(space, list_w)
+                    "sequence item %d: expected %s, %T found",
+                    i, self._generic_name(), w_s)
             prealloc_size += len(self._op_val(space, w_s))
 
         sb = self._builder(prealloc_size)
@@ -360,9 +363,6 @@
             sb.append(self._op_val(space, list_w[i]))
         return self._new(sb.build())
 
-    def _join_autoconvert(self, space, list_w):
-        assert False, 'unreachable'
-
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
     def descr_ljust(self, space, width, w_fillchar):
         value = self._val(space)
@@ -505,6 +505,9 @@
             strs.append(value[pos:length])
         return self._newlist_unwrapped(space, strs)
 
+    def _generic_name(self):
+        return "bytes"
+
     def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
         (value, start, end) = self._convert_idx_params(space, w_start, w_end,
                                                        True)
@@ -514,13 +517,15 @@
                     return space.w_True
             return space.w_False
         try:
-            return space.newbool(self._startswith(space, value, w_prefix, start, end))
+            res = self._startswith(space, value, w_prefix, start, end)
         except OperationError as e:
-            if e.match(space, space.w_TypeError):
-                msg = ("startswith first arg must be str or a tuple of str, "
-                       "not %T")
-                raise operationerrfmt(space.w_TypeError, msg, w_prefix)
-            raise
+            if not e.match(space, space.w_TypeError):
+                raise
+            wanted = self._generic_name()
+            raise operationerrfmt(space.w_TypeError,
+                                  "startswith first arg must be %s or a tuple "
+                                  "of %s, not %T", wanted, wanted, w_prefix)
+        return space.newbool(res)
 
     def _startswith(self, space, value, w_prefix, start, end):
         return startswith(value, self._op_val(space, w_prefix), start, end)
@@ -535,14 +540,15 @@
                     return space.w_True
             return space.w_False
         try:
-            return space.newbool(self._endswith(space, value, w_suffix, start,
-                                                end))
+            res = self._endswith(space, value, w_suffix, start, end)
         except OperationError as e:
-            if e.match(space, space.w_TypeError):
-                msg = ("endswith first arg must be str or a tuple of str, not "
-                       "%T")
-                raise operationerrfmt(space.w_TypeError, msg, w_suffix)
-            raise
+            if not e.match(space, space.w_TypeError):
+                raise
+            wanted = self._generic_name()
+            raise operationerrfmt(space.w_TypeError,
+                                  "endswith first arg must be %s or a tuple "
+                                  "of %s, not %T", wanted, wanted, w_suffix)
+        return space.newbool(res)
 
     def _endswith(self, space, value, w_prefix, start, end):
         return endswith(value, self._op_val(space, w_prefix), start, end)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -109,6 +109,9 @@
 
     _builder = UnicodeBuilder
 
+    def _generic_name(self):
+        return "str"
+
     def _isupper(self, ch):
         return unicodedb.isupper(ord(ch))
 
@@ -178,16 +181,10 @@
 
     @staticmethod
     def descr_maketrans(space, w_type, w_x, w_y=None, w_z=None):
-        if space.is_none(w_y):
-            y = None
-        else:
-            y = space.unicode_w(w_y)
-        if space.is_none(w_z):
-            z = None
-        else:
-            z = space.unicode_w(w_z)
+        y = None if space.is_none(w_y) else space.unicode_w(w_y)
+        z = None if space.is_none(w_z) else space.unicode_w(w_z)
+        w_new = space.newdict()
 
-        w_new = space.newdict()
         if y is not None:
             # x must be a string too, of equal length
             ylen = len(y)
@@ -362,9 +359,9 @@
                 elif space.isinstance_w(w_newval, space.w_unicode):
                     result.append(space.unicode_w(w_newval))
                 else:
-                    raise OperationError(
+                    raise operationerrfmt(
                         space.w_TypeError,
-                        space.wrap("character mapping must return integer, None or unicode"))
+                        "character mapping must return integer, None or str")
         return W_UnicodeObject(u''.join(result))
 
     def descr_encode(self, space, w_encoding=None, w_errors=None):
@@ -375,10 +372,7 @@
         return space.is_w(space.type(w_obj), space.w_unicode)
 
     def _join_check_item(self, space, w_obj):
-        if (space.isinstance_w(w_obj, space.w_str) or
-            space.isinstance_w(w_obj, space.w_unicode)):
-            return 0
-        return 1
+        return not space.isinstance_w(w_obj, space.w_unicode)
 
     def descr_isdecimal(self, space):
         return self._is_generic(space, '_isdecimal')
@@ -415,6 +409,17 @@
                 return space.w_False
         return space.w_True
 
+    def _fix_fillchar(func):
+        # XXX: hack
+        from rpython.tool.sourcetools import func_with_new_name
+        func = func_with_new_name(func, func.__name__)
+        func.unwrap_spec = func.unwrap_spec.copy()
+        func.unwrap_spec['w_fillchar'] = WrappedDefault(u' ')
+        return func
+
+    descr_center = _fix_fillchar(StringMethods.descr_center)
+    descr_ljust = _fix_fillchar(StringMethods.descr_ljust)
+    descr_rjust = _fix_fillchar(StringMethods.descr_rjust)
 
 def wrapunicode(space, uni):
     return W_UnicodeObject(uni)
@@ -530,17 +535,11 @@
 
 
 def unicode_from_encoded_object(space, w_obj, encoding, errors):
-    # explicitly block bytearray on 2.7
-    from .bytearrayobject import W_BytearrayObject
-    if isinstance(w_obj, W_BytearrayObject):
-        raise OperationError(space.w_TypeError,
-                             space.wrap("decoding bytearray is not supported"))
-
     w_retval = decode_object(space, w_obj, encoding, errors)
     if not space.isinstance_w(w_retval, space.w_unicode):
         raise operationerrfmt(space.w_TypeError,
-            "decoder did not return an unicode object (type '%s')",
-            space.type(w_retval).getname(space))
+            "decoder did not return a str object (type '%T')",
+            w_retval)
     assert isinstance(w_retval, W_UnicodeObject)
     return w_retval
 
@@ -840,19 +839,6 @@
         If chars is a str, it will be converted to unicode before stripping
         """
 
-    def maketrans():
-        """str.maketrans(x[, y[, z]]) -> dict (static method)
-
-        Return a translation table usable for str.translate().
-        If there is only one argument, it must be a dictionary mapping Unicode
-        ordinals (integers) or characters to Unicode ordinals, strings or None.
-        Character keys will be then converted to ordinals.
-        If there are two arguments, they must be strings of equal length, and
-        in the resulting dictionary, each character in x will be mapped to the
-        character at the same position in y. If there is a third argument, it
-        must be a string, whose characters will be mapped to None in the result.
-        """
-
     def partition():
         """S.partition(sep) -> (head, sep, tail)
 
@@ -1126,8 +1112,7 @@
     __getnewargs__ = interp2app(W_UnicodeObject.descr_getnewargs,
                                 doc=UnicodeDocstrings.__getnewargs__.__doc__),
     maketrans = interp2app(W_UnicodeObject.descr_maketrans,
-                           as_classmethod=True,
-                           doc=UnicodeDocstrings.maketrans.__doc__)
+                           as_classmethod=True),
 )
 
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3k-refactor-str-types: reintegrate our bytes/bytearray

Reply via email to