Author: Philip Jenvey <[email protected]>
Branch: py3k-refactor-str-types
Changeset: r68912:eeae6a72a1be
Date: 2014-01-24 11:49 -0800
http://bitbucket.org/pypy/pypy/changeset/eeae6a72a1be/
Log: reintegrate our bytes/bytearray
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -233,9 +233,8 @@
raise operationerrfmt(space.w_TypeError, msg, w_result)
def ord(self, space):
- typename = space.type(self).getname(space)
- msg = "ord() expected string of length 1, but %s found"
- raise operationerrfmt(space.w_TypeError, msg, typename)
+ msg = "ord() expected string of length 1, but %T found"
+ raise operationerrfmt(space.w_TypeError, msg, self)
def __spacebind__(self, space):
return self
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -3,15 +3,14 @@
from pypy.interpreter.baseobjspace import W_Root
from pypy.interpreter.buffer import RWBuffer
from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.objspace.std.bytearraytype import new_bytearray
-from pypy.objspace.std.stringtype import getbytevalue, makebytesdata_w
+from pypy.objspace.std.bytesobject import (
+ getbytevalue, makebytesdata_w, newbytesdata_w)
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
-from pypy.interpreter.signature import Signature
from pypy.objspace.std.sliceobject import W_SliceObject
from pypy.objspace.std.stdtypedef import StdTypeDef
from pypy.objspace.std.stringmethods import StringMethods
from pypy.objspace.std.util import get_positive_index
-from rpython.rlib.objectmodel import newlist_hint, resizelist_hint, import_from_mixin
+from rpython.rlib.objectmodel import import_from_mixin
from rpython.rlib.rstring import StringBuilder
@@ -101,10 +100,8 @@
return False
def _join_check_item(self, space, w_obj):
- if (space.isinstance_w(w_obj, space.w_str) or
- space.isinstance_w(w_obj, space.w_bytearray)):
- return 0
- return 1
+ return not (space.isinstance_w(w_obj, space.w_bytes) or
+ space.isinstance_w(w_obj, space.w_bytearray))
def ord(self, space):
if len(self.data) != 1:
@@ -134,74 +131,19 @@
"Create a bytearray object from a string of hexadecimal numbers.\n"
"Spaces between two numbers are accepted.\n"
"Example: bytearray.fromhex('B9 01EF') ->
bytearray(b'\\xb9\\x01\\xef')."
- hexstring = space.str_w(w_hexstring)
- hexstring = hexstring.lower()
- data = []
- length = len(hexstring)
- i = -2
- while True:
- i += 2
- while i < length and hexstring[i] == ' ':
- i += 1
- if i >= length:
- break
- if i+1 == length:
- raise OperationError(space.w_ValueError, space.wrap(
- "non-hexadecimal number found in fromhex() arg at position
%d" % i))
-
- top = _hex_digit_to_int(hexstring[i])
- if top == -1:
- raise OperationError(space.w_ValueError, space.wrap(
- "non-hexadecimal number found in fromhex() arg at position
%d" % i))
- bot = _hex_digit_to_int(hexstring[i+1])
- if bot == -1:
- raise OperationError(space.w_ValueError, space.wrap(
- "non-hexadecimal number found in fromhex() arg at position
%d" % (i+1,)))
- data.append(chr(top*16 + bot))
-
+ if not space.is_w(space.type(w_hexstring), space.w_unicode):
+ raise operationerrfmt(space.w_TypeError, "must be str, not %T",
+ w_hexstring)
+ hexstring = space.unicode_w(w_hexstring)
+ data = _hexstring_to_array(space, hexstring)
# in CPython bytearray.fromhex is a staticmethod, so
# we ignore w_type and always return a bytearray
return new_bytearray(space, space.w_bytearray, data)
- def descr_init(self, space, __args__):
- # this is on the silly side
- w_source, w_encoding, w_errors = __args__.parse_obj(
- None, 'bytearray', init_signature, init_defaults)
-
- if w_source is None:
- w_source = space.wrap('')
- if w_encoding is None:
- w_encoding = space.w_None
- if w_errors is None:
- w_errors = space.w_None
-
- # Unicode argument
- if not space.is_w(w_encoding, space.w_None):
- from pypy.objspace.std.unicodeobject import (
- _get_encoding_and_errors, encode_object
- )
- encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
-
- # if w_source is an integer this correctly raises a TypeError
- # the CPython error message is: "encoding or errors without a string argument"
- # ours is: "expected unicode, got int object"
- w_source = encode_object(space, w_source, encoding, errors)
-
- # Is it an int?
- try:
- count = space.int_w(w_source)
- except OperationError, e:
- if not e.match(space, space.w_TypeError):
- raise
- else:
- if count < 0:
- raise OperationError(space.w_ValueError,
- space.wrap("bytearray negative count"))
- self.data = ['\0'] * count
- return
-
- data = makebytearraydata_w(space, w_source)
- self.data = data
+ @unwrap_spec(encoding='str_or_None', errors='str_or_None')
+ def descr_init(self, space, w_source=None, encoding=None, errors=None):
+ assert isinstance(self, W_BytearrayObject)
+ self.data = newbytesdata_w(space, w_source, encoding, errors)
def descr_repr(self, space):
s = self.data
@@ -236,7 +178,10 @@
return space.wrap(buf.build())
def descr_str(self, space):
- return space.wrap(''.join(self.data))
+ if space.sys.get_flag('bytes_warning'):
+ space.warn(space.wrap("str() on a bytearray instance"),
+ space.w_BytesWarning)
+ return self.descr_repr(space)
def descr_eq(self, space, w_other):
try:
@@ -310,7 +255,7 @@
if isinstance(w_index, W_SliceObject):
oldsize = len(self.data)
start, stop, step, slicelength = w_index.indices4(space, oldsize)
- sequence2 = makebytearraydata_w(space, w_other)
+ sequence2 = makebytesdata_w(space, w_other)
_setitem_slice_helper(space, self.data, start, step,
slicelength, sequence2, empty_elem='\x00')
else:
@@ -341,7 +286,7 @@
if isinstance(w_other, W_BytearrayObject):
self.data += w_other.data
else:
- self.data += makebytearraydata_w(space, w_other)
+ self.data += makebytesdata_w(space, w_other)
return self
def descr_insert(self, space, w_idx, w_other):
@@ -376,64 +321,47 @@
def descr_reverse(self, space):
self.data.reverse()
-def getbytevalue(space, w_value):
- if space.isinstance_w(w_value, space.w_str):
- string = space.str_w(w_value)
- if len(string) != 1:
- raise OperationError(space.w_ValueError, space.wrap(
- "string must be of size 1"))
- return string[0]
-
- value = space.getindex_w(w_value, None)
- if not 0 <= value < 256:
- # this includes the OverflowError in case the long is too large
- raise OperationError(space.w_ValueError, space.wrap(
- "byte must be in range(0, 256)"))
- return chr(value)
-
def new_bytearray(space, w_bytearraytype, data):
w_obj = space.allocate_instance(W_BytearrayObject, w_bytearraytype)
W_BytearrayObject.__init__(w_obj, data)
return w_obj
-def makebytearraydata_w(space, w_source):
- # String-like argument
- try:
- string = space.bufferstr_new_w(w_source)
- except OperationError, e:
- if not e.match(space, space.w_TypeError):
- raise
- else:
- return [c for c in string]
-
- # sequence of bytes
- w_iter = space.iter(w_source)
- length_hint = space.length_hint(w_source, 0)
- data = newlist_hint(length_hint)
- extended = 0
- while True:
- try:
- w_item = space.next(w_iter)
- except OperationError, e:
- if not e.match(space, space.w_StopIteration):
- raise
- break
- value = getbytevalue(space, w_item)
- data.append(value)
- extended += 1
- if extended < length_hint:
- resizelist_hint(data, extended)
- return data
-
def _hex_digit_to_int(d):
val = ord(d)
if 47 < val < 58:
return val - 48
+ if 64 < val < 71:
+ return val - 55
if 96 < val < 103:
return val - 87
return -1
+def _hexstring_to_array(space, s):
+ data = []
+ length = len(s)
+ i = -2
+ while True:
+ i += 2
+ while i < length and s[i] == ' ':
+ i += 1
+ if i >= length:
+ break
+ if i + 1 == length:
+ raise OperationError(space.w_ValueError, space.wrap(
+ "non-hexadecimal number found in fromhex() arg at position %d"
% i))
+
+ top = _hex_digit_to_int(s[i])
+ if top == -1:
+ raise OperationError(space.w_ValueError, space.wrap(
+ "non-hexadecimal number found in fromhex() arg at position %d"
% i))
+ bot = _hex_digit_to_int(s[i+1])
+ if bot == -1:
+ raise OperationError(space.w_ValueError, space.wrap(
+ "non-hexadecimal number found in fromhex() arg at position %d"
% (i+1,)))
+ data.append(chr(top*16 + bot))
+ return data
+
class BytearrayDocstrings:
"""bytearray(iterable_of_ints) -> bytearray
@@ -867,6 +795,8 @@
doc=BytearrayDocstrings.__reduce__.__doc__),
fromhex = interp2app(W_BytearrayObject.descr_fromhex, as_classmethod=True,
doc=BytearrayDocstrings.fromhex.__doc__),
+ maketrans = interp2app(W_BytearrayObject.descr_maketrans,
+ as_classmethod=True),
__repr__ = interp2app(W_BytearrayObject.descr_repr,
doc=BytearrayDocstrings.__repr__.__doc__),
@@ -1001,9 +931,6 @@
doc=BytearrayDocstrings.reverse.__doc__),
)
-init_signature = Signature(['source', 'encoding', 'errors'], None, None)
-init_defaults = [None, None, None]
-
# XXX consider moving to W_BytearrayObject or remove
def str_join__Bytearray_ANY(space, w_self, w_list):
@@ -1014,7 +941,7 @@
newdata = []
for i in range(len(list_w)):
w_s = list_w[i]
- if not (space.isinstance_w(w_s, space.w_str) or
+ if not (space.isinstance_w(w_s, space.w_bytes) or
space.isinstance_w(w_s, space.w_bytearray)):
msg = "sequence item %d: expected string, %T found"
raise operationerrfmt(space.w_TypeError, msg, i, w_s)
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -4,15 +4,13 @@
from pypy.interpreter.buffer import StringBuffer
from pypy.interpreter.error import OperationError, operationerrfmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault, interpindirect2app
-from pypy.objspace.std import newformat
-from pypy.objspace.std.formatting import mod_format
from pypy.objspace.std.stdtypedef import StdTypeDef
from pypy.objspace.std.stringmethods import StringMethods
-from pypy.objspace.std.unicodeobject import (
- decode_object, unicode_from_encoded_object, _get_encoding_and_errors)
from rpython.rlib.jit import we_are_jitted
-from rpython.rlib.objectmodel import compute_hash, compute_unique_id, import_from_mixin
-from rpython.rlib.rstring import StringBuilder, replace
+from rpython.rlib.objectmodel import (
+ compute_hash, compute_unique_id, import_from_mixin, newlist_hint,
+ resizelist_hint)
+from rpython.rlib.rstring import StringBuilder
class W_AbstractBytesObject(W_Root):
@@ -41,12 +39,6 @@
def descr_eq(self, space, w_other):
"""x.__eq__(y) <==> x==y"""
- def descr__format__(self, space, w_format_spec):
- """S.__format__(format_spec) -> string
-
- Return a formatted version of S as described by format_spec.
- """
-
def descr_ge(self, space, w_other):
"""x.__ge__(y) <==> x>=y"""
@@ -56,12 +48,6 @@
def descr_getnewargs(self, space):
""
- def descr_getslice(self, space, w_start, w_stop):
- """x.__getslice__(i, j) <==> x[i:j]
-
- Use of negative indices is not supported.
- """
-
def descr_gt(self, space, w_other):
"""x.__gt__(y) <==> x>y"""
@@ -77,9 +63,6 @@
def descr_lt(self, space, w_other):
"""x.__lt__(y) <==> x<y"""
- def descr_mod(self, space, w_values):
- """x.__mod__(y) <==> x%y"""
-
def descr_mul(self, space, w_times):
"""x.__mul__(n) <==> x*n"""
@@ -132,17 +115,6 @@
able to handle UnicodeDecodeErrors.
"""
- def descr_encode(self, space, w_encoding=None, w_errors=None):
- """S.encode(encoding=None, errors='strict') -> object
-
- Encode S using the codec registered for encoding. encoding defaults
- to the default encoding. errors may be given to set a different error
- handling scheme. Default is 'strict' meaning that encoding errors raise
- a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
- 'xmlcharrefreplace' as well as any other name registered with
- codecs.register_error that is able to handle UnicodeEncodeErrors.
- """
-
def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
"""S.endswith(suffix[, start[, end]]) -> bool
@@ -170,13 +142,6 @@
Return -1 on failure.
"""
- def descr_format(self, space, __args__):
- """S.format(*args, **kwargs) -> string
-
- Return a formatted version of S, using substitutions from args and kwargs.
- The substitutions are identified by braces ('{' and '}').
- """
-
def descr_index(self, space, w_sub, w_start=None, w_end=None):
"""S.index(sub[, start[, end]]) -> int
@@ -511,47 +476,46 @@
return space.newlist_str(lst)
@staticmethod
- @unwrap_spec(w_object = WrappedDefault(""))
- def descr_new(space, w_stringtype, w_object):
- # NB. the default value of w_object is really a *wrapped* empty string:
- # there is gateway magic at work
- w_obj = space.str(w_object)
- if space.is_w(w_stringtype, space.w_str):
- return w_obj # XXX might be reworked when space.str() typechecks
- value = space.str_w(w_obj)
+ @unwrap_spec(encoding='str_or_None', errors='str_or_None')
+ def descr_new(space, w_stringtype, w_source=None, encoding=None,
+ errors=None):
+ if (w_source and space.is_w(space.type(w_source), space.w_bytes) and
+ space.is_w(w_stringtype, space.w_bytes)):
+ return w_source
+ value = ''.join(newbytesdata_w(space, w_source, encoding, errors))
w_obj = space.allocate_instance(W_BytesObject, w_stringtype)
W_BytesObject.__init__(w_obj, value)
return w_obj
+ @staticmethod
+ def descr_fromhex(space, w_type, w_hexstring):
+ r"""bytes.fromhex(string) -> bytes
+
+ Create a bytes object from a string of hexadecimal numbers.
+ Spaces between two numbers are accepted.
+ Example: bytes.fromhex('B9 01EF') -> b'\xb9\x01\xef'.
+ """
+ if not space.is_w(space.type(w_hexstring), space.w_unicode):
+ raise operationerrfmt(space.w_TypeError, "must be str, not %T",
+ w_hexstring)
+ from pypy.objspace.std.bytearrayobject import _hexstring_to_array
+ hexstring = space.unicode_w(w_hexstring)
+ bytes = ''.join(_hexstring_to_array(space, hexstring))
+ return W_BytesObject(bytes)
+
def descr_repr(self, space):
- s = self._value
- quote = "'"
- if quote in s and '"' not in s:
- quote = '"'
- return space.wrap(string_escape_encode(s, quote))
+ return space.wrap(string_escape_encode(self._value, True))
def descr_str(self, space):
- if type(self) is W_BytesObject:
- return self
- return wrapstr(space, self._value)
+ if space.sys.get_flag('bytes_warning'):
+ space.warn(space.wrap("str() on a bytes instance"),
+ space.w_BytesWarning)
+ return self.descr_repr(space)
def descr_hash(self, space):
x = compute_hash(self._value)
return space.wrap(x)
- def descr_format(self, space, __args__):
- return newformat.format_method(space, self, __args__, is_unicode=False)
-
- def descr__format__(self, space, w_format_spec):
- if not space.isinstance_w(w_format_spec, space.w_str):
- w_format_spec = space.str(w_format_spec)
- spec = space.str_w(w_format_spec)
- formatter = newformat.str_formatter(space, spec)
- return formatter.format_string(self._value)
-
- def descr_mod(self, space, w_values):
- return mod_format(space, self, w_values, do_unicode=False)
-
def descr_buffer(self, space):
return space.wrap(StringBuffer(self._value))
@@ -613,10 +577,7 @@
_StringMethods_descr_add = descr_add
def descr_add(self, space, w_other):
- if space.isinstance_w(w_other, space.w_unicode):
- self_as_unicode = unicode_from_encoded_object(space, self, None, None)
- return space.add(self_as_unicode, w_other)
- elif space.isinstance_w(w_other, space.w_bytearray):
+ if space.isinstance_w(w_other, space.w_bytearray):
# XXX: eliminate double-copy
from .bytearrayobject import W_BytearrayObject, _make_data
self_as_bytearray = W_BytearrayObject(_make_data(self._value))
@@ -635,51 +596,23 @@
return W_StringBufferObject(builder)
return self._StringMethods_descr_add(space, w_other)
- _StringMethods__startswith = _startswith
- def _startswith(self, space, value, w_prefix, start, end):
- if space.isinstance_w(w_prefix, space.w_unicode):
- self_as_unicode = unicode_from_encoded_object(space, self, None, None)
- return self_as_unicode._startswith(space, self_as_unicode._value, w_prefix, start, end)
- return self._StringMethods__startswith(space, value, w_prefix, start, end)
-
- _StringMethods__endswith = _endswith
- def _endswith(self, space, value, w_suffix, start, end):
- if space.isinstance_w(w_suffix, space.w_unicode):
- self_as_unicode = unicode_from_encoded_object(space, self, None, None)
- return self_as_unicode._endswith(space, self_as_unicode._value, w_suffix, start, end)
- return self._StringMethods__endswith(space, value, w_suffix, start, end)
-
_StringMethods_descr_contains = descr_contains
def descr_contains(self, space, w_sub):
- if space.isinstance_w(w_sub, space.w_unicode):
- from pypy.objspace.std.unicodeobject import W_UnicodeObject
- assert isinstance(w_sub, W_UnicodeObject)
- self_as_unicode = unicode_from_encoded_object(space, self, None, None)
- return space.newbool(self_as_unicode._value.find(w_sub._value) >= 0)
+ if space.isinstance_w(w_sub, space.w_int):
+ try:
+ char = space.int_w(w_sub)
+ except OperationError as e:
+ if e.match(space, space.w_OverflowError):
+ char = 256 # arbitrary value which will trigger the ValueError
+ # condition below
+ else:
+ raise
+ if not 0 <= char < 256:
+ raise operationerrfmt(space.w_ValueError,
+ "character must be in range(256)")
+ return space.newbool(self._value.find(chr(char)) >= 0)
return self._StringMethods_descr_contains(space, w_sub)
- _StringMethods_descr_replace = descr_replace
- @unwrap_spec(count=int)
- def descr_replace(self, space, w_old, w_new, count=-1):
- old_is_unicode = space.isinstance_w(w_old, space.w_unicode)
- new_is_unicode = space.isinstance_w(w_new, space.w_unicode)
- if old_is_unicode or new_is_unicode:
- self_as_uni = unicode_from_encoded_object(space, self, None, None)
- if not old_is_unicode:
- w_old = unicode_from_encoded_object(space, w_old, None, None)
- if not new_is_unicode:
- w_new = unicode_from_encoded_object(space, w_new, None, None)
- input = self_as_uni._val(space)
- sub = self_as_uni._op_val(space, w_old)
- by = self_as_uni._op_val(space, w_new)
- try:
- res = replace(input, sub, by, count)
- except OverflowError:
- raise OperationError(space.w_OverflowError,
- space.wrap("replace string is too long"))
- return self_as_uni._new(res)
- return self._StringMethods_descr_replace(space, w_old, w_new, count)
-
def descr_lower(self, space):
return W_BytesObject(self._value.lower())
@@ -687,32 +620,16 @@
return W_BytesObject(self._value.upper())
def _join_return_one(self, space, w_obj):
- return (space.is_w(space.type(w_obj), space.w_str) or
- space.is_w(space.type(w_obj), space.w_unicode))
+ return space.is_w(space.type(w_obj), space.w_str)
def _join_check_item(self, space, w_obj):
- if space.isinstance_w(w_obj, space.w_str):
- return 0
- if space.isinstance_w(w_obj, space.w_unicode):
- return 2
- return 1
-
- def _join_autoconvert(self, space, list_w):
- # we need to rebuild w_list here, because the original
- # w_list might be an iterable which we already consumed
- w_list = space.newlist(list_w)
- w_u = space.call_function(space.w_unicode, self)
- return space.call_method(w_u, "join", w_list)
-
- def descr_formatter_parser(self, space):
- from pypy.objspace.std.newformat import str_template_formatter
- tformat = str_template_formatter(space, space.str_w(self))
- return tformat.formatter_parser()
-
- def descr_formatter_field_name_split(self, space):
- from pypy.objspace.std.newformat import str_template_formatter
- tformat = str_template_formatter(space, space.str_w(self))
- return tformat.formatter_field_name_split()
+ try:
+ self._op_val(space, w_obj)
+ except OperationError as e:
+ if not e.match(space, space.w_TypeError):
+ raise
+ return True
+ return False
def _create_list_from_string(value):
@@ -748,13 +665,103 @@
return W_BytesObject(c)
+def getbytevalue(space, w_value):
+ value = space.getindex_w(w_value, None)
+ if not 0 <= value < 256:
+ # this includes the OverflowError in case the long is too large
+ raise OperationError(space.w_ValueError, space.wrap(
+ "byte must be in range(0, 256)"))
+ return chr(value)
+
+def newbytesdata_w(space, w_source, encoding, errors):
+ # None value
+ if w_source is None:
+ if encoding is not None or errors is not None:
+ raise OperationError(space.w_TypeError, space.wrap(
+ "encoding or errors without string argument"))
+ return []
+ # Is it an int?
+ try:
+ count = space.int_w(w_source)
+ except OperationError, e:
+ if not e.match(space, space.w_TypeError):
+ raise
+ else:
+ if count < 0:
+ raise OperationError(space.w_ValueError,
+ space.wrap("negative count"))
+ if encoding is not None or errors is not None:
+ raise OperationError(space.w_TypeError, space.wrap(
+ "encoding or errors without string argument"))
+ return ['\0'] * count
+ # Unicode with encoding
+ if space.isinstance_w(w_source, space.w_unicode):
+ if encoding is None:
+ raise OperationError(space.w_TypeError, space.wrap(
+ "string argument without an encoding"))
+ from pypy.objspace.std.unicodeobject import encode_object
+ w_source = encode_object(space, w_source, encoding, errors)
+ # and continue with the encoded string
+
+ return makebytesdata_w(space, w_source)
+
+def makebytesdata_w(space, w_source):
+ w_bytes_method = space.lookup(w_source, "__bytes__")
+ if w_bytes_method is not None:
+ w_bytes = space.get_and_call_function(w_bytes_method, w_source)
+ if not space.isinstance_w(w_bytes, space.w_bytes):
+ msg = "__bytes__ returned non-bytes (type '%T')"
+ raise operationerrfmt(space.w_TypeError, msg, w_bytes)
+ return [c for c in space.bytes_w(w_bytes)]
+
+ # String-like argument
+ try:
+ string = space.bufferstr_new_w(w_source)
+ except OperationError, e:
+ if not e.match(space, space.w_TypeError):
+ raise
+ else:
+ return [c for c in string]
+
+ if space.isinstance_w(w_source, space.w_unicode):
+ raise OperationError(
+ space.w_TypeError,
+ space.wrap("cannot convert unicode object to bytes"))
+
+ # sequence of bytes
+ w_iter = space.iter(w_source)
+ length_hint = space.length_hint(w_source, 0)
+ data = newlist_hint(length_hint)
+ extended = 0
+ while True:
+ try:
+ w_item = space.next(w_iter)
+ except OperationError, e:
+ if not e.match(space, space.w_StopIteration):
+ raise
+ break
+ value = getbytevalue(space, w_item)
+ data.append(value)
+ extended += 1
+ if extended < length_hint:
+ resizelist_hint(data, extended)
+ return data
+
+
W_BytesObject.typedef = StdTypeDef(
"bytes",
__new__ = interp2app(W_BytesObject.descr_new),
- __doc__ = """str(object='') -> string
+ __doc__ = """bytes(iterable_of_ints) -> bytes
+ bytes(string, encoding[, errors]) -> bytes
+ bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
+ bytes(int) -> bytes object of size given by the parameter initialized with null bytes
+ bytes() -> empty bytes object
- Return a nice string representation of the object.
- If the argument is a string, the return value is the same object.
+ Construct an immutable array of bytes from:
+ - an iterable yielding integers in range(256)
+ - a text string encoded using the specified encoding
+ - any object implementing the buffer API.
+ - an integer
""",
__repr__ = interpindirect2app(W_AbstractBytesObject.descr_repr),
@@ -776,13 +783,11 @@
__rmul__ = interpindirect2app(W_AbstractBytesObject.descr_rmul),
__getitem__ = interpindirect2app(W_AbstractBytesObject.descr_getitem),
- __getslice__ = interpindirect2app(W_AbstractBytesObject.descr_getslice),
capitalize = interpindirect2app(W_AbstractBytesObject.descr_capitalize),
center = interpindirect2app(W_AbstractBytesObject.descr_center),
count = interpindirect2app(W_AbstractBytesObject.descr_count),
decode = interpindirect2app(W_AbstractBytesObject.descr_decode),
- encode = interpindirect2app(W_AbstractBytesObject.descr_encode),
expandtabs = interpindirect2app(W_AbstractBytesObject.descr_expandtabs),
find = interpindirect2app(W_AbstractBytesObject.descr_find),
rfind = interpindirect2app(W_AbstractBytesObject.descr_rfind),
@@ -816,14 +821,11 @@
upper = interpindirect2app(W_AbstractBytesObject.descr_upper),
zfill = interpindirect2app(W_AbstractBytesObject.descr_zfill),
- format = interpindirect2app(W_BytesObject.descr_format),
- __format__ = interpindirect2app(W_BytesObject.descr__format__),
- __mod__ = interpindirect2app(W_BytesObject.descr_mod),
__buffer__ = interpindirect2app(W_AbstractBytesObject.descr_buffer),
__getnewargs__ =
interpindirect2app(W_AbstractBytesObject.descr_getnewargs),
- _formatter_parser = interp2app(W_BytesObject.descr_formatter_parser),
- _formatter_field_name_split =
- interp2app(W_BytesObject.descr_formatter_field_name_split),
+
+ fromhex = interp2app(W_BytesObject.descr_fromhex, as_classmethod=True),
+ maketrans = interp2app(W_BytesObject.descr_maketrans, as_classmethod=True),
)
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -1,7 +1,7 @@
from pypy.interpreter.error import OperationError, operationerrfmt
from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
from pypy.objspace.std import slicetype
-from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
+from pypy.objspace.std.sliceobject import W_SliceObject
from rpython.rlib import jit
from rpython.rlib.objectmodel import specialize
from rpython.rlib.rarithmetic import ovfcheck
@@ -24,6 +24,32 @@
space, lenself, w_start, w_end, upper_bound=upper_bound)
return (value, start, end)
+ @staticmethod
+ def descr_maketrans(space, w_type, w_from, w_to):
+ """B.maketrans(frm, to) -> translation table
+
+ Return a translation table (a bytes object of length 256) suitable
+ for use in the bytes or bytearray translate method where each byte
+ in frm is mapped to the byte at the same position in to.
+ The bytes objects frm and to must be of the same length.
+ """
+ from pypy.objspace.std.bytesobject import makebytesdata_w, wrapstr
+
+ base_table = [chr(i) for i in range(256)]
+ list_from = makebytesdata_w(space, w_from)
+ list_to = makebytesdata_w(space, w_to)
+
+ if len(list_from) != len(list_to):
+ raise operationerrfmt(space.w_ValueError,
+ "maketrans arguments must have same length")
+
+ for i in range(len(list_from)):
+ pos_from = ord(list_from[i])
+ char_to = list_to[i]
+ base_table[pos_from] = char_to
+
+ return wrapstr(space, ''.join(base_table))
+
def descr_len(self, space):
return space.wrap(self._len())
@@ -90,21 +116,13 @@
if index < 0 or index >= selflen:
raise OperationError(space.w_IndexError,
space.wrap("string index out of range"))
+ from pypy.objspace.std.bytesobject import W_BytesObject
from pypy.objspace.std.bytearrayobject import W_BytearrayObject
- if isinstance(self, W_BytearrayObject):
+ if isinstance(self, W_BytesObject) or isinstance(self, W_BytearrayObject):
return space.wrap(ord(selfvalue[index]))
#return wrapchar(space, selfvalue[index])
return self._new(selfvalue[index])
- def descr_getslice(self, space, w_start, w_stop):
- selfvalue = self._val(space)
- start, stop = normalize_simple_slice(space, len(selfvalue), w_start,
- w_stop)
- if start == stop:
- return self._empty()
- else:
- return self._sliced(space, selfvalue, start, stop, self)
-
def descr_capitalize(self, space):
value = self._val(space)
if len(value) == 0:
@@ -139,19 +157,11 @@
return space.newint(value.count(self._op_val(space, w_sub), start,
end))
def descr_decode(self, space, w_encoding=None, w_errors=None):
- from pypy.objspace.std.unicodeobject import _get_encoding_and_errors, \
- unicode_from_string, decode_object
+ from pypy.objspace.std.unicodeobject import (
+ _get_encoding_and_errors, decode_object)
encoding, errors = _get_encoding_and_errors(space, w_encoding,
w_errors)
- if encoding is None and errors is None:
- return unicode_from_string(space, self)
return decode_object(space, self, encoding, errors)
- def descr_encode(self, space, w_encoding=None, w_errors=None):
- from pypy.objspace.std.unicodeobject import _get_encoding_and_errors, \
- encode_object
- encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
- return encode_object(space, self, encoding, errors)
-
@unwrap_spec(tabsize=int)
def descr_expandtabs(self, space, tabsize=8):
value = self._val(space)
@@ -175,6 +185,9 @@
def _tabindent(self, token, tabsize):
"calculates distance behind the token to the next tabstop"
+ if tabsize <= 0:
+ return tabsize
+
distance = tabsize
if token:
distance = 0
@@ -305,16 +318,9 @@
return space.newbool(cased)
def descr_join(self, space, w_list):
- from pypy.objspace.std.bytesobject import W_BytesObject
from pypy.objspace.std.unicodeobject import W_UnicodeObject
- if isinstance(self, W_BytesObject):
- l = space.listview_str(w_list)
- if l is not None:
- if len(l) == 1:
- return space.wrap(l[0])
- return space.wrap(self._val(space).join(l))
- elif isinstance(self, W_UnicodeObject):
+ if isinstance(self, W_UnicodeObject):
l = space.listview_unicode(w_list)
if l is not None:
if len(l) == 1:
@@ -343,14 +349,11 @@
prealloc_size = len(value) * (size - 1)
for i in range(size):
w_s = list_w[i]
- check_item = self._join_check_item(space, w_s)
- if check_item == 1:
+ if self._join_check_item(space, w_s):
raise operationerrfmt(
space.w_TypeError,
- "sequence item %d: expected string, %s "
- "found", i, space.type(w_s).getname(space))
- elif check_item == 2:
- return self._join_autoconvert(space, list_w)
+ "sequence item %d: expected %s, %T found",
+ i, self._generic_name(), w_s)
prealloc_size += len(self._op_val(space, w_s))
sb = self._builder(prealloc_size)
@@ -360,9 +363,6 @@
sb.append(self._op_val(space, list_w[i]))
return self._new(sb.build())
- def _join_autoconvert(self, space, list_w):
- assert False, 'unreachable'
-
@unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
def descr_ljust(self, space, width, w_fillchar):
value = self._val(space)
@@ -505,6 +505,9 @@
strs.append(value[pos:length])
return self._newlist_unwrapped(space, strs)
+ def _generic_name(self):
+ return "bytes"
+
def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
(value, start, end) = self._convert_idx_params(space, w_start, w_end,
True)
@@ -514,13 +517,15 @@
return space.w_True
return space.w_False
try:
- return space.newbool(self._startswith(space, value, w_prefix, start, end))
+ res = self._startswith(space, value, w_prefix, start, end)
except OperationError as e:
- if e.match(space, space.w_TypeError):
- msg = ("startswith first arg must be str or a tuple of str, "
- "not %T")
- raise operationerrfmt(space.w_TypeError, msg, w_prefix)
- raise
+ if not e.match(space, space.w_TypeError):
+ raise
+ wanted = self._generic_name()
+ raise operationerrfmt(space.w_TypeError,
+ "startswith first arg must be %s or a tuple "
+ "of %s, not %T", wanted, wanted, w_prefix)
+ return space.newbool(res)
def _startswith(self, space, value, w_prefix, start, end):
return startswith(value, self._op_val(space, w_prefix), start, end)
@@ -535,14 +540,15 @@
return space.w_True
return space.w_False
try:
- return space.newbool(self._endswith(space, value, w_suffix, start,
- end))
+ res = self._endswith(space, value, w_suffix, start, end)
except OperationError as e:
- if e.match(space, space.w_TypeError):
- msg = ("endswith first arg must be str or a tuple of str, not "
- "%T")
- raise operationerrfmt(space.w_TypeError, msg, w_suffix)
- raise
+ if not e.match(space, space.w_TypeError):
+ raise
+ wanted = self._generic_name()
+ raise operationerrfmt(space.w_TypeError,
+ "endswith first arg must be %s or a tuple "
+ "of %s, not %T", wanted, wanted, w_suffix)
+ return space.newbool(res)
def _endswith(self, space, value, w_prefix, start, end):
return endswith(value, self._op_val(space, w_prefix), start, end)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -109,6 +109,9 @@
_builder = UnicodeBuilder
+ def _generic_name(self):
+ return "str"
+
def _isupper(self, ch):
return unicodedb.isupper(ord(ch))
@@ -178,16 +181,10 @@
@staticmethod
def descr_maketrans(space, w_type, w_x, w_y=None, w_z=None):
- if space.is_none(w_y):
- y = None
- else:
- y = space.unicode_w(w_y)
- if space.is_none(w_z):
- z = None
- else:
- z = space.unicode_w(w_z)
+ y = None if space.is_none(w_y) else space.unicode_w(w_y)
+ z = None if space.is_none(w_z) else space.unicode_w(w_z)
+ w_new = space.newdict()
- w_new = space.newdict()
if y is not None:
# x must be a string too, of equal length
ylen = len(y)
@@ -362,9 +359,9 @@
elif space.isinstance_w(w_newval, space.w_unicode):
result.append(space.unicode_w(w_newval))
else:
- raise OperationError(
+ raise operationerrfmt(
space.w_TypeError,
- space.wrap("character mapping must return integer, None or unicode"))
+ "character mapping must return integer, None or str")
return W_UnicodeObject(u''.join(result))
def descr_encode(self, space, w_encoding=None, w_errors=None):
@@ -375,10 +372,7 @@
return space.is_w(space.type(w_obj), space.w_unicode)
def _join_check_item(self, space, w_obj):
- if (space.isinstance_w(w_obj, space.w_str) or
- space.isinstance_w(w_obj, space.w_unicode)):
- return 0
- return 1
+ return not space.isinstance_w(w_obj, space.w_unicode)
def descr_isdecimal(self, space):
return self._is_generic(space, '_isdecimal')
@@ -415,6 +409,17 @@
return space.w_False
return space.w_True
+ def _fix_fillchar(func):
+ # XXX: hack
+ from rpython.tool.sourcetools import func_with_new_name
+ func = func_with_new_name(func, func.__name__)
+ func.unwrap_spec = func.unwrap_spec.copy()
+ func.unwrap_spec['w_fillchar'] = WrappedDefault(u' ')
+ return func
+
+ descr_center = _fix_fillchar(StringMethods.descr_center)
+ descr_ljust = _fix_fillchar(StringMethods.descr_ljust)
+ descr_rjust = _fix_fillchar(StringMethods.descr_rjust)
def wrapunicode(space, uni):
return W_UnicodeObject(uni)
@@ -530,17 +535,11 @@
def unicode_from_encoded_object(space, w_obj, encoding, errors):
- # explicitly block bytearray on 2.7
- from .bytearrayobject import W_BytearrayObject
- if isinstance(w_obj, W_BytearrayObject):
- raise OperationError(space.w_TypeError,
- space.wrap("decoding bytearray is not supported"))
-
w_retval = decode_object(space, w_obj, encoding, errors)
if not space.isinstance_w(w_retval, space.w_unicode):
raise operationerrfmt(space.w_TypeError,
- "decoder did not return an unicode object (type '%s')",
- space.type(w_retval).getname(space))
+ "decoder did not return a str object (type '%T')",
+ w_retval)
assert isinstance(w_retval, W_UnicodeObject)
return w_retval
@@ -840,19 +839,6 @@
If chars is a str, it will be converted to unicode before stripping
"""
- def maketrans():
- """str.maketrans(x[, y[, z]]) -> dict (static method)
-
- Return a translation table usable for str.translate().
- If there is only one argument, it must be a dictionary mapping Unicode
- ordinals (integers) or characters to Unicode ordinals, strings or None.
- Character keys will be then converted to ordinals.
- If there are two arguments, they must be strings of equal length, and
- in the resulting dictionary, each character in x will be mapped to the
- character at the same position in y. If there is a third argument, it
- must be a string, whose characters will be mapped to None in the result.
- """
-
def partition():
"""S.partition(sep) -> (head, sep, tail)
@@ -1126,8 +1112,7 @@
__getnewargs__ = interp2app(W_UnicodeObject.descr_getnewargs,
doc=UnicodeDocstrings.__getnewargs__.__doc__),
maketrans = interp2app(W_UnicodeObject.descr_maketrans,
- as_classmethod=True,
- doc=UnicodeDocstrings.maketrans.__doc__)
+ as_classmethod=True),
)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit