Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r47999:ef504fcb59bb
Date: 2011-10-12 22:19 +0200
http://bitbucket.org/pypy/pypy/changeset/ef504fcb59bb/
Log: space.wrap("xxx") now returns a unicode string!
space.str_w(w_someunicode) returns an RPython bytestring! (Use
space.wrapbytes and space.bytes_w to get the previous behavior.)
diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py
--- a/pypy/interpreter/astcompiler/ast.py
+++ b/pypy/interpreter/astcompiler/ast.py
@@ -1758,6 +1758,7 @@
_col_offset_mask = 8
def __init__(self, id, ctx, lineno, col_offset):
+ assert isinstance(id, str)
self.id = id
self.ctx = ctx
expr.__init__(self, lineno, col_offset)
diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py
--- a/pypy/interpreter/astcompiler/astbuilder.py
+++ b/pypy/interpreter/astcompiler/astbuilder.py
@@ -503,6 +503,8 @@
return name
def handle_arguments(self, arguments_node):
+ # This function handles both typedargslist (function definition)
+ # and varargslist (lambda definition).
if arguments_node.type == syms.parameters:
if len(arguments_node.children) == 2:
return ast.arguments(None, None, None, None)
@@ -517,7 +519,7 @@
while i < child_count:
argument = arguments_node.children[i]
arg_type = argument.type
- if arg_type == syms.tfpdef:
+ if arg_type == syms.tfpdef or arg_type == syms.vfpdef:
parenthesized = False
complex_args = False
while True:
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -188,13 +188,13 @@
# -------------------------------------------------------------------
- def str_w(self, space):
- w_msg = typed_unwrap_error_msg(space, "string", self)
+ def bytes_w(self, space):
+ w_msg = typed_unwrap_error_msg(space, "bytes", self)
raise OperationError(space.w_TypeError, w_msg)
def unicode_w(self, space):
raise OperationError(space.w_TypeError,
- typed_unwrap_error_msg(space, "unicode", self))
+ typed_unwrap_error_msg(space, "string", self))
def int_w(self, space):
raise OperationError(space.w_TypeError,
@@ -1233,7 +1233,10 @@
return self.str_w(w_obj)
def str_w(self, w_obj):
- return w_obj.str_w(self)
+ return self.unicode_w(w_obj).encode('ascii')
+
+ def bytes_w(self, w_obj):
+ return w_obj.bytes_w(self)
def int_w(self, w_obj):
return w_obj.int_w(self)
@@ -1561,7 +1564,7 @@
ObjSpace.IrregularOpTable = [
'wrap',
- 'str_w',
+ 'bytes_w',
'int_w',
'float_w',
'uint_w',
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -66,7 +66,7 @@
exc_value = str(w_value)
else:
w = space.wrap
- if space.is_w(space.type(self.w_type), space.w_str):
+ if space.is_w(space.type(self.w_type), space.w_text):
exc_typename = space.str_w(self.w_type)
else:
exc_typename = space.str_w(
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -721,7 +721,6 @@
def IMPORT_NAME(self, nameindex, next_instr):
space = self.space
w_modulename = self.getname_w(nameindex)
- modulename = self.space.str_w(w_modulename)
w_fromlist = self.popvalue()
w_flag = self.popvalue()
@@ -739,7 +738,6 @@
w_locals = self.w_locals
if w_locals is None: # CPython does this
w_locals = space.w_None
- w_modulename = space.wrap(modulename)
w_globals = self.w_globals
if w_flag is None:
w_obj = space.call_function(w_import, w_modulename, w_globals,
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -13,6 +13,8 @@
rawmode = False
unicode = True
+ assert isinstance(s, str)
+
# string decoration handling
o = ord(quote)
isalpha = (o>=97 and o<=122) or (o>=65 and o<=90)
@@ -95,13 +97,13 @@
w_v = unicodehelper.PyUnicode_AsEncodedString(space, w_u,
space.wrap(encoding))
return w_v
else:
- return space.wrap(substr)
+ return space.wrapbytes(substr)
enc = None
if need_encoding:
enc = encoding
v = PyString_DecodeEscape(space, substr, enc)
- return space.wrap(v)
+ return space.wrapbytes(v)
def hexbyte(val):
result = "%x" % val
diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -4,14 +4,16 @@
from pypy.interpreter.astcompiler import consts
-_recode_to_utf8 = gateway.applevel(r'''
- def _recode_to_utf8(text, encoding):
- return unicode(text, encoding).encode("utf-8")
-''').interphook('_recode_to_utf8')
+def decode_source(space, bytes, encoding=None):
+ if encoding is None:
+ encoding = 'utf-8'
+ if encoding == 'utf-8':
+ return bytes
+ text = space.unicode_w(space.call_function(space.w_unicode,
+ space.wrapbytes(bytes),
+ space.wrap(encoding)))
+ return text.encode('utf-8')
-def recode_to_utf8(space, text, encoding):
- return space.str_w(_recode_to_utf8(space, space.wrap(text),
- space.wrap(encoding)))
def _normalize_encoding(encoding):
"""returns normalized name for <encoding>
@@ -94,7 +96,7 @@
self.space = space
self.future_flags = future_flags
- def parse_source(self, textsrc, compile_info):
+ def parse_source(self, bytessrc, compile_info):
"""Main entry point for parsing Python source.
Everything from decoding the source to tokenizing to building the parse
@@ -102,38 +104,39 @@
"""
# Detect source encoding.
enc = None
- if textsrc.startswith("\xEF\xBB\xBF"):
- textsrc = textsrc[3:]
+ if bytessrc.startswith("\xEF\xBB\xBF"):
+ bytessrc = bytessrc[3:]
enc = 'utf-8'
# If an encoding is explicitly given check that it is utf-8.
- decl_enc = _check_for_encoding(textsrc)
+ decl_enc = _check_for_encoding(bytessrc)
if decl_enc and decl_enc != "utf-8":
raise error.SyntaxError("UTF-8 BOM with non-utf8 coding cookie",
filename=compile_info.filename)
+ textsrc = decode_source(self.space, bytessrc, enc)
elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
enc = 'utf-8'
- if _check_for_encoding(textsrc) is not None:
+ if _check_for_encoding(bytessrc) is not None:
raise error.SyntaxError("coding declaration in unicode string",
filename=compile_info.filename)
+ textsrc = decode_source(self.space, bytessrc, enc)
else:
- enc = _normalize_encoding(_check_for_encoding(textsrc))
- if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
- try:
- textsrc = recode_to_utf8(self.space, textsrc, enc)
- except OperationError, e:
- # if the codec is not found, LookupError is raised. we
- # check using 'is_w' not to mask potential IndexError or
- # KeyError
- space = self.space
- if e.match(space, space.w_LookupError):
- raise error.SyntaxError("Unknown encoding: %s" % enc,
- filename=compile_info.filename)
- # Transform unicode errors into SyntaxError
- if e.match(space, space.w_UnicodeDecodeError):
- e.normalize_exception(space)
- w_message = space.str(e.get_w_value(space))
- raise error.SyntaxError(space.str_w(w_message))
- raise
+ enc = _normalize_encoding(_check_for_encoding(bytessrc))
+ try:
+ textsrc = decode_source(self.space, bytessrc, enc)
+ except OperationError, e:
+ # if the codec is not found, LookupError is raised. we
+ # check using 'is_w' not to mask potential IndexError or
+ # KeyError
+ space = self.space
+ if e.match(space, space.w_LookupError):
+ raise error.SyntaxError("Unknown encoding: %s" % enc,
+ filename=compile_info.filename)
+ # Transform unicode errors into SyntaxError
+ if e.match(space, space.w_UnicodeDecodeError):
+ e.normalize_exception(space)
+ w_message = space.str(e.get_w_value(space))
+ raise error.SyntaxError(space.text_w(w_message))
+ raise
f_flags, future_info = future.get_futures(self.future_flags, textsrc)
compile_info.last_future_import = future_info
diff --git a/pypy/module/__builtin__/compiling.py b/pypy/module/__builtin__/compiling.py
--- a/pypy/module/__builtin__/compiling.py
+++ b/pypy/module/__builtin__/compiling.py
@@ -26,18 +26,14 @@
ast_node = None
w_ast_type = space.gettypeobject(ast.AST.typedef)
- str_ = None
+ source_str = None
if space.is_true(space.isinstance(w_source, w_ast_type)):
ast_node = space.interp_w(ast.mod, w_source)
ast_node.sync_app_attrs(space)
- elif space.is_true(space.isinstance(w_source, space.w_unicode)):
- w_utf_8_source = space.call_method(w_source, "encode",
- space.wrap("utf-8"))
- str_ = space.str_w(w_utf_8_source)
+ else:
+ source_str = space.str_w(w_source)
# This flag tells the parser to reject any coding cookies it sees.
flags |= consts.PyCF_SOURCE_IS_UTF8
- else:
- str_ = space.str_w(w_source)
ec = space.getexecutioncontext()
if flags & ~(ec.compiler.compiler_flags | consts.PyCF_ONLY_AST |
@@ -56,10 +52,10 @@
if ast_node is None:
if flags & consts.PyCF_ONLY_AST:
- mod = ec.compiler.compile_to_ast(str_, filename, mode, flags)
+ mod = ec.compiler.compile_to_ast(source_str, filename, mode, flags)
return space.wrap(mod)
else:
- code = ec.compiler.compile(str_, filename, mode, flags)
+ code = ec.compiler.compile(source_str, filename, mode, flags)
else:
code = ec.compiler.compile_ast(ast_node, filename, mode, flags)
return space.wrap(code)
diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py
--- a/pypy/module/__builtin__/operation.py
+++ b/pypy/module/__builtin__/operation.py
@@ -47,8 +47,8 @@
# space.{get,set,del}attr()...
# Note that if w_name is already a string (or a subclass of str),
# it must be returned unmodified (and not e.g. unwrapped-rewrapped).
- if not space.is_w(space.type(w_name), space.w_str):
- name = space.str_w(w_name) # typecheck
+ if not space.is_w(space.type(w_name), space.w_text):
+ name = space.text_w(w_name) # typecheck
w_name = space.wrap(name) # rewrap as a real string
return w_name
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -386,7 +386,7 @@
state = space.fromcache(CodecState)
func = getattr(runicode, rname)
result = func(uni, len(uni), errors, state.encode_error_handler)
- return space.newtuple([space.wrap(result), space.wrap(len(uni))])
+ return space.newtuple([space.wrapbytes(result), space.wrap(len(uni))])
wrap_encoder.func_name = rname
globals()[name] = wrap_encoder
diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py
--- a/pypy/module/exceptions/interp_exceptions.py
+++ b/pypy/module/exceptions/interp_exceptions.py
@@ -627,11 +627,11 @@
def descr_init(self, space, w_encoding, w_object, w_start, w_end,
w_reason):
# typechecking
- space.realstr_w(w_encoding)
- space.realstr_w(w_object)
+ space.text_w(w_encoding)
+ space.str_w(w_object)
space.int_w(w_start)
space.int_w(w_end)
- space.realstr_w(w_reason)
+ space.text_w(w_reason)
# assign attributes
self.w_encoding = w_encoding
self.w_object = w_object
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -28,7 +28,7 @@
by dump(data, file)."""
m = StringMarshaller(space, space.int_w(w_version))
m.dump_w_obj(w_data)
- return space.wrap(m.get_value())
+ return space.wrapbytes(m.get_value())
def load(space, w_f):
"""Read one value from the file 'f' and return it."""
diff --git a/pypy/module/sys/__init__.py b/pypy/module/sys/__init__.py
--- a/pypy/module/sys/__init__.py
+++ b/pypy/module/sys/__init__.py
@@ -42,7 +42,7 @@
'argv' : 'state.get(space).w_argv',
'py3kwarning' : 'space.w_False',
'warnoptions' : 'state.get(space).w_warnoptions',
- 'builtin_module_names' : 'state.w_None',
+ 'builtin_module_names' : 'space.w_None',
'pypy_getudir' : 'state.pypy_getudir', # not translated
'pypy_initial_path' : 'state.pypy_initial_path',
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -431,7 +431,7 @@
def is_correct_type(self, w_obj):
space = self.space
- return space.is_w(space.type(w_obj), space.w_str)
+ return space.is_w(space.type(w_obj), space.w_text)
def get_empty_storage(self):
res = {}
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -68,6 +68,7 @@
w_type = self.gettypeobject(typedef)
self.builtin_types[typedef.name] = w_type
setattr(self, 'w_' + typedef.name, w_type)
+ self.w_text = self.w_unicode
self.builtin_types["NotImplemented"] = self.w_NotImplemented
self.builtin_types["Ellipsis"] = self.w_Ellipsis
@@ -149,6 +150,9 @@
assert typedef is not None
return self.fromcache(stdtypedef.TypeCache).getorbuild(typedef)
+ def wrapbytes(self, bytes):
+ return wrapstr(self, bytes)
+
def wrap(self, x):
"Wraps the Python value 'x' into one of the wrapper classes."
# You might notice that this function is rather conspicuously
@@ -170,7 +174,7 @@
else:
return self.newint(x)
if isinstance(x, str):
- return wrapstr(self, x)
+ return wrapunicode(self, x.decode('ascii'))
if isinstance(x, unicode):
return wrapunicode(self, x)
if isinstance(x, float):
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -78,9 +78,6 @@
# for testing
return w_self._node.flatten_unicode()
- def str_w(w_self, space):
- return space.str_w(space.str(w_self))
-
def create_if_subclassed(w_self):
if type(w_self) is W_RopeUnicodeObject:
return w_self
diff --git a/pypy/objspace/std/strsliceobject.py b/pypy/objspace/std/strsliceobject.py
--- a/pypy/objspace/std/strsliceobject.py
+++ b/pypy/objspace/std/strsliceobject.py
@@ -30,7 +30,7 @@
w_self.stop = len(str)
return str
- def str_w(w_self, space):
+ def bytes_w(w_self, space):
return w_self.force()
def __repr__(w_self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -39,9 +39,6 @@
return w_self
return W_UnicodeObject(w_self._value)
- def str_w(self, space):
- return space.str_w(space.str(self))
-
def unicode_w(self, space):
return self._value
diff --git a/pypy/objspace/std/unicodetype.py b/pypy/objspace/std/unicodetype.py
--- a/pypy/objspace/std/unicodetype.py
+++ b/pypy/objspace/std/unicodetype.py
@@ -25,7 +25,7 @@
space.w_UnicodeDecodeError,
space.newtuple([
space.wrap('ascii'),
- space.wrap(s),
+ space.wrapbytes(s),
space.wrap(i),
space.wrap(i+1),
space.wrap("ordinal not in range(128)")]))
@@ -191,7 +191,7 @@
startingpos, endingpos):
raise OperationError(space.w_UnicodeDecodeError,
space.newtuple([space.wrap(encoding),
- space.wrap(s),
+ space.wrapbytes(s),
space.wrap(startingpos),
space.wrap(endingpos),
space.wrap(msg)]))
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit