Author: Armin Rigo <[email protected]>
Branch: py3.5-marshal3
Changeset: r86609:4414cc2fc2f5
Date: 2016-08-27 17:27 +0200
http://bitbucket.org/pypy/pypy/changeset/4414cc2fc2f5/
Log: in-progress
diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py
--- a/pypy/module/imp/importing.py
+++ b/pypy/module/imp/importing.py
@@ -228,7 +228,7 @@
# CPython + 7 = default_magic -- used by PyPy (incompatible!)
#
from pypy.interpreter.pycode import default_magic
-MARSHAL_VERSION_FOR_PYC = 2
+MARSHAL_VERSION_FOR_PYC = 3
def get_pyc_magic(space):
return default_magic
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -4,24 +4,29 @@
from rpython.rlib import rstackovf
from pypy.objspace.std.marshal_impl import marshal, get_unmarshallers
+#
+# Write Python objects to files and read them back. This is primarily
+# intended for writing and reading compiled Python code, even though
+# dicts, lists, sets and frozensets, not commonly seen in code
+# objects, are supported. Version 3 of this protocol properly
+# supports circular links and sharing. The previous version is called
+# "2", like in Python 2.7, although it is not always compatible
+# between CPython 2.7 and CPython 3.x.
+#
+# XXX: before py3k, there was logic to do efficiently dump()/load() on
+# a file object. The corresponding logic is gone from CPython 3.x, so
+# I don't feel bad about killing it here too.
+#
-Py_MARSHAL_VERSION = 2
+Py_MARSHAL_VERSION = 3
+
@unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
def dump(space, w_data, w_f, w_version):
"""Write the 'data' object into the open file 'f'."""
- # XXX: before py3k, we special-cased W_File to use a more performant
- # FileWriter class. Should we do the same for py3k? Look also at
- # DirectStreamWriter
- writer = FileWriter(space, w_f)
- try:
- # note: bound methods are currently not supported,
- # so we have to pass the instance in, instead.
- ##m = Marshaller(space, writer.write, space.int_w(w_version))
- m = Marshaller(space, writer, space.int_w(w_version))
- m.dump_w_obj(w_data)
- finally:
- writer.finished()
+ # same implementation as CPython 3.x.
+ w_string = dumps(space, w_data, w_version)
+ space.call_method(w_f, 'write', w_string)
@unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
def dumps(space, w_data, w_version):
@@ -33,9 +38,6 @@
def load(space, w_f):
"""Read one value from the file 'f' and return it."""
- # XXX: before py3k, we special-cased W_File to use a more performant
- # FileWriter class. Should we do the same for py3k? Look also at
- # DirectStreamReader
reader = FileReader(space, w_f)
try:
u = Unmarshaller(space, reader)
@@ -68,22 +70,6 @@
def write(self, data):
raise NotImplementedError("Purely abstract method")
-class FileWriter(AbstractReaderWriter):
- def __init__(self, space, w_f):
- AbstractReaderWriter.__init__(self, space)
- try:
- self.func = space.getattr(w_f, space.wrap('write'))
- # XXX how to check if it is callable?
- except OperationError as e:
- if not e.match(space, space.w_AttributeError):
- raise
- raise oefmt(space.w_TypeError,
- "marshal.dump() 2nd arg must be file-like object")
-
- def write(self, data):
- space = self.space
- space.call_function(self.func, space.newbytes(data))
-
class FileReader(AbstractReaderWriter):
def __init__(self, space, w_f):
@@ -111,33 +97,6 @@
return ret
-class StreamReaderWriter(AbstractReaderWriter):
- def __init__(self, space, file):
- AbstractReaderWriter.__init__(self, space)
- self.file = file
- file.lock()
-
- def finished(self):
- self.file.unlock()
-
-class DirectStreamWriter(StreamReaderWriter):
- """
- XXX: this class is unused right now. Look at the comment in dump()
- """
- def write(self, data):
- self.file.do_direct_write(data)
-
-class DirectStreamReader(StreamReaderWriter):
- """
- XXX: this class is unused right now. Look at the comment in dump()
- """
- def read(self, n):
- data = self.file.direct_read(n)
- if len(data) < n:
- self.raise_eof()
- return data
-
-
class _Base(object):
def raise_exc(self, msg):
space = self.space
@@ -354,7 +313,6 @@
def __init__(self, space, reader):
self.space = space
self.reader = reader
- self.stringtable_w = []
def get(self, n):
assert n >= 0
diff --git a/pypy/module/marshal/test/test_marshalimpl.py b/pypy/module/marshal/test/test_marshalimpl.py
--- a/pypy/module/marshal/test/test_marshalimpl.py
+++ b/pypy/module/marshal/test/test_marshalimpl.py
@@ -6,20 +6,6 @@
class AppTestMarshalMore:
spaceconfig = dict(usemodules=('array',))
- def test_unmarshal_int64(self):
- # test that we can unmarshal 64-bit ints on 32-bit platforms
- # (of course we only test that if we're running on such a
- # platform :-)
- import marshal
- z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
- assert z == 10000000000
- z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
- assert z == -10000000000
- z = marshal.loads(b'I\x88\x87\x86\x85\x84\x83\x82\x01')
- assert z == 108793946209421192
- z = marshal.loads(b'I\xd8\xd8\xd9\xda\xdb\xdc\xcd\xfe')
- assert z == -0x0132232425262728
-
def test_marshal_bufferlike_object(self):
import marshal, array
s = marshal.dumps(array.array('b', b'asd'))
@@ -33,10 +19,6 @@
def test_unmarshal_evil_long(self):
import marshal
raises(ValueError, marshal.loads, b'l\x02\x00\x00\x00\x00\x00\x00\x00')
- z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
- assert z == 10000000000
- z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
- assert z == -10000000000
def test_marshal_code_object(self):
def foo(a, b):
@@ -49,6 +31,14 @@
if attr_name.startswith("co_"):
assert getattr(code2, attr_name) == getattr(foo.__code__,
attr_name)
+ def test_shared_string(self):
+ x = "hello, "
+ x += "world"
+ s = marshal.dumps((x, x))
+ assert s.count(x) == 1
+ y = marshal.loads(s)
+ assert y == (x, x)
+
class AppTestMarshalSmallLong(AppTestMarshalMore):
spaceconfig = dict(usemodules=('array',),
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -29,14 +29,14 @@
TYPE_STOPITER = 'S'
TYPE_ELLIPSIS = '.'
TYPE_INT = 'i'
-TYPE_INT64 = 'I'
TYPE_FLOAT = 'f'
TYPE_BINARY_FLOAT = 'g'
TYPE_COMPLEX = 'x'
TYPE_BINARY_COMPLEX = 'y'
TYPE_LONG = 'l'
-TYPE_STRING = 's'
-TYPE_STRINGREF = 'R'
+TYPE_STRING = 's' # a *byte* string, not unicode
+TYPE_INTERNED = 't'
+TYPE_REF = 'r'
TYPE_TUPLE = '('
TYPE_LIST = '['
TYPE_DICT = '{'
@@ -45,6 +45,13 @@
TYPE_UNKNOWN = '?'
TYPE_SET = '<'
TYPE_FROZENSET = '>'
+FLAG_REF = 0x80 # bit added to mean "add obj to index"
+
+TYPE_ASCII = 'a' # never generated so far
+TYPE_ASCII_INTERNED = 'A' # never generated so far
+TYPE_SMALL_TUPLE = ')'
+TYPE_SHORT_ASCII = 'z' # never generated so far
+TYPE_SHORT_ASCII_INTERNED = 'Z' # never generated so far
_marshallers = []
@@ -75,7 +82,8 @@
s = space.readbuf_w(w_obj)
except OperationError as e:
if e.match(space, space.w_TypeError):
- raise oefmt(space.w_ValueError, "unmarshallable object")
+ raise oefmt(space.w_ValueError, "cannot marshal '%T' object",
+ w_obj)
raise
m.atom_str(TYPE_STRING, s.as_str())
@@ -108,7 +116,7 @@
@marshaller(W_TypeObject)
def marshal_stopiter(space, w_type, m):
if not space.is_w(w_type, space.w_StopIteration):
- raise oefmt(space.w_ValueError, "unmarshallable object")
+ raise oefmt(space.w_ValueError, "cannot marshal type object")
m.atom(TYPE_STOPITER)
@unmarshaller(TYPE_STOPITER)
@@ -127,37 +135,26 @@
@marshaller(W_IntObject)
def marshal_int(space, w_int, m):
- if LONG_BIT == 32:
+ y = w_int.intval >> 31
+ if y and y != -1:
+ _marshal_bigint(space, space.bigint_w(w_int), m)
+ else:
m.atom_int(TYPE_INT, w_int.intval)
- else:
- y = w_int.intval >> 31
- if y and y != -1:
- m.atom_int64(TYPE_INT64, w_int.intval)
- else:
- m.atom_int(TYPE_INT, w_int.intval)
@unmarshaller(TYPE_INT)
def unmarshal_int(space, u, tc):
return space.newint(u.get_int())
-@unmarshaller(TYPE_INT64)
-def unmarshal_int64(space, u, tc):
- lo = u.get_int() # get the first 32 bits
- hi = u.get_int() # get the next 32 bits
- if LONG_BIT >= 64:
- x = (hi << 32) | (lo & (2**32-1)) # result fits in an int
- else:
- x = (r_longlong(hi) << 32) | r_longlong(r_uint(lo)) # get a r_longlong
- return space.wrap(x)
-
@marshaller(W_AbstractLongObject)
def marshal_long(space, w_long, m):
+ _marshal_bigint(space, w_long.asbigint(), m)
+
+def _marshal_bigint(space, num, m):
from rpython.rlib.rarithmetic import r_ulonglong
m.start(TYPE_LONG)
SHIFT = 15
MASK = (1 << SHIFT) - 1
- num = w_long.asbigint()
sign = num.sign
num = num.abs()
total_length = (num.bit_length() + (SHIFT - 1)) / SHIFT
@@ -252,14 +249,6 @@
def unmarshal_bytes(space, u, tc):
return space.newbytes(u.get_str())
-@unmarshaller(TYPE_STRINGREF)
-def unmarshal_stringref(space, u, tc):
- idx = u.get_int()
- try:
- return u.stringtable_w[idx]
- except IndexError:
- raise oefmt(space.w_ValueError, "bad marshal data")
-
@marshaller(W_AbstractTupleObject)
def marshal_tuple(space, w_tuple, m):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit