Author: Armin Rigo <[email protected]>
Branch: py3.5-marshal3
Changeset: r86609:4414cc2fc2f5
Date: 2016-08-27 17:27 +0200
http://bitbucket.org/pypy/pypy/changeset/4414cc2fc2f5/

Log:    in-progress

diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py
--- a/pypy/module/imp/importing.py
+++ b/pypy/module/imp/importing.py
@@ -228,7 +228,7 @@
 #     CPython + 7 = default_magic  -- used by PyPy (incompatible!)
 #
 from pypy.interpreter.pycode import default_magic
-MARSHAL_VERSION_FOR_PYC = 2
+MARSHAL_VERSION_FOR_PYC = 3
 
 def get_pyc_magic(space):
     return default_magic
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -4,24 +4,29 @@
 from rpython.rlib import rstackovf
 from pypy.objspace.std.marshal_impl import marshal, get_unmarshallers
 
+#
+# Write Python objects to files and read them back.  This is primarily
+# intended for writing and reading compiled Python code, even though
+# dicts, lists, sets and frozensets, not commonly seen in code
+# objects, are supported.  Version 3 of this protocol properly
+# supports circular links and sharing.  The previous version is called
+# "2", like in Python 2.7, although it is not always compatible
+# between CPython 2.7 and CPython 3.x.
+#
+# XXX: before py3k, there was logic to do efficiently dump()/load() on
+# a file object.  The corresponding logic is gone from CPython 3.x, so
+# I don't feel bad about killing it here too.
+#
 
-Py_MARSHAL_VERSION = 2
+Py_MARSHAL_VERSION = 3
+
 
 @unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
 def dump(space, w_data, w_f, w_version):
     """Write the 'data' object into the open file 'f'."""
-    # XXX: before py3k, we special-cased W_File to use a more performant
-    # FileWriter class. Should we do the same for py3k? Look also at
-    # DirectStreamWriter
-    writer = FileWriter(space, w_f)
-    try:
-        # note: bound methods are currently not supported,
-        # so we have to pass the instance in, instead.
-        ##m = Marshaller(space, writer.write, space.int_w(w_version))
-        m = Marshaller(space, writer, space.int_w(w_version))
-        m.dump_w_obj(w_data)
-    finally:
-        writer.finished()
+    # same implementation as CPython 3.x.
+    w_string = dumps(space, w_data, w_version)
+    space.call_method(w_f, 'write', w_string)
 
 @unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
 def dumps(space, w_data, w_version):
@@ -33,9 +38,6 @@
 
 def load(space, w_f):
     """Read one value from the file 'f' and return it."""
-    # XXX: before py3k, we special-cased W_File to use a more performant
-    # FileWriter class. Should we do the same for py3k? Look also at
-    # DirectStreamReader
     reader = FileReader(space, w_f)
     try:
         u = Unmarshaller(space, reader)
@@ -68,22 +70,6 @@
     def write(self, data):
         raise NotImplementedError("Purely abstract method")
 
-class FileWriter(AbstractReaderWriter):
-    def __init__(self, space, w_f):
-        AbstractReaderWriter.__init__(self, space)
-        try:
-            self.func = space.getattr(w_f, space.wrap('write'))
-            # XXX how to check if it is callable?
-        except OperationError as e:
-            if not e.match(space, space.w_AttributeError):
-                raise
-            raise oefmt(space.w_TypeError,
-                        "marshal.dump() 2nd arg must be file-like object")
-
-    def write(self, data):
-        space = self.space
-        space.call_function(self.func, space.newbytes(data))
-
 
 class FileReader(AbstractReaderWriter):
     def __init__(self, space, w_f):
@@ -111,33 +97,6 @@
         return ret
 
 
-class StreamReaderWriter(AbstractReaderWriter):
-    def __init__(self, space, file):
-        AbstractReaderWriter.__init__(self, space)
-        self.file = file
-        file.lock()
-
-    def finished(self):
-        self.file.unlock()
-
-class DirectStreamWriter(StreamReaderWriter):
-    """
-    XXX: this class is unused right now. Look at the comment in dump()
-    """
-    def write(self, data):
-        self.file.do_direct_write(data)
-
-class DirectStreamReader(StreamReaderWriter):
-    """
-    XXX: this class is unused right now. Look at the comment in dump()
-    """
-    def read(self, n):
-        data = self.file.direct_read(n)
-        if len(data) < n:
-            self.raise_eof()
-        return data
-
-
 class _Base(object):
     def raise_exc(self, msg):
         space = self.space
@@ -354,7 +313,6 @@
     def __init__(self, space, reader):
         self.space = space
         self.reader = reader
-        self.stringtable_w = []
 
     def get(self, n):
         assert n >= 0
diff --git a/pypy/module/marshal/test/test_marshalimpl.py b/pypy/module/marshal/test/test_marshalimpl.py
--- a/pypy/module/marshal/test/test_marshalimpl.py
+++ b/pypy/module/marshal/test/test_marshalimpl.py
@@ -6,20 +6,6 @@
 class AppTestMarshalMore:
     spaceconfig = dict(usemodules=('array',))
 
-    def test_unmarshal_int64(self):
-        # test that we can unmarshal 64-bit ints on 32-bit platforms
-        # (of course we only test that if we're running on such a
-        # platform :-)
-        import marshal
-        z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
-        assert z == 10000000000
-        z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
-        assert z == -10000000000
-        z = marshal.loads(b'I\x88\x87\x86\x85\x84\x83\x82\x01')
-        assert z == 108793946209421192
-        z = marshal.loads(b'I\xd8\xd8\xd9\xda\xdb\xdc\xcd\xfe')
-        assert z == -0x0132232425262728
-
     def test_marshal_bufferlike_object(self):
         import marshal, array
         s = marshal.dumps(array.array('b', b'asd'))
@@ -33,10 +19,6 @@
     def test_unmarshal_evil_long(self):
         import marshal
         raises(ValueError, marshal.loads, b'l\x02\x00\x00\x00\x00\x00\x00\x00')
-        z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
-        assert z == 10000000000
-        z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
-        assert z == -10000000000
 
     def test_marshal_code_object(self):
         def foo(a, b):
@@ -49,6 +31,14 @@
             if attr_name.startswith("co_"):
                 assert getattr(code2, attr_name) == getattr(foo.__code__, attr_name)
 
+    def test_shared_string(self):
+        x = "hello, "
+        x += "world"
+        s = marshal.dumps((x, x))
+        assert s.count(x) == 1
+        y = marshal.loads(s)
+        assert y == (x, x)
+
 
 class AppTestMarshalSmallLong(AppTestMarshalMore):
     spaceconfig = dict(usemodules=('array',),
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -29,14 +29,14 @@
 TYPE_STOPITER  = 'S'
 TYPE_ELLIPSIS  = '.'
 TYPE_INT       = 'i'
-TYPE_INT64     = 'I'
 TYPE_FLOAT     = 'f'
 TYPE_BINARY_FLOAT = 'g'
 TYPE_COMPLEX   = 'x'
 TYPE_BINARY_COMPLEX = 'y'
 TYPE_LONG      = 'l'
-TYPE_STRING    = 's'
-TYPE_STRINGREF = 'R'
+TYPE_STRING    = 's'     # a *byte* string, not unicode
+TYPE_INTERNED  = 't'
+TYPE_REF       = 'r'
 TYPE_TUPLE     = '('
 TYPE_LIST      = '['
 TYPE_DICT      = '{'
@@ -45,6 +45,13 @@
 TYPE_UNKNOWN   = '?'
 TYPE_SET       = '<'
 TYPE_FROZENSET = '>'
+FLAG_REF       = 0x80    # bit added to mean "add obj to index"
+
+TYPE_ASCII                = 'a'   # never generated so far
+TYPE_ASCII_INTERNED       = 'A'   # never generated so far
+TYPE_SMALL_TUPLE          = ')'
+TYPE_SHORT_ASCII          = 'z'   # never generated so far
+TYPE_SHORT_ASCII_INTERNED = 'Z'   # never generated so far
 
 
 _marshallers = []
@@ -75,7 +82,8 @@
         s = space.readbuf_w(w_obj)
     except OperationError as e:
         if e.match(space, space.w_TypeError):
-            raise oefmt(space.w_ValueError, "unmarshallable object")
+            raise oefmt(space.w_ValueError, "cannot marshal '%T' object",
+                        w_obj)
         raise
     m.atom_str(TYPE_STRING, s.as_str())
 
@@ -108,7 +116,7 @@
 @marshaller(W_TypeObject)
 def marshal_stopiter(space, w_type, m):
     if not space.is_w(w_type, space.w_StopIteration):
-        raise oefmt(space.w_ValueError, "unmarshallable object")
+        raise oefmt(space.w_ValueError, "cannot marshal type object")
     m.atom(TYPE_STOPITER)
 
 @unmarshaller(TYPE_STOPITER)
@@ -127,37 +135,26 @@
 
 @marshaller(W_IntObject)
 def marshal_int(space, w_int, m):
-    if LONG_BIT == 32:
+    y = w_int.intval >> 31
+    if y and y != -1:
+        _marshal_bigint(space, space.bigint_w(w_int), m)
+    else:
         m.atom_int(TYPE_INT, w_int.intval)
-    else:
-        y = w_int.intval >> 31
-        if y and y != -1:
-            m.atom_int64(TYPE_INT64, w_int.intval)
-        else:
-            m.atom_int(TYPE_INT, w_int.intval)
 
 @unmarshaller(TYPE_INT)
 def unmarshal_int(space, u, tc):
     return space.newint(u.get_int())
 
-@unmarshaller(TYPE_INT64)
-def unmarshal_int64(space, u, tc):
-    lo = u.get_int()    # get the first 32 bits
-    hi = u.get_int()    # get the next 32 bits
-    if LONG_BIT >= 64:
-        x = (hi << 32) | (lo & (2**32-1))    # result fits in an int
-    else:
-        x = (r_longlong(hi) << 32) | r_longlong(r_uint(lo))  # get a r_longlong
-    return space.wrap(x)
-
 
 @marshaller(W_AbstractLongObject)
 def marshal_long(space, w_long, m):
+    _marshal_bigint(space, w_long.asbigint(), m)
+
+def _marshal_bigint(space, num, m):
     from rpython.rlib.rarithmetic import r_ulonglong
     m.start(TYPE_LONG)
     SHIFT = 15
     MASK = (1 << SHIFT) - 1
-    num = w_long.asbigint()
     sign = num.sign
     num = num.abs()
     total_length = (num.bit_length() + (SHIFT - 1)) / SHIFT
@@ -252,14 +249,6 @@
 def unmarshal_bytes(space, u, tc):
     return space.newbytes(u.get_str())
 
-@unmarshaller(TYPE_STRINGREF)
-def unmarshal_stringref(space, u, tc):
-    idx = u.get_int()
-    try:
-        return u.stringtable_w[idx]
-    except IndexError:
-        raise oefmt(space.w_ValueError, "bad marshal data")
-
 
 @marshaller(W_AbstractTupleObject)
 def marshal_tuple(space, w_tuple, m):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to