[pypy-commit] pypy py3.5: hg merge py3.5-marshal3

arigo Sun, 28 Aug 2016 13:17:01 -0700

Author: Armin Rigo <[email protected]>
Branch: py3.5
Changeset: r86661:4384ff755734
Date: 2016-08-28 22:16 +0200
http://bitbucket.org/pypy/pypy/changeset/4384ff755734/


Log:    hg merge py3.5-marshal3

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -837,13 +837,13 @@
             self.interned_strings.set(u, w_s1)
         return w_s1
 
-    def is_interned_str(self, s):
+    def get_interned_str(self, s):
         """Assumes an identifier (utf-8 encoded str)"""
         # interface for marshal_impl
         if not we_are_translated():
             assert type(s) is str
         u = s.decode('utf-8')
-        return self.interned_strings.get(u) is not None
+        return self.interned_strings.get(u)   # may be None
 
     def descr_self_interp_w(self, RequiredClass, w_obj):
         if not isinstance(w_obj, RequiredClass):
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -37,7 +37,7 @@
 # different value for the highest 16 bits. Bump pypy_incremental_magic every
 # time you make pyc files incompatible
 
-pypy_incremental_magic = 64 # bump it by 16
+pypy_incremental_magic = 80 # bump it by 16
 assert pypy_incremental_magic % 16 == 0
 assert pypy_incremental_magic < 3000 # the magic number of Python 3. There are
                                      # no known magic numbers below this value
diff --git a/pypy/module/_frozen_importlib/__init__.py b/pypy/module/_frozen_importlib/__init__.py
--- a/pypy/module/_frozen_importlib/__init__.py
+++ b/pypy/module/_frozen_importlib/__init__.py
@@ -47,8 +47,10 @@
     def _cached_compile(space, name, source, *args):
         from rpython.config.translationoption import CACHE_DIR
         from pypy.module.marshal import interp_marshal
+        from pypy.interpreter.pycode import default_magic
 
-        cachename = os.path.join(CACHE_DIR, 'frozen_importlib_%s' % (name,))
+        cachename = os.path.join(CACHE_DIR, 'frozen_importlib_%d%s' % (
+            default_magic, name))
         try:
             if space.config.translating:
                 raise IOError("don't use the cache when translating pypy")
diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py
--- a/pypy/module/imp/importing.py
+++ b/pypy/module/imp/importing.py
@@ -228,7 +228,7 @@
 #     CPython + 7 = default_magic  -- used by PyPy (incompatible!)
 #
 from pypy.interpreter.pycode import default_magic
-MARSHAL_VERSION_FOR_PYC = 2
+MARSHAL_VERSION_FOR_PYC = 4
 
 def get_pyc_magic(space):
     return default_magic
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -4,24 +4,30 @@
 from rpython.rlib import rstackovf
 from pypy.objspace.std.marshal_impl import marshal, get_unmarshallers
 
+#
+# Write Python objects to files and read them back.  This is primarily
+# intended for writing and reading compiled Python code, even though
+# dicts, lists, sets and frozensets, not commonly seen in code
+# objects, are supported.  Version 3 of this protocol properly
+# supports circular links and sharing.  The previous version is called
+# "2", like in Python 2.7, although it is not always compatible
+# between CPython 2.7 and CPython 3.4.  Version 4 adds small
+# optimizations in compactness.
+#
+# XXX: before py3k, there was logic to do efficiently dump()/load() on
+# a file object.  The corresponding logic is gone from CPython 3.x, so
+# I don't feel bad about killing it here too.
+#
 
-Py_MARSHAL_VERSION = 2
+Py_MARSHAL_VERSION = 4
+
 
 @unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
 def dump(space, w_data, w_f, w_version):
     """Write the 'data' object into the open file 'f'."""
-    # XXX: before py3k, we special-cased W_File to use a more performant
-    # FileWriter class. Should we do the same for py3k? Look also at
-    # DirectStreamWriter
-    writer = FileWriter(space, w_f)
-    try:
-        # note: bound methods are currently not supported,
-        # so we have to pass the instance in, instead.
-        ##m = Marshaller(space, writer.write, space.int_w(w_version))
-        m = Marshaller(space, writer, space.int_w(w_version))
-        m.dump_w_obj(w_data)
-    finally:
-        writer.finished()
+    # same implementation as CPython 3.x.
+    w_string = dumps(space, w_data, w_version)
+    space.call_method(w_f, 'write', w_string)
 
 @unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
 def dumps(space, w_data, w_version):
@@ -33,9 +39,6 @@
 
 def load(space, w_f):
     """Read one value from the file 'f' and return it."""
-    # XXX: before py3k, we special-cased W_File to use a more performant
-    # FileWriter class. Should we do the same for py3k? Look also at
-    # DirectStreamReader
     reader = FileReader(space, w_f)
     try:
         u = Unmarshaller(space, reader)
@@ -68,22 +71,6 @@
     def write(self, data):
         raise NotImplementedError("Purely abstract method")
 
-class FileWriter(AbstractReaderWriter):
-    def __init__(self, space, w_f):
-        AbstractReaderWriter.__init__(self, space)
-        try:
-            self.func = space.getattr(w_f, space.wrap('write'))
-            # XXX how to check if it is callable?
-        except OperationError as e:
-            if not e.match(space, space.w_AttributeError):
-                raise
-            raise oefmt(space.w_TypeError,
-                        "marshal.dump() 2nd arg must be file-like object")
-
-    def write(self, data):
-        space = self.space
-        space.call_function(self.func, space.newbytes(data))
-
 
 class FileReader(AbstractReaderWriter):
     def __init__(self, space, w_f):
@@ -111,33 +98,6 @@
         return ret
 
 
-class StreamReaderWriter(AbstractReaderWriter):
-    def __init__(self, space, file):
-        AbstractReaderWriter.__init__(self, space)
-        self.file = file
-        file.lock()
-
-    def finished(self):
-        self.file.unlock()
-
-class DirectStreamWriter(StreamReaderWriter):
-    """
-    XXX: this class is unused right now. Look at the comment in dump()
-    """
-    def write(self, data):
-        self.file.do_direct_write(data)
-
-class DirectStreamReader(StreamReaderWriter):
-    """
-    XXX: this class is unused right now. Look at the comment in dump()
-    """
-    def read(self, n):
-        data = self.file.direct_read(n)
-        if len(data) < n:
-            self.raise_eof()
-        return data
-
-
 class _Base(object):
     def raise_exc(self, msg):
         space = self.space
@@ -168,7 +128,15 @@
         ## self.put = putfunc
         self.writer = writer
         self.version = version
-        self.stringtable = {}
+        self.all_refs = {}
+        # all_refs = {w_obj: index} for all w_obj that are of a
+        # "reasonably sharable" type.  CPython checks the refcount of
+        # any object to know if it is sharable, independently of its
+        # type.  We can't do that.  We could do a two-pass marshaller.
+        # For now we simply add to this list all objects that marshal to
+        # more than a few fixed-sized bytes, minus ones like code
+        # objects that never appear more than once except in complete
+        # corner cases.
 
     ## currently we cannot use a put that is a bound method
     ## from outside. Same holds for get.
@@ -239,10 +207,13 @@
             rstackovf.check_stack_overflow()
             self._overflow()
 
-    def put_tuple_w(self, typecode, lst_w):
+    def put_tuple_w(self, typecode, lst_w, single_byte_size=False):
         self.start(typecode)
         lng = len(lst_w)
-        self.put_int(lng)
+        if single_byte_size:
+            self.put(chr(lng))
+        else:
+            self.put_int(lng)
         idx = 0
         while idx < lng:
             w_obj = lst_w[idx]
@@ -333,19 +304,35 @@
 
 
 def invalid_typecode(space, u, tc):
-    u.raise_exc("bad marshal data (unknown type code)")
+    u.raise_exc("bad marshal data (unknown type code %d)" % (ord(tc),))
 
 
+def _make_unmarshall_and_save_ref(func):
+    def unmarshall_save_ref(space, u, tc):
+        index = len(u.refs_w)
+        u.refs_w.append(None)
+        w_obj = func(space, u, tc)
+        u.refs_w[index] = w_obj
+        return w_obj
+    return unmarshall_save_ref
 
-class Unmarshaller(_Base):
+def _make_unmarshaller_dispatch():
     _dispatch = [invalid_typecode] * 256
     for tc, func in get_unmarshallers():
         _dispatch[ord(tc)] = func
+    for tc, func in get_unmarshallers():
+        if tc < '\x80' and _dispatch[ord(tc) + 0x80] is invalid_typecode:
+            _dispatch[ord(tc) + 0x80] = _make_unmarshall_and_save_ref(func)
+    return _dispatch
+
+
+class Unmarshaller(_Base):
+    _dispatch = _make_unmarshaller_dispatch()
 
     def __init__(self, space, reader):
         self.space = space
         self.reader = reader
-        self.stringtable_w = []
+        self.refs_w = []
 
     def get(self, n):
         assert n >= 0
@@ -355,6 +342,10 @@
         # the [0] is used to convince the annotator to return a char
         return self.get(1)[0]
 
+    def save_ref(self, typecode, w_obj):
+        if typecode >= '\x80':
+            self.refs_w.append(w_obj)
+
     def atom_str(self, typecode):
         self.start(typecode)
         lng = self.get_lng()
@@ -425,8 +416,11 @@
             self._overflow()
 
     # inlined version to save a recursion level
-    def get_tuple_w(self):
-        lng = self.get_lng()
+    def get_tuple_w(self, single_byte_size=False):
+        if single_byte_size:
+            lng = ord(self.get1())
+        else:
+            lng = self.get_lng()
         res_w = [None] * lng
         idx = 0
         space = self.space
@@ -442,9 +436,6 @@
             raise oefmt(space.w_TypeError, "NULL object in marshal data")
         return res_w
 
-    def get_list_w(self):
-        return self.get_tuple_w()[:]
-
     def _overflow(self):
         self.raise_exc('object too deeply nested to unmarshal')
 
diff --git a/pypy/module/marshal/test/test_marshal.py b/pypy/module/marshal/test/test_marshal.py
--- a/pypy/module/marshal/test/test_marshal.py
+++ b/pypy/module/marshal/test/test_marshal.py
@@ -199,7 +199,7 @@
     def test_bad_typecode(self):
         import marshal
         exc = raises(ValueError, marshal.loads, bytes([1]))
-        assert str(exc.value) == "bad marshal data (unknown type code)"
+        assert str(exc.value).startswith("bad marshal data (unknown type code")
 
     def test_bad_data(self):
         # If you have sufficiently little memory, the line at the end of the
diff --git a/pypy/module/marshal/test/test_marshalimpl.py b/pypy/module/marshal/test/test_marshalimpl.py
--- a/pypy/module/marshal/test/test_marshalimpl.py
+++ b/pypy/module/marshal/test/test_marshalimpl.py
@@ -6,20 +6,6 @@
 class AppTestMarshalMore:
     spaceconfig = dict(usemodules=('array',))
 
-    def test_unmarshal_int64(self):
-        # test that we can unmarshal 64-bit ints on 32-bit platforms
-        # (of course we only test that if we're running on such a
-        # platform :-)
-        import marshal
-        z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
-        assert z == 10000000000
-        z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
-        assert z == -10000000000
-        z = marshal.loads(b'I\x88\x87\x86\x85\x84\x83\x82\x01')
-        assert z == 108793946209421192
-        z = marshal.loads(b'I\xd8\xd8\xd9\xda\xdb\xdc\xcd\xfe')
-        assert z == -0x0132232425262728
-
     def test_marshal_bufferlike_object(self):
         import marshal, array
         s = marshal.dumps(array.array('b', b'asd'))
@@ -33,10 +19,6 @@
     def test_unmarshal_evil_long(self):
         import marshal
         raises(ValueError, marshal.loads, b'l\x02\x00\x00\x00\x00\x00\x00\x00')
-        z = marshal.loads(b'I\x00\xe4\x0bT\x02\x00\x00\x00')
-        assert z == 10000000000
-        z = marshal.loads(b'I\x00\x1c\xf4\xab\xfd\xff\xff\xff')
-        assert z == -10000000000
 
     def test_marshal_code_object(self):
         def foo(a, b):
@@ -49,6 +31,37 @@
             if attr_name.startswith("co_"):
                 assert getattr(code2, attr_name) == getattr(foo.__code__, attr_name)
 
+    def test_unmarshal_ascii(self):
+        import marshal
+        s = marshal.loads(b"a\x04\x00\x00\x00ab\xc2\x84")
+        assert s == "ab\xc2\x84"
+        s = marshal.loads(b"A\x04\x00\x00\x00ab\xc2\x84")
+        assert s == "ab\xc2\x84"
+        s = marshal.loads(b"z\x04ab\xc2\x84")
+        assert s == "ab\xc2\x84"
+        s = marshal.loads(b"Z\x04ab\xc2\x84")
+        assert s == "ab\xc2\x84"
+
+    def test_shared_string(self):
+        import marshal
+        x = "hello, "
+        x += "world"
+        xl = 256
+        xl **= 100
+        for version in [2, 3]:
+            s = marshal.dumps((x, x), version)
+            assert s.count(b'hello, world') == 2 if version < 3 else 1
+            y = marshal.loads(s)
+            assert y == (x, x)
+            #
+            s = marshal.dumps((xl, xl), version)
+            if version < 3:
+                assert 200 < len(s) < 250
+            else:
+                assert 100 < len(s) < 125
+            yl = marshal.loads(s)
+            assert yl == (xl, xl)
+
 
 class AppTestMarshalSmallLong(AppTestMarshalMore):
     spaceconfig = dict(usemodules=('array',),
@@ -62,6 +75,7 @@
         # NOTE: marshal is platform independent, running this test must assume
         # that self.seen gets values from the endianess of the marshal module.
         # (which is little endian!)
+        version = 2
         def __init__(self):
             self.seen = []
         def start(self, code):
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -2,6 +2,7 @@
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib.rstruct import ieee
 from rpython.rlib.unroll import unrolling_iterable
+from rpython.rlib import objectmodel
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.special import Ellipsis
@@ -29,14 +30,14 @@
 TYPE_STOPITER  = 'S'
 TYPE_ELLIPSIS  = '.'
 TYPE_INT       = 'i'
-TYPE_INT64     = 'I'
 TYPE_FLOAT     = 'f'
 TYPE_BINARY_FLOAT = 'g'
 TYPE_COMPLEX   = 'x'
 TYPE_BINARY_COMPLEX = 'y'
 TYPE_LONG      = 'l'
-TYPE_STRING    = 's'
-TYPE_STRINGREF = 'R'
+TYPE_STRING    = 's'     # a *byte* string, not unicode
+TYPE_INTERNED  = 't'
+TYPE_REF       = 'r'
 TYPE_TUPLE     = '('
 TYPE_LIST      = '['
 TYPE_DICT      = '{'
@@ -45,6 +46,15 @@
 TYPE_UNKNOWN   = '?'
 TYPE_SET       = '<'
 TYPE_FROZENSET = '>'
+FLAG_REF       = 0x80    # bit added to mean "add obj to index"
+FLAG_DONE      = '\x00'
+
+# the following typecodes have been added in version 4.
+TYPE_ASCII                = 'a'   # never generated so far by pypy
+TYPE_ASCII_INTERNED       = 'A'   # never generated so far by pypy
+TYPE_SMALL_TUPLE          = ')'
+TYPE_SHORT_ASCII          = 'z'   # never generated so far by pypy
+TYPE_SHORT_ASCII_INTERNED = 'Z'   # never generated so far by pypy
 
 
 _marshallers = []
@@ -56,12 +66,33 @@
         return f
     return _decorator
 
-def unmarshaller(tc):
+def unmarshaller(tc, save_ref=False):
     def _decorator(f):
+        assert tc < '\x80'
         _unmarshallers.append((tc, f))
+        if save_ref:
+            tcref = chr(ord(tc) + 0x80)
+            _unmarshallers.append((tcref, f))
         return f
     return _decorator
 
+def write_ref(typecode, w_obj, m):
+    if m.version < 3:
+        return typecode     # not writing object references
+    try:
+        index = m.all_refs[w_obj]
+    except KeyError:
+        # we don't support long indices
+        index = len(m.all_refs)
+        if index >= 0x7fffffff:
+            return typecode
+        m.all_refs[w_obj] = index
+        return chr(ord(typecode) + FLAG_REF)
+    else:
+        # write the reference index to the stream
+        m.atom_int(TYPE_REF, index)
+        return FLAG_DONE
+
 def marshal(space, w_obj, m):
     # _marshallers_unroll is defined at the end of the file
     # NOTE that if w_obj is a heap type, like an instance of a
@@ -80,7 +111,9 @@
         if e.match(space, space.w_TypeError):
             raise oefmt(space.w_ValueError, "unmarshallable object")
         raise
-    m.atom_str(TYPE_STRING, s.as_str())
+    typecode = write_ref(TYPE_STRING, w_obj, m)
+    if typecode != FLAG_DONE:
+        m.atom_str(typecode, s.as_str())
 
 def get_unmarshallers():
     return _unmarshallers
@@ -130,37 +163,27 @@
 
 @marshaller(W_IntObject)
 def marshal_int(space, w_int, m):
-    if LONG_BIT == 32:
+    y = w_int.intval >> 31
+    if y and y != -1:
+        marshal_long(space, w_int, m)
+    else:
         m.atom_int(TYPE_INT, w_int.intval)
-    else:
-        y = w_int.intval >> 31
-        if y and y != -1:
-            m.atom_int64(TYPE_INT64, w_int.intval)
-        else:
-            m.atom_int(TYPE_INT, w_int.intval)
 
 @unmarshaller(TYPE_INT)
 def unmarshal_int(space, u, tc):
     return space.newint(u.get_int())
 
-@unmarshaller(TYPE_INT64)
-def unmarshal_int64(space, u, tc):
-    lo = u.get_int()    # get the first 32 bits
-    hi = u.get_int()    # get the next 32 bits
-    if LONG_BIT >= 64:
-        x = (hi << 32) | (lo & (2**32-1))    # result fits in an int
-    else:
-        x = (r_longlong(hi) << 32) | r_longlong(r_uint(lo))  # get a r_longlong
-    return space.wrap(x)
-
 
 @marshaller(W_AbstractLongObject)
 def marshal_long(space, w_long, m):
     from rpython.rlib.rarithmetic import r_ulonglong
-    m.start(TYPE_LONG)
+    typecode = write_ref(TYPE_LONG, w_long, m)
+    if typecode == FLAG_DONE:
+        return
+    m.start(typecode)
     SHIFT = 15
     MASK = (1 << SHIFT) - 1
-    num = w_long.asbigint()
+    num = space.bigint_w(w_long)
     sign = num.sign
     num = num.abs()
     total_length = (num.bit_length() + (SHIFT - 1)) / SHIFT
@@ -248,59 +271,79 @@
 
 @marshaller(W_BytesObject)
 def marshal_bytes(space, w_str, m):
-    s = w_str.unwrap(space)
-    m.atom_str(TYPE_STRING, s)
+    typecode = write_ref(TYPE_STRING, w_str, m)
+    if typecode != FLAG_DONE:
+        s = space.bytes_w(w_str)
+        m.atom_str(typecode, s)
 
 @unmarshaller(TYPE_STRING)
 def unmarshal_bytes(space, u, tc):
     return space.newbytes(u.get_str())
 
-@unmarshaller(TYPE_STRINGREF)
-def unmarshal_stringref(space, u, tc):
-    idx = u.get_int()
-    try:
-        return u.stringtable_w[idx]
-    except IndexError:
-        raise oefmt(space.w_ValueError, "bad marshal data")
 
+def _marshal_tuple(space, tuple_w, m):
+    if m.version >= 4 and len(tuple_w) < 256:
+        typecode = TYPE_SMALL_TUPLE
+        single_byte_size = True
+    else:
+        typecode = TYPE_TUPLE
+        single_byte_size = False
+    # -- does it make any sense to try to share tuples, based on the
+    # -- *identity* of the tuple object?  I'd guess not really
+    #typecode = write_ref(typecode, w_tuple, m)
+    #if typecode != FLAG_DONE:
+    m.put_tuple_w(typecode, tuple_w, single_byte_size=single_byte_size)
 
 @marshaller(W_AbstractTupleObject)
 def marshal_tuple(space, w_tuple, m):
-    items = w_tuple.tolist()
-    m.put_tuple_w(TYPE_TUPLE, items)
+    _marshal_tuple(space, w_tuple.tolist(), m)
 
 @unmarshaller(TYPE_TUPLE)
 def unmarshal_tuple(space, u, tc):
     items_w = u.get_tuple_w()
     return space.newtuple(items_w)
 
+@unmarshaller(TYPE_SMALL_TUPLE)
+def unmarshal_tuple(space, u, tc):
+    items_w = u.get_tuple_w(single_byte_size=True)
+    return space.newtuple(items_w)
+
 
 @marshaller(W_ListObject)
 def marshal_list(space, w_list, m):
-    items = w_list.getitems()[:]
-    m.put_tuple_w(TYPE_LIST, items)
+    typecode = write_ref(TYPE_LIST, w_list, m)
+    if typecode != FLAG_DONE:
+        items = w_list.getitems()[:]
+        m.put_tuple_w(typecode, items)
 
-@unmarshaller(TYPE_LIST)
+@unmarshaller(TYPE_LIST, save_ref=True)
 def unmarshal_list(space, u, tc):
-    items_w = u.get_list_w()
-    return space.newlist(items_w)
+    w_obj = space.newlist([])
+    u.save_ref(tc, w_obj)
+    for w_item in u.get_tuple_w():
+        w_obj.append(w_item)
+    return w_obj
 
 
 @marshaller(W_DictMultiObject)
 def marshal_dict(space, w_dict, m):
-    m.start(TYPE_DICT)
+    typecode = write_ref(TYPE_DICT, w_dict, m)
+    if typecode == FLAG_DONE:
+        return
+    m.start(typecode)
     for w_tuple in w_dict.items():
         w_key, w_value = space.fixedview(w_tuple, 2)
         m.put_w_obj(w_key)
         m.put_w_obj(w_value)
     m.atom(TYPE_NULL)
 
-@unmarshaller(TYPE_DICT)
+@unmarshaller(TYPE_DICT, save_ref=True)
 def unmarshal_dict(space, u, tc):
     # since primitive lists are not optimized and we don't know
     # the dict size in advance, use the dict's setitem instead
     # of building a list of tuples.
     w_dic = space.newdict()
+    u.save_ref(tc, w_dic)
     while 1:
         w_key = u.get_w_obj(allow_null=True)
         if w_key is None:
@@ -314,14 +357,9 @@
     return None
 
 
-def _put_str_list(space, m, strlist):
-    m.atom_int(TYPE_TUPLE, len(strlist))
-    atom_str = m.atom_str
-    for item in strlist:
-        atom_str(TYPE_STRING, item)
-
 @marshaller(PyCode)
 def marshal_pycode(space, w_pycode, m):
+    # (no attempt at using write_ref here, there is little point imho)
     m.start(TYPE_CODE)
     # see pypy.interpreter.pycode for the layout
     x = space.interp_w(PyCode, w_pycode)
@@ -331,105 +369,161 @@
     m.put_int(x.co_stacksize)
     m.put_int(x.co_flags)
     m.atom_str(TYPE_STRING, x.co_code)
-    m.put_tuple_w(TYPE_TUPLE, x.co_consts_w)
-    _put_str_list(space, m, [space.str_w(w_name) for w_name in x.co_names_w])
-    _put_str_list(space, m, x.co_varnames)
-    _put_str_list(space, m, x.co_freevars)
-    _put_str_list(space, m, x.co_cellvars)
-    m.atom_str(TYPE_STRING, x.co_filename)
-    m.atom_str(TYPE_STRING, x.co_name)
+    _marshal_tuple(space, x.co_consts_w, m)
+    _marshal_tuple(space, x.co_names_w, m)   # list of w_unicodes
+    co_varnames_w = [space.wrap(s.decode('utf-8')) for s in x.co_varnames]
+    co_freevars_w = [space.wrap(s.decode('utf-8')) for s in x.co_freevars]
+    co_cellvars_w = [space.wrap(s.decode('utf-8')) for s in x.co_cellvars]
+    _marshal_tuple(space, co_varnames_w, m)  # more lists, now of w_unicodes
+    _marshal_tuple(space, co_freevars_w, m)
+    _marshal_tuple(space, co_cellvars_w, m)
+    _marshal_unicode(space, x.co_filename, m)
+    _marshal_unicode(space, x.co_name, m)
     m.put_int(x.co_firstlineno)
     m.atom_str(TYPE_STRING, x.co_lnotab)
 
 # helper for unmarshalling "tuple of string" objects
 # into rpython-level lists of strings.  Only for code objects.
 
-def unmarshal_str(u):
+def _unmarshal_strlist(u):
+    items_w = _unmarshal_tuple_w(u)
+    return [u.space.unicode_w(w_item).encode('utf-8') for w_item in items_w]
+
+def _unmarshal_tuple_w(u):
     w_obj = u.get_w_obj()
     try:
-        return u.space.bytes_w(w_obj)
-    except OperationError as e:
-        if e.match(u.space, u.space.w_TypeError):
-            u.raise_exc('invalid marshal data for code object')
-        else:
-            raise
-
-def unmarshal_str0(u):
-    w_obj = u.get_w_obj()
-    try:
-        return u.space.bytes0_w(w_obj)
+        return u.space.fixedview(w_obj)
     except OperationError as e:
         if e.match(u.space, u.space.w_TypeError):
             u.raise_exc('invalid marshal data for code object')
         raise
 
-def unmarshal_strlist(u, tc):
-    lng = u.atom_lng(tc)
-    return [unmarshal_str(u) for i in range(lng)]
-
-@unmarshaller(TYPE_CODE)
+@unmarshaller(TYPE_CODE, save_ref=True)
 def unmarshal_pycode(space, u, tc):
+    w_codeobj = objectmodel.instantiate(PyCode)
+    u.save_ref(tc, w_codeobj)
     argcount    = u.get_int()
     kwonlyargcount = u.get_int()
     nlocals     = u.get_int()
     stacksize   = u.get_int()
     flags       = u.get_int()
-    code        = unmarshal_str(u)
-    u.start(TYPE_TUPLE)
-    consts_w    = u.get_tuple_w()
-    # copy in order not to merge it with anything else
-    names       = unmarshal_strlist(u, TYPE_TUPLE)
-    varnames    = unmarshal_strlist(u, TYPE_TUPLE)
-    freevars    = unmarshal_strlist(u, TYPE_TUPLE)
-    cellvars    = unmarshal_strlist(u, TYPE_TUPLE)
-    filename    = unmarshal_str0(u)
-    name        = unmarshal_str(u)
+    code        = space.bytes_w(u.get_w_obj())
+    consts_w    = _unmarshal_tuple_w(u)   
+    names       = _unmarshal_strlist(u)
+    varnames    = _unmarshal_strlist(u)
+    freevars    = _unmarshal_strlist(u)
+    cellvars    = _unmarshal_strlist(u)
+    filename    = space.unicode0_w(u.get_w_obj()).encode('utf-8')
+    name        = space.unicode_w(u.get_w_obj()).encode('utf-8')
     firstlineno = u.get_int()
-    lnotab      = unmarshal_str(u)
-    return PyCode(space, argcount, kwonlyargcount, nlocals, stacksize, flags,
+    lnotab      = space.bytes_w(u.get_w_obj())
+    PyCode.__init__(w_codeobj,
+                  space, argcount, kwonlyargcount, nlocals, stacksize, flags,
                   code, consts_w[:], names, varnames, filename,
                   name, firstlineno, lnotab, freevars, cellvars)
+    return w_codeobj
 
 
+def _marshal_unicode(space, s, m, w_unicode=None):
+    if m.version >= 3:
+        w_interned = space.get_interned_str(s)
+    else:
+        w_interned = None
+    if w_interned is not None:
+        w_unicode = w_interned    # use the interned W_UnicodeObject
+        typecode = TYPE_INTERNED  #   as a key for u.all_refs
+    else:
+        typecode = TYPE_UNICODE
+    if w_unicode is not None:
+        typecode = write_ref(typecode, w_unicode, m)
+    if typecode != FLAG_DONE:
+        m.atom_str(typecode, s)
+
 @marshaller(W_UnicodeObject)
 def marshal_unicode(space, w_unicode, m):
     s = unicodehelper.encode_utf8(space, space.unicode_w(w_unicode),
                                   allow_surrogates=True)
-    m.atom_str(TYPE_UNICODE, s)
+    _marshal_unicode(space, s, m, w_unicode=w_unicode)
 
 @unmarshaller(TYPE_UNICODE)
 def unmarshal_unicode(space, u, tc):
-    return space.wrap(unicodehelper.decode_utf8(space, u.get_str(),
-                                                allow_surrogates=True))
+    uc = unicodehelper.decode_utf8(space, u.get_str(), allow_surrogates=True)
+    return space.newunicode(uc)
+
+@unmarshaller(TYPE_INTERNED)
+def unmarshal_bytes(space, u, tc):
+    return space.new_interned_str(u.get_str())
+
+def _unmarshal_ascii(u, short_length, interned):
+    if short_length:
+        lng = ord(u.get1())
+    else:
+        lng = u.get_lng()
+    s = u.get(lng)
+    w_u = u.space.newunicode(s.decode('latin-1'))
+    if interned:
+        w_u = u.space.new_interned_w_str(w_u)
+    return w_u
+
+@unmarshaller(TYPE_ASCII)    # nb. never generated by pypy so far
+def unmarshal_ascii(space, u, tc):
+    return _unmarshal_ascii(u, False, False)
+@unmarshaller(TYPE_ASCII_INTERNED)
+def unmarshal_ascii(space, u, tc):
+    return _unmarshal_ascii(u, False, True)
+@unmarshaller(TYPE_SHORT_ASCII)
+def unmarshal_ascii(space, u, tc):
+    return _unmarshal_ascii(u, True, False)
+@unmarshaller(TYPE_SHORT_ASCII_INTERNED)
+def unmarshal_ascii(space, u, tc):
+    return _unmarshal_ascii(u, True, True)
+
 
 @marshaller(W_SetObject)
 def marshal_set(space, w_set, m):
-    lis_w = space.fixedview(w_set)
-    m.put_tuple_w(TYPE_SET, lis_w)
+    typecode = write_ref(TYPE_SET, w_set, m)
+    if typecode != FLAG_DONE:
+        lis_w = space.fixedview(w_set)
+        m.put_tuple_w(typecode, lis_w)
 
-@unmarshaller(TYPE_SET)
+@unmarshaller(TYPE_SET, save_ref=True)
 def unmarshal_set(space, u, tc):
-    return unmarshal_set_frozenset(space, u, tc)
+    w_set = space.call_function(space.w_set)
+    u.save_ref(tc, w_set)
+    _unmarshal_set_frozenset(space, u, w_set)
+    return w_set
 
 
 @marshaller(W_FrozensetObject)
 def marshal_frozenset(space, w_frozenset, m):
-    lis_w = space.fixedview(w_frozenset)
-    m.put_tuple_w(TYPE_FROZENSET, lis_w)
+    typecode = write_ref(TYPE_FROZENSET, w_frozenset, m)
+    if typecode != FLAG_DONE:
+        lis_w = space.fixedview(w_frozenset)
+        m.put_tuple_w(typecode, lis_w)
 
-def unmarshal_set_frozenset(space, u, tc):
+def _unmarshal_set_frozenset(space, u, w_set):
     lng = u.get_lng()
-    w_set = space.call_function(space.w_set)
     for i in xrange(lng):
         w_obj = u.get_w_obj()
         space.call_method(w_set, "add", w_obj)
-    if tc == TYPE_FROZENSET:
-        w_set = space.call_function(space.w_frozenset, w_set)
-    return w_set
 
 @unmarshaller(TYPE_FROZENSET)
 def unmarshal_frozenset(space, u, tc):
-    return unmarshal_set_frozenset(space, u, tc)
+    w_set = space.call_function(space.w_set)
+    _unmarshal_set_frozenset(space, u, w_set)
+    return space.call_function(space.w_frozenset, w_set)
+
+
+@unmarshaller(TYPE_REF)
+def unmarshal_ref(space, u, tc):
+    index = u.get_lng()
+    if 0 <= index < len(u.refs_w):
+        w_obj = u.refs_w[index]
+    else:
+        w_obj = None
+    if w_obj is None:
+        raise oefmt(space.w_ValueError, "bad marshal data (invalid reference)")
+    return w_obj
 
 
 _marshallers_unroll = unrolling_iterable(_marshallers)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3.5: hg merge py3.5-marshal3

Reply via email to