Author: Armin Rigo <[email protected]>
Branch: intern-not-immortal
Changeset: r74610:b866d4c8dbd1
Date: 2014-11-20 13:16 +0100
http://bitbucket.org/pypy/pypy/changeset/b866d4c8dbd1/

Log:    Try to fix marshal's handling of interned strings

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -796,6 +796,12 @@
             self.interned_strings.set(s, w_s1)
         return w_s1
 
+    def is_interned_str(self, s):
+        # interface for marshal_impl
+        if not we_are_translated():
+            assert type(s) is str
+        return self.interned_strings.get(s) is not None
+
     def descr_self_interp_w(self, RequiredClass, w_obj):
         if not isinstance(w_obj, RequiredClass):
             raise DescrMismatch()
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -144,7 +144,6 @@
     atom_int(tc, int)           puts code and int
     atom_int64(tc, int64)       puts code and int64
     atom_str(tc, str)           puts code, len and string
-    atom_strlist(tc, strlist)   puts code, len and list of strings
 
     building blocks for compound types:
 
@@ -198,15 +197,6 @@
         self.atom_int(typecode, len(x))
         self.put(x)
 
-    def atom_strlist(self, typecode, tc2, x):
-        self.atom_int(typecode, len(x))
-        atom_str = self.atom_str
-        for item in x:
-            # type(str) seems to be forbidden
-            #if type(item) is not str:
-            #    self.raise_exc('object with wrong type in strlist')
-            atom_str(tc2, item)
-
     def start(self, typecode):
         # type(char) not supported
         self.put(typecode)
@@ -379,16 +369,6 @@
         self.start(typecode)
         return self.get_lng()
 
-    def atom_strlist(self, typecode, tc2):
-        self.start(typecode)
-        lng = self.get_lng()
-        res = [None] * lng
-        idx = 0
-        while idx < lng:
-            res[idx] = self.atom_str(tc2)
-            idx += 1
-        return res
-
     def start(self, typecode):
         tc = self.get1()
         if tc != typecode:
@@ -436,7 +416,6 @@
 
     def get_w_obj(self, allow_null=False):
         space = self.space
-        w_ret = space.w_None # something not None
         tc = self.get1()
         w_ret = self._dispatch[ord(tc)](space, self, tc)
         if w_ret is None and not allow_null:
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -244,26 +244,19 @@
     return space.newcomplex(real, imag)
 
 
-# XXX currently, intern() is at applevel,
-# and there is no interface to get at the
-# internal table.
-# Move intern to interplevel and add a flag
-# to strings.
-def PySTRING_CHECK_INTERNED(w_str):
-    return False
-
 @marshaller(W_BytesObject)
 def marshal_bytes(space, w_str, m):
     s = space.str_w(w_str)
-    if m.version >= 1 and PySTRING_CHECK_INTERNED(w_str):
+    if m.version >= 1 and space.is_interned_str(s):
         # we use a native rtyper stringdict for speed
-        idx = m.stringtable.get(s, -1)
-        if idx >= 0:
-            m.atom_int(TYPE_STRINGREF, idx)
-        else:
+        try:
+            idx = m.stringtable[s]
+        except KeyError:
             idx = len(m.stringtable)
             m.stringtable[s] = idx
             m.atom_str(TYPE_INTERNED, s)
+        else:
+            m.atom_int(TYPE_STRINGREF, idx)
     else:
         m.atom_str(TYPE_STRING, s)
 
@@ -273,10 +266,8 @@
 
 @unmarshaller(TYPE_INTERNED)
 def unmarshal_interned(space, u, tc):
-    w_ret = space.wrap(u.get_str())
+    w_ret = space.new_interned_str(u.get_str())
     u.stringtable_w.append(w_ret)
-    w_intern = space.builtin.get('intern')
-    space.call_function(w_intern, w_ret)
     return w_ret
 
 @unmarshaller(TYPE_STRINGREF)
@@ -338,6 +329,12 @@
     return None
 
 
+def _put_interned_str_list(space, m, strlist):
+    lst = [None] * len(strlist)
+    for i in range(len(strlist)):
+        lst[i] = space.new_interned_str(strlist[i])
+    m.put_tuple_w(TYPE_TUPLE, lst)
+
 @marshaller(PyCode)
 def marshal_pycode(space, w_pycode, m):
     m.start(TYPE_CODE)
@@ -348,19 +345,18 @@
     m.put_int(x.co_stacksize)
     m.put_int(x.co_flags)
     m.atom_str(TYPE_STRING, x.co_code)
-    m.put_tuple_w(TYPE_TUPLE, x.co_consts_w[:])
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, [space.str_w(w_name) for w_name in x.co_names_w])
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_varnames)
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_freevars)
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_cellvars)
-    m.atom_str(TYPE_INTERNED, x.co_filename)
-    m.atom_str(TYPE_INTERNED, x.co_name)
+    m.put_tuple_w(TYPE_TUPLE, x.co_consts_w)
+    m.put_tuple_w(TYPE_TUPLE, x.co_names_w)
+    _put_interned_str_list(space, m, x.co_varnames)
+    _put_interned_str_list(space, m, x.co_freevars)
+    _put_interned_str_list(space, m, x.co_cellvars)
+    m.put_w_obj(space.new_interned_str(x.co_filename))
+    m.put_w_obj(space.new_interned_str(x.co_name))
     m.put_int(x.co_firstlineno)
     m.atom_str(TYPE_STRING, x.co_lnotab)
 
-# helper for unmarshalling string lists of code objects.
-# unfortunately they now can be interned or referenced,
-# so we no longer can handle it in interp_marshal.atom_strlist
+# helper for unmarshalling "tuple of string" objects
+# into rpython-level lists of strings.  Only for code objects.
 
 def unmarshal_str(u):
     w_obj = u.get_w_obj()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to