Author: Armin Rigo <[email protected]>
Branch: intern-not-immortal
Changeset: r74610:b866d4c8dbd1
Date: 2014-11-20 13:16 +0100
http://bitbucket.org/pypy/pypy/changeset/b866d4c8dbd1/
Log: Try to fix marshal's handling of interned strings
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -796,6 +796,12 @@
self.interned_strings.set(s, w_s1)
return w_s1
+ def is_interned_str(self, s):
+ # interface for marshal_impl
+ if not we_are_translated():
+ assert type(s) is str
+ return self.interned_strings.get(s) is not None
+
def descr_self_interp_w(self, RequiredClass, w_obj):
if not isinstance(w_obj, RequiredClass):
raise DescrMismatch()
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -144,7 +144,6 @@
atom_int(tc, int) puts code and int
atom_int64(tc, int64) puts code and int64
atom_str(tc, str) puts code, len and string
- atom_strlist(tc, strlist) puts code, len and list of strings
building blocks for compound types:
@@ -198,15 +197,6 @@
self.atom_int(typecode, len(x))
self.put(x)
- def atom_strlist(self, typecode, tc2, x):
- self.atom_int(typecode, len(x))
- atom_str = self.atom_str
- for item in x:
- # type(str) seems to be forbidden
- #if type(item) is not str:
- # self.raise_exc('object with wrong type in strlist')
- atom_str(tc2, item)
-
def start(self, typecode):
# type(char) not supported
self.put(typecode)
@@ -379,16 +369,6 @@
self.start(typecode)
return self.get_lng()
- def atom_strlist(self, typecode, tc2):
- self.start(typecode)
- lng = self.get_lng()
- res = [None] * lng
- idx = 0
- while idx < lng:
- res[idx] = self.atom_str(tc2)
- idx += 1
- return res
-
def start(self, typecode):
tc = self.get1()
if tc != typecode:
@@ -436,7 +416,6 @@
def get_w_obj(self, allow_null=False):
space = self.space
- w_ret = space.w_None # something not None
tc = self.get1()
w_ret = self._dispatch[ord(tc)](space, self, tc)
if w_ret is None and not allow_null:
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -244,26 +244,19 @@
return space.newcomplex(real, imag)
-# XXX currently, intern() is at applevel,
-# and there is no interface to get at the
-# internal table.
-# Move intern to interplevel and add a flag
-# to strings.
-def PySTRING_CHECK_INTERNED(w_str):
- return False
-
@marshaller(W_BytesObject)
def marshal_bytes(space, w_str, m):
s = space.str_w(w_str)
- if m.version >= 1 and PySTRING_CHECK_INTERNED(w_str):
+ if m.version >= 1 and space.is_interned_str(s):
# we use a native rtyper stringdict for speed
- idx = m.stringtable.get(s, -1)
- if idx >= 0:
- m.atom_int(TYPE_STRINGREF, idx)
- else:
+ try:
+ idx = m.stringtable[s]
+ except KeyError:
idx = len(m.stringtable)
m.stringtable[s] = idx
m.atom_str(TYPE_INTERNED, s)
+ else:
+ m.atom_int(TYPE_STRINGREF, idx)
else:
m.atom_str(TYPE_STRING, s)
@@ -273,10 +266,8 @@
@unmarshaller(TYPE_INTERNED)
def unmarshal_interned(space, u, tc):
- w_ret = space.wrap(u.get_str())
+ w_ret = space.new_interned_str(u.get_str())
u.stringtable_w.append(w_ret)
- w_intern = space.builtin.get('intern')
- space.call_function(w_intern, w_ret)
return w_ret
@unmarshaller(TYPE_STRINGREF)
@@ -338,6 +329,12 @@
return None
+def _put_interned_str_list(space, m, strlist):
+ lst = [None] * len(strlist)
+ for i in range(len(strlist)):
+ lst[i] = space.new_interned_str(strlist[i])
+ m.put_tuple_w(TYPE_TUPLE, lst)
+
@marshaller(PyCode)
def marshal_pycode(space, w_pycode, m):
m.start(TYPE_CODE)
@@ -348,19 +345,18 @@
m.put_int(x.co_stacksize)
m.put_int(x.co_flags)
m.atom_str(TYPE_STRING, x.co_code)
- m.put_tuple_w(TYPE_TUPLE, x.co_consts_w[:])
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, [space.str_w(w_name) for w_name in x.co_names_w])
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_varnames)
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_freevars)
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_cellvars)
- m.atom_str(TYPE_INTERNED, x.co_filename)
- m.atom_str(TYPE_INTERNED, x.co_name)
+ m.put_tuple_w(TYPE_TUPLE, x.co_consts_w)
+ m.put_tuple_w(TYPE_TUPLE, x.co_names_w)
+ _put_interned_str_list(space, m, x.co_varnames)
+ _put_interned_str_list(space, m, x.co_freevars)
+ _put_interned_str_list(space, m, x.co_cellvars)
+ m.put_w_obj(space.new_interned_str(x.co_filename))
+ m.put_w_obj(space.new_interned_str(x.co_name))
m.put_int(x.co_firstlineno)
m.atom_str(TYPE_STRING, x.co_lnotab)
-# helper for unmarshalling string lists of code objects.
-# unfortunately they now can be interned or referenced,
-# so we no longer can handle it in interp_marshal.atom_strlist
+# helper for unmarshalling "tuple of string" objects
+# into rpython-level lists of strings. Only for code objects.
def unmarshal_str(u):
w_obj = u.get_w_obj()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit