Author: Armin Rigo <[email protected]>
Branch:
Changeset: r74615:2afdc25d04e2
Date: 2014-11-20 14:51 +0100
http://bitbucket.org/pypy/pypy/changeset/2afdc25d04e2/
Log: Merge intern-not-immortal: fix intern() to return mortal strings,
and try to fix things so that the AST compiler and the unmarshaller
try to produce correctly-interned strings.
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -2,7 +2,7 @@
Python control flow graph generation and bytecode assembly.
"""
-from pypy.interpreter.astcompiler import ast, symtable
+from pypy.interpreter.astcompiler import ast, symtable, misc
from pypy.interpreter import pycode
from pypy.tool import stdlib_opcode as ops
@@ -365,7 +365,9 @@
raise
break
w_index = space.getitem(w_consts, w_key)
- consts_w[space.int_w(w_index)] = space.getitem(w_key, first)
+ w_constant = space.getitem(w_key, first)
+ w_constant = misc.intern_if_common_string(space, w_constant)
+ consts_w[space.int_w(w_index)] = w_constant
return consts_w
def _get_code_flags(self):
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -106,3 +106,13 @@
except IndexError:
return name
return "_%s%s" % (klass[i:], name)
+
+
+def intern_if_common_string(space, w_const):
+ # only intern identifier-like strings
+ if not space.is_w(space.type(w_const), space.w_str):
+ return w_const
+ for c in space.str_w(w_const):
+ if not (c.isalnum() or c == '_'):
+ return w_const
+ return space.new_interned_w_str(w_const)
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -272,6 +272,11 @@
if w_const is None:
return tup
consts_w[i] = w_const
+ # intern the string constants packed into the tuple here,
+ # because assemble.py will see the result as just a tuple constant
+ for i in range(len(consts_w)):
+ consts_w[i] = misc.intern_if_common_string(
+ self.space, consts_w[i])
else:
consts_w = []
w_consts = self.space.newtuple(consts_w)
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -14,7 +14,7 @@
UserDelAction)
from pypy.interpreter.error import OperationError, new_exception_class, oefmt
from pypy.interpreter.argument import Arguments
-from pypy.interpreter.miscutils import ThreadLocals
+from pypy.interpreter.miscutils import ThreadLocals, make_weak_value_dictionary
__all__ = ['ObjSpace', 'OperationError', 'W_Root']
@@ -384,7 +384,7 @@
self.builtin_modules = {}
self.reloading_modules = {}
- self.interned_strings = {}
+ self.interned_strings = make_weak_value_dictionary(self, str, W_Root)
self.actionflag = ActionFlag() # changed by the signal module
self.check_signal_action = None # changed by the signal module
self.user_del_action = UserDelAction(self)
@@ -777,25 +777,30 @@
return self.w_False
def new_interned_w_str(self, w_s):
+ assert isinstance(w_s, W_Root) # and is not None
s = self.str_w(w_s)
if not we_are_translated():
assert type(s) is str
- try:
- return self.interned_strings[s]
- except KeyError:
- pass
- self.interned_strings[s] = w_s
- return w_s
+ w_s1 = self.interned_strings.get(s)
+ if w_s1 is None:
+ w_s1 = w_s
+ self.interned_strings.set(s, w_s1)
+ return w_s1
def new_interned_str(self, s):
if not we_are_translated():
assert type(s) is str
- try:
- return self.interned_strings[s]
- except KeyError:
- pass
- w_s = self.interned_strings[s] = self.wrap(s)
- return w_s
+ w_s1 = self.interned_strings.get(s)
+ if w_s1 is None:
+ w_s1 = self.wrap(s)
+ self.interned_strings.set(s, w_s1)
+ return w_s1
+
+ def is_interned_str(self, s):
+ # interface for marshal_impl
+ if not we_are_translated():
+ assert type(s) is str
+ return self.interned_strings.get(s) is not None
def descr_self_interp_w(self, RequiredClass, w_obj):
if not isinstance(w_obj, RequiredClass):
diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py
--- a/pypy/interpreter/miscutils.py
+++ b/pypy/interpreter/miscutils.py
@@ -31,3 +31,19 @@
def getallvalues(self):
return {0: self._value}
+
+
+def make_weak_value_dictionary(space, keytype, valuetype):
+ "NOT_RPYTHON"
+ if space.config.translation.rweakref:
+ from rpython.rlib.rweakref import RWeakValueDictionary
+ return RWeakValueDictionary(keytype, valuetype)
+ else:
+ class FakeWeakValueDict(object):
+ def __init__(self):
+ self._dict = {}
+ def get(self, key):
+ return self._dict.get(key, None)
+ def set(self, key, value):
+ self._dict[key] = value
+ return FakeWeakValueDict()
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -970,7 +970,12 @@
sys.stdout = out
output = s.getvalue()
assert "CALL_METHOD" in output
-
+
+ def test_interned_strings(self):
+ source = """x = ('foo_bar42', 5); y = 'foo_bar42'; z = x[0]"""
+ exec source
+ assert y is z
+
class AppTestExceptions:
def test_indentation_error(self):
diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py
--- a/pypy/interpreter/test/test_objspace.py
+++ b/pypy/interpreter/test/test_objspace.py
@@ -378,3 +378,41 @@
assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
space.startup()
assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
+
+ def test_interned_strings_are_weak(self):
+ import weakref, gc, random
+ space = self.space
+ assert space.config.translation.rweakref
+ w1 = space.new_interned_str("abcdef")
+ w2 = space.new_interned_str("abcdef")
+ assert w2 is w1
+ #
+ # check that 'w1' goes away if we don't hold a reference to it
+ rw1 = weakref.ref(w1)
+ del w1, w2
+ i = 10
+ while rw1() is not None:
+ i -= 1
+ assert i >= 0
+ gc.collect()
+ #
+ s = "foobar%r" % random.random()
+ w0 = space.wrap(s)
+ w1 = space.new_interned_w_str(w0)
+ assert w1 is w0
+ w2 = space.new_interned_w_str(w0)
+ assert w2 is w0
+ w3 = space.wrap(s)
+ assert w3 is not w0
+ w4 = space.new_interned_w_str(w3)
+ assert w4 is w0
+ #
+ # check that 'w0' goes away if we don't hold a reference to it
+ # (even if we hold a reference to 'w3')
+ rw0 = weakref.ref(w0)
+ del w0, w1, w2, w4
+ i = 10
+ while rw0() is not None:
+ i -= 1
+ assert i >= 0
+ gc.collect()
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -144,7 +144,6 @@
atom_int(tc, int) puts code and int
atom_int64(tc, int64) puts code and int64
atom_str(tc, str) puts code, len and string
- atom_strlist(tc, strlist) puts code, len and list of strings
building blocks for compound types:
@@ -198,15 +197,6 @@
self.atom_int(typecode, len(x))
self.put(x)
- def atom_strlist(self, typecode, tc2, x):
- self.atom_int(typecode, len(x))
- atom_str = self.atom_str
- for item in x:
- # type(str) seems to be forbidden
- #if type(item) is not str:
- # self.raise_exc('object with wrong type in strlist')
- atom_str(tc2, item)
-
def start(self, typecode):
# type(char) not supported
self.put(typecode)
@@ -379,16 +369,6 @@
self.start(typecode)
return self.get_lng()
- def atom_strlist(self, typecode, tc2):
- self.start(typecode)
- lng = self.get_lng()
- res = [None] * lng
- idx = 0
- while idx < lng:
- res[idx] = self.atom_str(tc2)
- idx += 1
- return res
-
def start(self, typecode):
tc = self.get1()
if tc != typecode:
@@ -436,7 +416,6 @@
def get_w_obj(self, allow_null=False):
space = self.space
- w_ret = space.w_None # something not None
tc = self.get1()
w_ret = self._dispatch[ord(tc)](space, self, tc)
if w_ret is None and not allow_null:
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -244,26 +244,19 @@
return space.newcomplex(real, imag)
-# XXX currently, intern() is at applevel,
-# and there is no interface to get at the
-# internal table.
-# Move intern to interplevel and add a flag
-# to strings.
-def PySTRING_CHECK_INTERNED(w_str):
- return False
-
@marshaller(W_BytesObject)
def marshal_bytes(space, w_str, m):
s = space.str_w(w_str)
- if m.version >= 1 and PySTRING_CHECK_INTERNED(w_str):
+ if m.version >= 1 and space.is_interned_str(s):
# we use a native rtyper stringdict for speed
- idx = m.stringtable.get(s, -1)
- if idx >= 0:
- m.atom_int(TYPE_STRINGREF, idx)
- else:
+ try:
+ idx = m.stringtable[s]
+ except KeyError:
idx = len(m.stringtable)
m.stringtable[s] = idx
m.atom_str(TYPE_INTERNED, s)
+ else:
+ m.atom_int(TYPE_STRINGREF, idx)
else:
m.atom_str(TYPE_STRING, s)
@@ -273,10 +266,8 @@
@unmarshaller(TYPE_INTERNED)
def unmarshal_interned(space, u, tc):
- w_ret = space.wrap(u.get_str())
+ w_ret = space.new_interned_str(u.get_str())
u.stringtable_w.append(w_ret)
- w_intern = space.builtin.get('intern')
- space.call_function(w_intern, w_ret)
return w_ret
@unmarshaller(TYPE_STRINGREF)
@@ -338,6 +329,12 @@
return None
+def _put_interned_str_list(space, m, strlist):
+ lst = [None] * len(strlist)
+ for i in range(len(strlist)):
+ lst[i] = space.new_interned_str(strlist[i])
+ m.put_tuple_w(TYPE_TUPLE, lst)
+
@marshaller(PyCode)
def marshal_pycode(space, w_pycode, m):
m.start(TYPE_CODE)
@@ -348,19 +345,18 @@
m.put_int(x.co_stacksize)
m.put_int(x.co_flags)
m.atom_str(TYPE_STRING, x.co_code)
- m.put_tuple_w(TYPE_TUPLE, x.co_consts_w[:])
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, [space.str_w(w_name) for w_name in x.co_names_w])
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_varnames)
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_freevars)
- m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_cellvars)
- m.atom_str(TYPE_INTERNED, x.co_filename)
- m.atom_str(TYPE_INTERNED, x.co_name)
+ m.put_tuple_w(TYPE_TUPLE, x.co_consts_w)
+ m.put_tuple_w(TYPE_TUPLE, x.co_names_w)
+ _put_interned_str_list(space, m, x.co_varnames)
+ _put_interned_str_list(space, m, x.co_freevars)
+ _put_interned_str_list(space, m, x.co_cellvars)
+ m.put_w_obj(space.new_interned_str(x.co_filename))
+ m.put_w_obj(space.new_interned_str(x.co_name))
m.put_int(x.co_firstlineno)
m.atom_str(TYPE_STRING, x.co_lnotab)
-# helper for unmarshalling string lists of code objects.
-# unfortunately they now can be interned or referenced,
-# so we no longer can handle it in interp_marshal.atom_strlist
+# helper for unmarshalling "tuple of string" objects
+# into rpython-level lists of strings. Only for code objects.
def unmarshal_str(u):
w_obj = u.get_w_obj()
diff --git a/rpython/rlib/rweakref.py b/rpython/rlib/rweakref.py
--- a/rpython/rlib/rweakref.py
+++ b/rpython/rlib/rweakref.py
@@ -105,7 +105,7 @@
rtyper.getrepr(self.s_key))
def rtyper_makekey(self):
- return self.__class__,
+ return self.__class__, self.s_key.rtyper_makekey(), self.valueclassdef
def method_get(self, s_key):
return annmodel.SomeInstance(self.valueclassdef, can_be_None=True)
@@ -165,7 +165,7 @@
return _rweakkeydict.WeakKeyDictRepr(rtyper)
def rtyper_makekey(self):
- return self.__class__,
+ return self.__class__, self.keyclassdef, self.valueclassdef
def method_get(self, s_key):
assert isinstance(s_key, annmodel.SomeInstance)
diff --git a/rpython/rlib/test/test_rweakvaldict.py b/rpython/rlib/test/test_rweakvaldict.py
--- a/rpython/rlib/test/test_rweakvaldict.py
+++ b/rpython/rlib/test/test_rweakvaldict.py
@@ -144,3 +144,13 @@
d = RWeakValueDictionary(str, Y)
d.set("x", X())
py.test.raises(Exception, interpret, g, [1])
+
+
+def test_bogus_makekey():
+ class X: pass
+ class Y: pass
+ def g():
+ X(); Y()
+ RWeakValueDictionary(str, X).get("foobar")
+ RWeakValueDictionary(int, Y).get(42)
+ interpret(g, [])
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit