Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r74615:2afdc25d04e2
Date: 2014-11-20 14:51 +0100
http://bitbucket.org/pypy/pypy/changeset/2afdc25d04e2/
Log: Merge intern-not-immortal: fix intern() to return mortal strings, and try to fix things so that the AST compiler and the unmarshaller try to produce correctly-interned strings. diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py --- a/pypy/interpreter/astcompiler/assemble.py +++ b/pypy/interpreter/astcompiler/assemble.py @@ -2,7 +2,7 @@ Python control flow graph generation and bytecode assembly. """ -from pypy.interpreter.astcompiler import ast, symtable +from pypy.interpreter.astcompiler import ast, symtable, misc from pypy.interpreter import pycode from pypy.tool import stdlib_opcode as ops @@ -365,7 +365,9 @@ raise break w_index = space.getitem(w_consts, w_key) - consts_w[space.int_w(w_index)] = space.getitem(w_key, first) + w_constant = space.getitem(w_key, first) + w_constant = misc.intern_if_common_string(space, w_constant) + consts_w[space.int_w(w_index)] = w_constant return consts_w def _get_code_flags(self): diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py --- a/pypy/interpreter/astcompiler/misc.py +++ b/pypy/interpreter/astcompiler/misc.py @@ -106,3 +106,13 @@ except IndexError: return name return "_%s%s" % (klass[i:], name) + + +def intern_if_common_string(space, w_const): + # only intern identifier-like strings + if not space.is_w(space.type(w_const), space.w_str): + return w_const + for c in space.str_w(w_const): + if not (c.isalnum() or c == '_'): + return w_const + return space.new_interned_w_str(w_const) diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py --- a/pypy/interpreter/astcompiler/optimize.py +++ b/pypy/interpreter/astcompiler/optimize.py @@ -272,6 +272,11 @@ if w_const is None: return tup consts_w[i] = w_const + # intern the string constants packed into the tuple here, + # because assemble.py will see the result as just a tuple constant + for i in range(len(consts_w)): + consts_w[i] = 
misc.intern_if_common_string( + self.space, consts_w[i]) else: consts_w = [] w_consts = self.space.newtuple(consts_w) diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -14,7 +14,7 @@ UserDelAction) from pypy.interpreter.error import OperationError, new_exception_class, oefmt from pypy.interpreter.argument import Arguments -from pypy.interpreter.miscutils import ThreadLocals +from pypy.interpreter.miscutils import ThreadLocals, make_weak_value_dictionary __all__ = ['ObjSpace', 'OperationError', 'W_Root'] @@ -384,7 +384,7 @@ self.builtin_modules = {} self.reloading_modules = {} - self.interned_strings = {} + self.interned_strings = make_weak_value_dictionary(self, str, W_Root) self.actionflag = ActionFlag() # changed by the signal module self.check_signal_action = None # changed by the signal module self.user_del_action = UserDelAction(self) @@ -777,25 +777,30 @@ return self.w_False def new_interned_w_str(self, w_s): + assert isinstance(w_s, W_Root) # and is not None s = self.str_w(w_s) if not we_are_translated(): assert type(s) is str - try: - return self.interned_strings[s] - except KeyError: - pass - self.interned_strings[s] = w_s - return w_s + w_s1 = self.interned_strings.get(s) + if w_s1 is None: + w_s1 = w_s + self.interned_strings.set(s, w_s1) + return w_s1 def new_interned_str(self, s): if not we_are_translated(): assert type(s) is str - try: - return self.interned_strings[s] - except KeyError: - pass - w_s = self.interned_strings[s] = self.wrap(s) - return w_s + w_s1 = self.interned_strings.get(s) + if w_s1 is None: + w_s1 = self.wrap(s) + self.interned_strings.set(s, w_s1) + return w_s1 + + def is_interned_str(self, s): + # interface for marshal_impl + if not we_are_translated(): + assert type(s) is str + return self.interned_strings.get(s) is not None def descr_self_interp_w(self, RequiredClass, w_obj): if not isinstance(w_obj, RequiredClass): 
diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py --- a/pypy/interpreter/miscutils.py +++ b/pypy/interpreter/miscutils.py @@ -31,3 +31,19 @@ def getallvalues(self): return {0: self._value} + + +def make_weak_value_dictionary(space, keytype, valuetype): + "NOT_RPYTHON" + if space.config.translation.rweakref: + from rpython.rlib.rweakref import RWeakValueDictionary + return RWeakValueDictionary(keytype, valuetype) + else: + class FakeWeakValueDict(object): + def __init__(self): + self._dict = {} + def get(self, key): + return self._dict.get(key, None) + def set(self, key, value): + self._dict[key] = value + return FakeWeakValueDict() diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py --- a/pypy/interpreter/test/test_compiler.py +++ b/pypy/interpreter/test/test_compiler.py @@ -970,7 +970,12 @@ sys.stdout = out output = s.getvalue() assert "CALL_METHOD" in output - + + def test_interned_strings(self): + source = """x = ('foo_bar42', 5); y = 'foo_bar42'; z = x[0]""" + exec source + assert y is z + class AppTestExceptions: def test_indentation_error(self): diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py --- a/pypy/interpreter/test/test_objspace.py +++ b/pypy/interpreter/test/test_objspace.py @@ -378,3 +378,41 @@ assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar' space.startup() assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar' + + def test_interned_strings_are_weak(self): + import weakref, gc, random + space = self.space + assert space.config.translation.rweakref + w1 = space.new_interned_str("abcdef") + w2 = space.new_interned_str("abcdef") + assert w2 is w1 + # + # check that 'w1' goes away if we don't hold a reference to it + rw1 = weakref.ref(w1) + del w1, w2 + i = 10 + while rw1() is not None: + i -= 1 + assert i >= 0 + gc.collect() + # + s = "foobar%r" % random.random() + w0 = space.wrap(s) + w1 = 
space.new_interned_w_str(w0) + assert w1 is w0 + w2 = space.new_interned_w_str(w0) + assert w2 is w0 + w3 = space.wrap(s) + assert w3 is not w0 + w4 = space.new_interned_w_str(w3) + assert w4 is w0 + # + # check that 'w0' goes away if we don't hold a reference to it + # (even if we hold a reference to 'w3') + rw0 = weakref.ref(w0) + del w0, w1, w2, w4 + i = 10 + while rw0() is not None: + i -= 1 + assert i >= 0 + gc.collect() diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py --- a/pypy/module/marshal/interp_marshal.py +++ b/pypy/module/marshal/interp_marshal.py @@ -144,7 +144,6 @@ atom_int(tc, int) puts code and int atom_int64(tc, int64) puts code and int64 atom_str(tc, str) puts code, len and string - atom_strlist(tc, strlist) puts code, len and list of strings building blocks for compound types: @@ -198,15 +197,6 @@ self.atom_int(typecode, len(x)) self.put(x) - def atom_strlist(self, typecode, tc2, x): - self.atom_int(typecode, len(x)) - atom_str = self.atom_str - for item in x: - # type(str) seems to be forbidden - #if type(item) is not str: - # self.raise_exc('object with wrong type in strlist') - atom_str(tc2, item) - def start(self, typecode): # type(char) not supported self.put(typecode) @@ -379,16 +369,6 @@ self.start(typecode) return self.get_lng() - def atom_strlist(self, typecode, tc2): - self.start(typecode) - lng = self.get_lng() - res = [None] * lng - idx = 0 - while idx < lng: - res[idx] = self.atom_str(tc2) - idx += 1 - return res - def start(self, typecode): tc = self.get1() if tc != typecode: @@ -436,7 +416,6 @@ def get_w_obj(self, allow_null=False): space = self.space - w_ret = space.w_None # something not None tc = self.get1() w_ret = self._dispatch[ord(tc)](space, self, tc) if w_ret is None and not allow_null: diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -244,26 +244,19 @@ return 
space.newcomplex(real, imag) -# XXX currently, intern() is at applevel, -# and there is no interface to get at the -# internal table. -# Move intern to interplevel and add a flag -# to strings. -def PySTRING_CHECK_INTERNED(w_str): - return False - @marshaller(W_BytesObject) def marshal_bytes(space, w_str, m): s = space.str_w(w_str) - if m.version >= 1 and PySTRING_CHECK_INTERNED(w_str): + if m.version >= 1 and space.is_interned_str(s): # we use a native rtyper stringdict for speed - idx = m.stringtable.get(s, -1) - if idx >= 0: - m.atom_int(TYPE_STRINGREF, idx) - else: + try: + idx = m.stringtable[s] + except KeyError: idx = len(m.stringtable) m.stringtable[s] = idx m.atom_str(TYPE_INTERNED, s) + else: + m.atom_int(TYPE_STRINGREF, idx) else: m.atom_str(TYPE_STRING, s) @@ -273,10 +266,8 @@ @unmarshaller(TYPE_INTERNED) def unmarshal_interned(space, u, tc): - w_ret = space.wrap(u.get_str()) + w_ret = space.new_interned_str(u.get_str()) u.stringtable_w.append(w_ret) - w_intern = space.builtin.get('intern') - space.call_function(w_intern, w_ret) return w_ret @unmarshaller(TYPE_STRINGREF) @@ -338,6 +329,12 @@ return None +def _put_interned_str_list(space, m, strlist): + lst = [None] * len(strlist) + for i in range(len(strlist)): + lst[i] = space.new_interned_str(strlist[i]) + m.put_tuple_w(TYPE_TUPLE, lst) + @marshaller(PyCode) def marshal_pycode(space, w_pycode, m): m.start(TYPE_CODE) @@ -348,19 +345,18 @@ m.put_int(x.co_stacksize) m.put_int(x.co_flags) m.atom_str(TYPE_STRING, x.co_code) - m.put_tuple_w(TYPE_TUPLE, x.co_consts_w[:]) - m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, [space.str_w(w_name) for w_name in x.co_names_w]) - m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_varnames) - m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_freevars) - m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_cellvars) - m.atom_str(TYPE_INTERNED, x.co_filename) - m.atom_str(TYPE_INTERNED, x.co_name) + m.put_tuple_w(TYPE_TUPLE, x.co_consts_w) + m.put_tuple_w(TYPE_TUPLE, x.co_names_w) + 
_put_interned_str_list(space, m, x.co_varnames) + _put_interned_str_list(space, m, x.co_freevars) + _put_interned_str_list(space, m, x.co_cellvars) + m.put_w_obj(space.new_interned_str(x.co_filename)) + m.put_w_obj(space.new_interned_str(x.co_name)) m.put_int(x.co_firstlineno) m.atom_str(TYPE_STRING, x.co_lnotab) -# helper for unmarshalling string lists of code objects. -# unfortunately they now can be interned or referenced, -# so we no longer can handle it in interp_marshal.atom_strlist +# helper for unmarshalling "tuple of string" objects +# into rpython-level lists of strings. Only for code objects. def unmarshal_str(u): w_obj = u.get_w_obj() diff --git a/rpython/rlib/rweakref.py b/rpython/rlib/rweakref.py --- a/rpython/rlib/rweakref.py +++ b/rpython/rlib/rweakref.py @@ -105,7 +105,7 @@ rtyper.getrepr(self.s_key)) def rtyper_makekey(self): - return self.__class__, + return self.__class__, self.s_key.rtyper_makekey(), self.valueclassdef def method_get(self, s_key): return annmodel.SomeInstance(self.valueclassdef, can_be_None=True) @@ -165,7 +165,7 @@ return _rweakkeydict.WeakKeyDictRepr(rtyper) def rtyper_makekey(self): - return self.__class__, + return self.__class__, self.keyclassdef, self.valueclassdef def method_get(self, s_key): assert isinstance(s_key, annmodel.SomeInstance) diff --git a/rpython/rlib/test/test_rweakvaldict.py b/rpython/rlib/test/test_rweakvaldict.py --- a/rpython/rlib/test/test_rweakvaldict.py +++ b/rpython/rlib/test/test_rweakvaldict.py @@ -144,3 +144,13 @@ d = RWeakValueDictionary(str, Y) d.set("x", X()) py.test.raises(Exception, interpret, g, [1]) + + +def test_bogus_makekey(): + class X: pass + class Y: pass + def g(): + X(); Y() + RWeakValueDictionary(str, X).get("foobar") + RWeakValueDictionary(int, Y).get(42) + interpret(g, []) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit