Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r74615:2afdc25d04e2
Date: 2014-11-20 14:51 +0100
http://bitbucket.org/pypy/pypy/changeset/2afdc25d04e2/

Log:    Merge intern-not-immortal: fix intern() to return mortal strings,
        and try to fix things so that the AST compiler and the unmarshaller
        try to produce correctly-interned strings.

diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -2,7 +2,7 @@
 Python control flow graph generation and bytecode assembly.
 """
 
-from pypy.interpreter.astcompiler import ast, symtable
+from pypy.interpreter.astcompiler import ast, symtable, misc
 from pypy.interpreter import pycode
 from pypy.tool import stdlib_opcode as ops
 
@@ -365,7 +365,9 @@
                     raise
                 break
             w_index = space.getitem(w_consts, w_key)
-            consts_w[space.int_w(w_index)] = space.getitem(w_key, first)
+            w_constant = space.getitem(w_key, first)
+            w_constant = misc.intern_if_common_string(space, w_constant)
+            consts_w[space.int_w(w_index)] = w_constant
         return consts_w
 
     def _get_code_flags(self):
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -106,3 +106,13 @@
     except IndexError:
         return name
     return "_%s%s" % (klass[i:], name)
+
+
+def intern_if_common_string(space, w_const):
+    # only intern identifier-like strings
+    if not space.is_w(space.type(w_const), space.w_str):
+        return w_const
+    for c in space.str_w(w_const):
+        if not (c.isalnum() or c == '_'):
+            return w_const
+    return space.new_interned_w_str(w_const)
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -272,6 +272,11 @@
                 if w_const is None:
                     return tup
                 consts_w[i] = w_const
+            # intern the string constants packed into the tuple here,
+            # because assemble.py will see the result as just a tuple constant
+            for i in range(len(consts_w)):
+                consts_w[i] = misc.intern_if_common_string(
+                    self.space, consts_w[i])
         else:
             consts_w = []
         w_consts = self.space.newtuple(consts_w)
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -14,7 +14,7 @@
     UserDelAction)
 from pypy.interpreter.error import OperationError, new_exception_class, oefmt
 from pypy.interpreter.argument import Arguments
-from pypy.interpreter.miscutils import ThreadLocals
+from pypy.interpreter.miscutils import ThreadLocals, make_weak_value_dictionary
 
 
 __all__ = ['ObjSpace', 'OperationError', 'W_Root']
@@ -384,7 +384,7 @@
         self.builtin_modules = {}
         self.reloading_modules = {}
 
-        self.interned_strings = {}
+        self.interned_strings = make_weak_value_dictionary(self, str, W_Root)
         self.actionflag = ActionFlag()    # changed by the signal module
         self.check_signal_action = None   # changed by the signal module
         self.user_del_action = UserDelAction(self)
@@ -777,25 +777,30 @@
             return self.w_False
 
     def new_interned_w_str(self, w_s):
+        assert isinstance(w_s, W_Root)   # and is not None
         s = self.str_w(w_s)
         if not we_are_translated():
             assert type(s) is str
-        try:
-            return self.interned_strings[s]
-        except KeyError:
-            pass
-        self.interned_strings[s] = w_s
-        return w_s
+        w_s1 = self.interned_strings.get(s)
+        if w_s1 is None:
+            w_s1 = w_s
+            self.interned_strings.set(s, w_s1)
+        return w_s1
 
     def new_interned_str(self, s):
         if not we_are_translated():
             assert type(s) is str
-        try:
-            return self.interned_strings[s]
-        except KeyError:
-            pass
-        w_s = self.interned_strings[s] = self.wrap(s)
-        return w_s
+        w_s1 = self.interned_strings.get(s)
+        if w_s1 is None:
+            w_s1 = self.wrap(s)
+            self.interned_strings.set(s, w_s1)
+        return w_s1
+
+    def is_interned_str(self, s):
+        # interface for marshal_impl
+        if not we_are_translated():
+            assert type(s) is str
+        return self.interned_strings.get(s) is not None
 
     def descr_self_interp_w(self, RequiredClass, w_obj):
         if not isinstance(w_obj, RequiredClass):
diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py
--- a/pypy/interpreter/miscutils.py
+++ b/pypy/interpreter/miscutils.py
@@ -31,3 +31,19 @@
 
     def getallvalues(self):
         return {0: self._value}
+
+
+def make_weak_value_dictionary(space, keytype, valuetype):
+    "NOT_RPYTHON"
+    if space.config.translation.rweakref:
+        from rpython.rlib.rweakref import RWeakValueDictionary
+        return RWeakValueDictionary(keytype, valuetype)
+    else:
+        class FakeWeakValueDict(object):
+            def __init__(self):
+                self._dict = {}
+            def get(self, key):
+                return self._dict.get(key, None)
+            def set(self, key, value):
+                self._dict[key] = value
+        return FakeWeakValueDict()
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -970,7 +970,12 @@
             sys.stdout = out
         output = s.getvalue()
         assert "CALL_METHOD" in output
-            
+
+    def test_interned_strings(self):
+        source = """x = ('foo_bar42', 5); y = 'foo_bar42'; z = x[0]"""
+        exec source
+        assert y is z
+
 
 class AppTestExceptions:
     def test_indentation_error(self):
diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py
--- a/pypy/interpreter/test/test_objspace.py
+++ b/pypy/interpreter/test/test_objspace.py
@@ -378,3 +378,41 @@
         assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
         space.startup()
         assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
+
+    def test_interned_strings_are_weak(self):
+        import weakref, gc, random
+        space = self.space
+        assert space.config.translation.rweakref
+        w1 = space.new_interned_str("abcdef")
+        w2 = space.new_interned_str("abcdef")
+        assert w2 is w1
+        #
+        # check that 'w1' goes away if we don't hold a reference to it
+        rw1 = weakref.ref(w1)
+        del w1, w2
+        i = 10
+        while rw1() is not None:
+            i -= 1
+            assert i >= 0
+            gc.collect()
+        #
+        s = "foobar%r" % random.random()
+        w0 = space.wrap(s)
+        w1 = space.new_interned_w_str(w0)
+        assert w1 is w0
+        w2 = space.new_interned_w_str(w0)
+        assert w2 is w0
+        w3 = space.wrap(s)
+        assert w3 is not w0
+        w4 = space.new_interned_w_str(w3)
+        assert w4 is w0
+        #
+        # check that 'w0' goes away if we don't hold a reference to it
+        # (even if we hold a reference to 'w3')
+        rw0 = weakref.ref(w0)
+        del w0, w1, w2, w4
+        i = 10
+        while rw0() is not None:
+            i -= 1
+            assert i >= 0
+            gc.collect()
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -144,7 +144,6 @@
     atom_int(tc, int)           puts code and int
     atom_int64(tc, int64)       puts code and int64
     atom_str(tc, str)           puts code, len and string
-    atom_strlist(tc, strlist)   puts code, len and list of strings
 
     building blocks for compound types:
 
@@ -198,15 +197,6 @@
         self.atom_int(typecode, len(x))
         self.put(x)
 
-    def atom_strlist(self, typecode, tc2, x):
-        self.atom_int(typecode, len(x))
-        atom_str = self.atom_str
-        for item in x:
-            # type(str) seems to be forbidden
-            #if type(item) is not str:
-            #    self.raise_exc('object with wrong type in strlist')
-            atom_str(tc2, item)
-
     def start(self, typecode):
         # type(char) not supported
         self.put(typecode)
@@ -379,16 +369,6 @@
         self.start(typecode)
         return self.get_lng()
 
-    def atom_strlist(self, typecode, tc2):
-        self.start(typecode)
-        lng = self.get_lng()
-        res = [None] * lng
-        idx = 0
-        while idx < lng:
-            res[idx] = self.atom_str(tc2)
-            idx += 1
-        return res
-
     def start(self, typecode):
         tc = self.get1()
         if tc != typecode:
@@ -436,7 +416,6 @@
 
     def get_w_obj(self, allow_null=False):
         space = self.space
-        w_ret = space.w_None # something not None
         tc = self.get1()
         w_ret = self._dispatch[ord(tc)](space, self, tc)
         if w_ret is None and not allow_null:
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -244,26 +244,19 @@
     return space.newcomplex(real, imag)
 
 
-# XXX currently, intern() is at applevel,
-# and there is no interface to get at the
-# internal table.
-# Move intern to interplevel and add a flag
-# to strings.
-def PySTRING_CHECK_INTERNED(w_str):
-    return False
-
 @marshaller(W_BytesObject)
 def marshal_bytes(space, w_str, m):
     s = space.str_w(w_str)
-    if m.version >= 1 and PySTRING_CHECK_INTERNED(w_str):
+    if m.version >= 1 and space.is_interned_str(s):
         # we use a native rtyper stringdict for speed
-        idx = m.stringtable.get(s, -1)
-        if idx >= 0:
-            m.atom_int(TYPE_STRINGREF, idx)
-        else:
+        try:
+            idx = m.stringtable[s]
+        except KeyError:
             idx = len(m.stringtable)
             m.stringtable[s] = idx
             m.atom_str(TYPE_INTERNED, s)
+        else:
+            m.atom_int(TYPE_STRINGREF, idx)
     else:
         m.atom_str(TYPE_STRING, s)
 
@@ -273,10 +266,8 @@
 
 @unmarshaller(TYPE_INTERNED)
 def unmarshal_interned(space, u, tc):
-    w_ret = space.wrap(u.get_str())
+    w_ret = space.new_interned_str(u.get_str())
     u.stringtable_w.append(w_ret)
-    w_intern = space.builtin.get('intern')
-    space.call_function(w_intern, w_ret)
     return w_ret
 
 @unmarshaller(TYPE_STRINGREF)
@@ -338,6 +329,12 @@
     return None
 
 
+def _put_interned_str_list(space, m, strlist):
+    lst = [None] * len(strlist)
+    for i in range(len(strlist)):
+        lst[i] = space.new_interned_str(strlist[i])
+    m.put_tuple_w(TYPE_TUPLE, lst)
+
 @marshaller(PyCode)
 def marshal_pycode(space, w_pycode, m):
     m.start(TYPE_CODE)
@@ -348,19 +345,18 @@
     m.put_int(x.co_stacksize)
     m.put_int(x.co_flags)
     m.atom_str(TYPE_STRING, x.co_code)
-    m.put_tuple_w(TYPE_TUPLE, x.co_consts_w[:])
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, [space.str_w(w_name) for w_name in x.co_names_w])
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_varnames)
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_freevars)
-    m.atom_strlist(TYPE_TUPLE, TYPE_INTERNED, x.co_cellvars)
-    m.atom_str(TYPE_INTERNED, x.co_filename)
-    m.atom_str(TYPE_INTERNED, x.co_name)
+    m.put_tuple_w(TYPE_TUPLE, x.co_consts_w)
+    m.put_tuple_w(TYPE_TUPLE, x.co_names_w)
+    _put_interned_str_list(space, m, x.co_varnames)
+    _put_interned_str_list(space, m, x.co_freevars)
+    _put_interned_str_list(space, m, x.co_cellvars)
+    m.put_w_obj(space.new_interned_str(x.co_filename))
+    m.put_w_obj(space.new_interned_str(x.co_name))
     m.put_int(x.co_firstlineno)
     m.atom_str(TYPE_STRING, x.co_lnotab)
 
-# helper for unmarshalling string lists of code objects.
-# unfortunately they now can be interned or referenced,
-# so we no longer can handle it in interp_marshal.atom_strlist
+# helper for unmarshalling "tuple of string" objects
+# into rpython-level lists of strings.  Only for code objects.
 
 def unmarshal_str(u):
     w_obj = u.get_w_obj()
diff --git a/rpython/rlib/rweakref.py b/rpython/rlib/rweakref.py
--- a/rpython/rlib/rweakref.py
+++ b/rpython/rlib/rweakref.py
@@ -105,7 +105,7 @@
                                                rtyper.getrepr(self.s_key))
 
     def rtyper_makekey(self):
-        return self.__class__,
+        return self.__class__, self.s_key.rtyper_makekey(), self.valueclassdef
 
     def method_get(self, s_key):
         return annmodel.SomeInstance(self.valueclassdef, can_be_None=True)
@@ -165,7 +165,7 @@
         return _rweakkeydict.WeakKeyDictRepr(rtyper)
 
     def rtyper_makekey(self):
-        return self.__class__,
+        return self.__class__, self.keyclassdef, self.valueclassdef
 
     def method_get(self, s_key):
         assert isinstance(s_key, annmodel.SomeInstance)
diff --git a/rpython/rlib/test/test_rweakvaldict.py b/rpython/rlib/test/test_rweakvaldict.py
--- a/rpython/rlib/test/test_rweakvaldict.py
+++ b/rpython/rlib/test/test_rweakvaldict.py
@@ -144,3 +144,13 @@
             d = RWeakValueDictionary(str, Y)
         d.set("x", X())
     py.test.raises(Exception, interpret, g, [1])
+
+
+def test_bogus_makekey():
+    class X: pass
+    class Y: pass
+    def g():
+        X(); Y()
+        RWeakValueDictionary(str, X).get("foobar")
+        RWeakValueDictionary(int, Y).get(42)
+    interpret(g, [])
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to