[pypy-commit] pypy rdict-experiments-2: in-progress of rweakkeydict

fijal Sun, 06 Jan 2013 15:22:11 -0800

Author: Maciej Fijalkowski <[email protected]>
Branch: rdict-experiments-2
Changeset: r59827:1fb600db1932
Date: 2013-01-07 01:20 +0200
http://bitbucket.org/pypy/pypy/changeset/1fb600db1932/


Log:    in-progress of rweakkeydict

diff --git a/pypy/rlib/_rweakkeydict.py b/pypy/rlib/_rweakkeydict.py
--- a/pypy/rlib/_rweakkeydict.py
+++ b/pypy/rlib/_rweakkeydict.py
@@ -1,13 +1,13 @@
 from pypy.objspace.flow.model import Constant
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rdict
+from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.lltypesystem.llmemory import weakref_create, weakref_deref
-from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.rclass import getinstancerepr
 from pypy.rpython.rmodel import Repr
 from pypy.rlib.rweakref import RWeakKeyDictionary
 from pypy.rlib import jit
+from pypy.rlib.debug import ll_assert
 from pypy.rlib.objectmodel import compute_identity_hash
-from pypy.rlib.objectmodel import we_are_translated
 
 
 # Warning: this implementation of RWeakKeyDictionary is not exactly
@@ -25,7 +25,7 @@
 
     def convert_const(self, weakdict):
         if not isinstance(weakdict, RWeakKeyDictionary):
-            raise TyperError("expected an RWeakKeyDictionary: %r" % (
+            raise TypeError("expected an RWeakKeyDictionary: %r" % (
                 weakdict,))
         try:
             key = Constant(weakdict)
@@ -33,7 +33,7 @@
         except KeyError:
             self.setup()
             if weakdict.length() != 0:
-                raise TyperError("got a non-empty prebuilt RWeakKeyDictionary")
+                raise TypeError("got a non-empty prebuilt RWeakKeyDictionary")
             l_dict = ll_new_weakdict()
             self.dict_cache[key] = l_dict
             return l_dict
@@ -83,8 +83,10 @@
         h = 0
     return '<%x>' % (h,)
 
-def ll_valid(entries, i):
-    key = entries[i].key
+def ll_valid(entries, index):
+    if index < 0:
+        return False
+    key = entries[index].key
     if not key:
         return False
     elif weakref_deref(rclass.OBJECTPTR, key):
@@ -93,19 +95,16 @@
         # The entry might be a dead weakref still holding a strong
         # reference to the value; for this case, we clear the old
         # value from the entry, if any.
-        entries[i].value = NULLVALUE
+        entries[index].value = NULLVALUE
         return False
 
-def ll_everused(entries, i):
-    return bool(entries[i].key)
-
 entrymeths = {
     'allocate': lltype.typeMethod(rdict._ll_malloc_entries),
-    'delete': rdict._ll_free_entries,
-    'valid': ll_valid,
-    'everused': ll_everused,
     'hash': rdict.ll_hash_from_cache,
     'no_direct_compare': True,
+    'clear_key': lambda : llmemory.dead_wref,
+    'clear_value': lambda : lltype.nullptr(rclass.OBJECTPTR.TO),
+    'valid': ll_valid,
     }
 WEAKDICTENTRYARRAY = lltype.GcArray(WEAKDICTENTRY,
                                     adtmeths=entrymeths,
@@ -115,22 +114,30 @@
 @jit.dont_look_inside
 def ll_new_weakdict():
     d = lltype.malloc(WEAKDICT)
-    d.entries = WEAKDICT.entries.TO.allocate(rdict.DICT_INITSIZE)
+    d.entries = WEAKDICT.entries.TO.allocate(rdict.DICT_ITEMS_INITSIZE)
+    d.indexes = WEAKDICT.indexes.TO.allocate(rdict.DICT_INITSIZE)
     d.num_items = 0
-    d.resize_counter = rdict.DICT_INITSIZE * 2
+    d.resize_counter = rdict.DICT_ITEMS_INITSIZE
     return d
 
 @jit.dont_look_inside
 def ll_get(d, llkey):
     hash = compute_identity_hash(llkey)
+    llop.debug_print(lltype.Void, "computed key", ll_debugrepr(llkey),
+                     hex(hash))
     i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
-    #llop.debug_print(lltype.Void, i, 'get', hex(hash),
-    #                 ll_debugrepr(d.entries[i].key),
-    #                 ll_debugrepr(d.entries[i].value))
+    index = d.indexes[i]
+    if index < 0:
+        llop.debug_print(lltype.Void, i, 'get', hex(hash), "null")
+        return NULLVALUE
+    llop.debug_print(lltype.Void, i, "getting", index)
+    llop.debug_print(lltype.Void, i, 'get', hex(hash),
+                     ll_debugrepr(d.entries[index].key),
+                     ll_debugrepr(d.entries[index].value))
     # NB. ll_valid() above was just called at least on entry i, so if
     # it is an invalid entry with a dead weakref, the value was reset
     # to NULLVALUE.
-    return d.entries[i].value
+    return d.entries[index].value
 
 @jit.dont_look_inside
 def ll_set(d, llkey, llvalue):
@@ -144,43 +151,77 @@
     hash = compute_identity_hash(llkey)
     keyref = weakref_create(llkey)    # GC effects here, before the rest
     i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
-    everused = d.entries.everused(i)
-    d.entries[i].key = keyref
-    d.entries[i].value = llvalue
-    d.entries[i].f_hash = hash
-    #llop.debug_print(lltype.Void, i, 'stored', hex(hash),
-    #                 ll_debugrepr(llkey),
-    #                 ll_debugrepr(llvalue))
+    index = d.indexes[i]
+    everused = index != rdict.FREE
+    if index < 0:
+        index = d.num_items
+        d.indexes[i] = index
+        d.num_items += 1
+    d.entries[index].key = keyref
+    d.entries[index].value = llvalue
+    d.entries[index].f_hash = hash
+    llop.debug_print(lltype.Void, i, 'stored', index, d.num_items, hex(hash),
+                     ll_debugrepr(llkey),
+                     ll_debugrepr(llvalue))
     if not everused:
-        d.resize_counter -= 3
+        d.resize_counter -= 1
         if d.resize_counter <= 0:
-            #llop.debug_print(lltype.Void, 'RESIZE')
+            llop.debug_print(lltype.Void, 'RESIZE')
             ll_weakdict_resize(d)
 
 @jit.dont_look_inside
 def ll_set_null(d, llkey):
     hash = compute_identity_hash(llkey)
     i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
-    if d.entries.everused(i):
-        # If the entry was ever used, clean up its key and value.
-        # We don't store a NULL value, but a dead weakref, because
-        # the entry must still be marked as everused().
-        d.entries[i].key = llmemory.dead_wref
-        d.entries[i].value = NULLVALUE
-        #llop.debug_print(lltype.Void, i, 'zero')
-
-def ll_update_num_items(d):
-    entries = d.entries
-    num_items = 0
-    for i in range(len(entries)):
-        if entries.valid(i):
-            num_items += 1
-    d.num_items = num_items
+    index = d.indexes[i]
+    if d.entries.valid(index):
+        d.entries[index].key = llmemory.dead_wref
+        d.entries[index].value = NULLVALUE
+        llop.debug_print(lltype.Void, i, index, 'zero')
 
 def ll_weakdict_resize(d):
-    # first set num_items to its correct, up-to-date value
-    ll_update_num_items(d)
-    rdict.ll_dict_resize(d)
+    llop.debug_print(lltype.Void, "weakdict resize")
+    old_entries = d.entries
+    old_indexes = d.indexes
+    old_size = len(old_indexes)
+    # make a 'new_size' estimate and shrink it if there are many
+    # deleted entry markers.  See CPython for why it is a good idea to
+    # quadruple the dictionary size as long as it's not too big.
+    # count the number of valid entries
+    i = 0
+    num_items = 0
+    while i < d.num_items:
+        if old_entries.valid(i):
+            num_items += 1
+        i += 1
+    if num_items > 50000: new_estimate = (num_items + 1) * 2
+    else:                 new_estimate = (num_items + 1) * 4
+    new_size = rdict.DICT_INITSIZE
+    while new_size <= new_estimate:
+        new_size *= 2
+    #
+    new_item_size = new_size // 3 * 2 + 1
+    d.entries = lltype.typeOf(old_entries).TO.allocate(new_item_size)
+    d.indexes = lltype.typeOf(d).TO.indexes.TO.allocate(new_size)
+    d.resize_counter = new_item_size - num_items
+    i = 0
+    indexes = d.indexes
+    j = 0
+    while i < old_size:
+        index = old_indexes[i]
+        if old_entries.valid(index):
+            hash = old_entries.hash(index)
+            lookup_i = rdict.ll_dict_lookup_clean(d, hash)
+            indexes[lookup_i] = j
+            llop.debug_print(lltype.Void, "inserting", hex(hash), i,
+                             "to", lookup_i, index, "=>", j)
+            llop.debug_print(lltype.Void, hex(old_entries[index].f_hash))
+            d.entries[j].key = old_entries[index].key
+            d.entries[j].value = old_entries[index].value
+            d.entries[j].f_hash = old_entries[index].f_hash
+            j += 1
+        i += 1
+    d.num_items = j
 
 def ll_keyeq(d, weakkey1, realkey2):
     # only called by ll_dict_lookup() with the first arg coming from an
@@ -188,13 +229,15 @@
     if not weakkey1:
         assert bool(realkey2)
         return False
-    return weakref_deref(rclass.OBJECTPTR, weakkey1) == realkey2
+    realkey1 = weakref_deref(rclass.OBJECTPTR, weakkey1)
+    llop.debug_print(lltype.Void, "comparison", realkey1, realkey2)
+    return realkey1 == realkey2
 
 @jit.dont_look_inside
 def ll_length(d):
     # xxx slow, but it's only for debugging
-    ll_update_num_items(d)
-    #llop.debug_print(lltype.Void, 'length:', d.num_items)
+    d.resize()
+    llop.debug_print(lltype.Void, 'length:', d.num_items)
     return d.num_items
 
 dictmeths = {
@@ -202,10 +245,15 @@
     'll_set': ll_set,
     'keyeq': ll_keyeq,
     'paranoia': False,
+    'resize': ll_weakdict_resize,
     }
 
+INDEXESARRAY = lltype.GcArray(lltype.Signed,
+              adtmeths={'allocate' : 
lltype.typeMethod(rdict._ll_malloc_indexes)})
+
 WEAKDICT = lltype.GcStruct("weakkeydict",
                            ("num_items", lltype.Signed),
                            ("resize_counter", lltype.Signed),
+                           ("indexes", lltype.Ptr(INDEXESARRAY)),
                            ("entries", lltype.Ptr(WEAKDICTENTRYARRAY)),
                            adtmeths=dictmeths)
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -410,6 +410,8 @@
     lltypesystem.
     """
     assert x is not None
+    if hasattr(x, '_obj'):
+        return compute_identity_hash(x._obj) # hack hack hack
     result = object.__hash__(x)
     try:
         x.__dict__['__precomputed_identity_hash'] = result
diff --git a/pypy/rlib/rweakref.py b/pypy/rlib/rweakref.py
--- a/pypy/rlib/rweakref.py
+++ b/pypy/rlib/rweakref.py
@@ -68,7 +68,6 @@
 
 from pypy.rpython import extregistry
 from pypy.annotation import model as annmodel
-from pypy.annotation.bookkeeper import getbookkeeper
 from pypy.tool.pairtype import pairtype
 
 class Entry(extregistry.ExtRegistryEntry):
@@ -175,9 +174,9 @@
 class __extend__(pairtype(SomeWeakKeyDict, SomeWeakKeyDict)):
     def union((s_wkd1, s_wkd2)):
         if s_wkd1.keyclassdef is not s_wkd2.keyclassdef:
-            return SomeObject() # not the same key class! complain...
+            return annmodel.SomeObject() # not the same key class! complain...
         if s_wkd1.valueclassdef is not s_wkd2.valueclassdef:
-            return SomeObject() # not the same value class! complain...
+            return annmodel.SomeObject() # not the same value class! 
complain...
         return SomeWeakKeyDict(s_wkd1.keyclassdef, s_wkd1.valueclassdef)
 
 class Entry(extregistry.ExtRegistryEntry):
diff --git a/pypy/rlib/test/test_rweakkeydict.py 
b/pypy/rlib/test/test_rweakkeydict.py
--- a/pypy/rlib/test/test_rweakkeydict.py
+++ b/pypy/rlib/test/test_rweakkeydict.py
@@ -2,6 +2,8 @@
 from pypy.rlib import rgc
 from pypy.rlib.rweakref import RWeakKeyDictionary
 from pypy.rpython.test.test_llinterp import interpret
+from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.rpython.lltypesystem import lltype
 
 class KX(object):
     pass
@@ -15,7 +17,6 @@
 class VY(VX):
     pass
 
-
 def make_test(loop=100, prebuilt=None):
     def g(d):
         assert d.get(KX()) is None
@@ -35,19 +36,25 @@
         d = prebuilt
         if d is None:
             d = RWeakKeyDictionary(KX, VX)
+        llop.debug_print(lltype.Void, "XXX 1")
         k1, k3, v1, v2, v3 = g(d)
         rgc.collect(); rgc.collect()
+        llop.debug_print(lltype.Void, "XXX 2")
         assert d.get(k1) is v1
         assert d.get(k3) is v3
         assert d.get(k1) is not v2
         assert d.get(k3) is not v2
+        llop.debug_print(lltype.Void, "XXX 3")
         assert d.length() == 2
+        llop.debug_print(lltype.Void, "XXX 4")
         d.set(k1, None)
         assert d.get(k1) is None
         assert d.get(k3) is v3
         assert d.length() == 1
+        llop.debug_print(lltype.Void, "XXX 5")
         # resizing should also work
         lots_of_keys = [KX() for i in range(loop)]
+        llop.debug_print(lltype.Void, "XXX 6")
         for k in lots_of_keys:
             d.set(k, v1)
         for k in lots_of_keys:
@@ -56,19 +63,26 @@
         assert d.get(k3) is v3
         assert d.length() == loop + 1
         # a subclass
+        llop.debug_print(lltype.Void, "XXX 7")
         ky = KY()
         vy = VY()
         d.set(ky, vy)
         assert d.get(ky) is vy
         assert d.length() == loop + 2
         # deleting by storing Nones
+        llop.debug_print(lltype.Void, "XXX 8")
         for k in lots_of_keys:
             d.set(k, None)
+        llop.debug_print(lltype.Void, "XXX 9")
         for k in lots_of_keys:
             assert d.get(k) is None
+        llop.debug_print(lltype.Void, "XXX 10")
         assert d.get(k1) is None
+        llop.debug_print(lltype.Void, "XXX 11")
         assert d.get(k3) is v3
+        llop.debug_print(lltype.Void, "XXX 12")
         assert d.get(ky) is vy
+        llop.debug_print(lltype.Void, "XXX 13")
         assert d.length() == 2
     return f
 
diff --git a/pypy/rpython/lltypesystem/rdict.py 
b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -42,20 +42,19 @@
 
 def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
                 ll_fasthash_function=None, ll_hash_function=None,
-                ll_eq_function=None):
+                ll_eq_function=None, method_cache={}):
     # get the actual DICT type. if DICT is None, it's created, otherwise
     # forward reference is becoming DICT
     if DICT is None:
         DICT = lltype.GcForwardReference()
     # compute the shape of the DICTENTRY structure
     entryfields = []
-    entrymeths = {
-        'allocate': lltype.typeMethod(_ll_malloc_entries),
-        'must_clear_key':   (isinstance(DICTKEY, lltype.Ptr)
-                             and DICTKEY._needsgc()),
-        'must_clear_value': (isinstance(DICTVALUE, lltype.Ptr)
-                             and DICTVALUE._needsgc()),
-        }
+    entrymeths = {'allocate': lltype.typeMethod(_ll_malloc_entries),
+                  'valid': ll_valid_entry,
+                  'clear_key': (isinstance(DICTKEY, lltype.Ptr) and
+                                DICTKEY._needsgc()),
+                  'clear_value': (isinstance(DICTVALUE, lltype.Ptr) and
+                                  DICTVALUE._needsgc())}
 
     # * the key
     entryfields.append(("key", DICTKEY))
@@ -79,7 +78,7 @@
     DICTENTRY = lltype.Struct("dictentry", *entryfields)
     DICTENTRYARRAY = lltype.GcArray(DICTENTRY,
                                     adtmeths=entrymeths)
-    array_adtmeths = {'allocate': lltype.typeMethod(_ll_malloc_items)}
+    array_adtmeths = {'allocate': lltype.typeMethod(_ll_malloc_indexes)}
     fields = [("num_items", lltype.Signed),
               ("resize_counter", lltype.Signed),
               ("entries", lltype.Ptr(DICTENTRYARRAY)),
@@ -111,6 +110,7 @@
     adtmeths['KEY']   = DICTKEY
     adtmeths['VALUE'] = DICTVALUE
     adtmeths['allocate'] = lltype.typeMethod(_ll_malloc_dict)
+    adtmeths['resize'] = ll_dict_resize
     DICT.become(lltype.GcStruct("dicttable", adtmeths=adtmeths,
                                 *fields))
     return DICT
@@ -337,6 +337,9 @@
 def ll_hash_from_cache(entries, i):
     return entries[i].f_hash
 
+def ll_valid_entry(entires, index):
+    return index >= 0
+
 def ll_hash_recomputed(entries, i):
     ENTRIES = lltype.typeOf(entries).TO
     return ENTRIES.fasthashfn(entries[i].key)
@@ -390,7 +393,7 @@
         ll_assert(not valid, "valid but not everused")
         rc = d.resize_counter - 1
         if rc <= 0:       # if needed, resize the dict -- before the insertion
-            ll_dict_resize(d)
+            d.resize()
             i = ll_dict_lookup_clean(d, hash)
             # then redo the lookup for 'key'
             entry = d.entries[index]
@@ -442,9 +445,9 @@
     d.indexes[i] = DELETED
     d.num_items -= 1
     entry = d.entries[d.num_items]
-    if ENTRIES.must_clear_key:
+    if ENTRIES.clear_key:
         entry.key = lltype.nullptr(ENTRY.key.TO)
-    if ENTRIES.must_clear_value:
+    if ENTRIES.clear_value:
         entry.value = lltype.nullptr(ENTRY.value.TO)
     #
     # The rest is commented out: like CPython we no longer shrink the
@@ -482,7 +485,7 @@
     indexes = d.indexes
     while i < old_size:
         index = old_indexes[i]
-        if index >= 0:
+        if old_entries.valid(index):
             indexes[ll_dict_lookup_clean(d, old_entries.hash(index))] = index
         i += 1
     rgc.ll_arraycopy(old_entries, d.entries, 0, 0, min(len(old_entries),
@@ -493,6 +496,14 @@
 PERTURB_SHIFT = 5
 
 _look_inside_lookup = lambda d, key, hash: jit.isvirtual(d) and 
jit.isconstant(key)
+from pypy.rlib.objectmodel import compute_identity_hash
+
+def ll_debugrepr(x):
+    if x:
+        h = compute_identity_hash(x)
+    else:
+        h = 0
+    return '<%x>' % (h,)
 
 @jit.look_inside_iff(_look_inside_lookup)
 def ll_dict_lookup(d, key, hash):
@@ -501,18 +512,21 @@
     mask = len(indexes) - 1
     i = hash & mask
     # do the first try before any looping
+    ENTRIES = lltype.typeOf(entries).TO
+    direct_compare = not hasattr(ENTRIES, 'no_direct_compare')
     index = indexes[i]
-    if index >= 0:
+    if entries.valid(index):
         checkingkey = entries[index].key
-        if checkingkey == key:
+        if direct_compare and checkingkey == key:
             return i   # found the entry
         if d.keyeq is not None and entries.hash(index) == hash:
             # correct hash, maybe the key is e.g. a different pointer to
             # an equal object
             found = d.keyeq(checkingkey, key)
+            llop.debug_print(lltype.Void, "comparing keys", 
ll_debugrepr(checkingkey), ll_debugrepr(key), found)
             if d.paranoia:
                 if (entries != d.entries or
-                    not indexes[i] >= 0 or entries[index].key != checkingkey):
+                    not entries.valid(indexes[i]) or entries[index].key != 
checkingkey):
                     # the compare did major nasty stuff to the dict: start over
                     return ll_dict_lookup(d, key, hash)
             if found:
@@ -538,9 +552,9 @@
             if freeslot == -1:
                 freeslot = i
             return freeslot | HIGHEST_BIT
-        elif index >= 0:
+        elif entries.valid(index):
             checkingkey = entries[index].key
-            if checkingkey == key:
+            if direct_compare and checkingkey == key:
                 return i
             if d.keyeq is not None and entries.hash(index) == hash:
                 # correct hash, maybe the key is e.g. a different pointer to
@@ -548,7 +562,7 @@
                 found = d.keyeq(checkingkey, key)
                 if d.paranoia:
                     if (entries != d.entries or
-                        not indexes[i] >= 0 or
+                        not entries.valid(indexes[i]) or
                         entries[index].key != checkingkey):
                         # the compare did major nasty stuff to the dict:
                         # start over
@@ -614,7 +628,7 @@
     return lltype.malloc(DICT)
 def _ll_malloc_entries(ENTRIES, n):
     return lltype.malloc(ENTRIES, n, zero=True)
-def _ll_malloc_items(ITEMS, n):
+def _ll_malloc_indexes(ITEMS, n):
     res = lltype.malloc(ITEMS, n)
     i = 0
     while i < n:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy rdict-experiments-2: in-progress of rweakkeydict

Reply via email to