Author: Maciej Fijalkowski <fij...@gmail.com>
Branch: rdict-experiments-2
Changeset: r59829:802afeec68e5
Date: 2013-01-07 02:05 +0200
http://bitbucket.org/pypy/pypy/changeset/802afeec68e5/

Log:    port weakvalueref

diff --git a/pypy/rlib/_rweakkeydict.py b/pypy/rlib/_rweakkeydict.py
--- a/pypy/rlib/_rweakkeydict.py
+++ b/pypy/rlib/_rweakkeydict.py
@@ -123,17 +123,17 @@
 @jit.dont_look_inside
 def ll_get(d, llkey):
     hash = compute_identity_hash(llkey)
-    llop.debug_print(lltype.Void, "computed key", ll_debugrepr(llkey),
-                     hex(hash))
+    #llop.debug_print(lltype.Void, "computed key", ll_debugrepr(llkey),
+    #                 hex(hash))
     i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
     index = d.indexes[i]
     if index < 0:
-        llop.debug_print(lltype.Void, i, 'get', hex(hash), "null")
+        #llop.debug_print(lltype.Void, i, 'get', hex(hash), "null")
         return NULLVALUE
-    llop.debug_print(lltype.Void, i, "getting", index)
-    llop.debug_print(lltype.Void, i, 'get', hex(hash),
-                     ll_debugrepr(d.entries[index].key),
-                     ll_debugrepr(d.entries[index].value))
+    #llop.debug_print(lltype.Void, i, "getting", index)
+    #llop.debug_print(lltype.Void, i, 'get', hex(hash),
+    #                 ll_debugrepr(d.entries[index].key),
+    #                 ll_debugrepr(d.entries[index].value))
     # NB. ll_valid() above was just called at least on entry i, so if
     # it is an invalid entry with a dead weakref, the value was reset
     # to NULLVALUE.
@@ -160,13 +160,13 @@
     d.entries[index].key = keyref
     d.entries[index].value = llvalue
     d.entries[index].f_hash = hash
-    llop.debug_print(lltype.Void, i, 'stored', index, d.num_items, hex(hash),
-                     ll_debugrepr(llkey),
-                     ll_debugrepr(llvalue))
+    #llop.debug_print(lltype.Void, i, 'stored', index, d.num_items, hex(hash),
+    #                 ll_debugrepr(llkey),
+    #                 ll_debugrepr(llvalue))
     if not everused:
         d.resize_counter -= 1
         if d.resize_counter <= 0:
-            llop.debug_print(lltype.Void, 'RESIZE')
+            #llop.debug_print(lltype.Void, 'RESIZE')
             ll_weakdict_resize(d)
 
 @jit.dont_look_inside
@@ -177,10 +177,10 @@
     if d.entries.valid(index):
         d.entries[index].key = llmemory.dead_wref
         d.entries[index].value = NULLVALUE
-        llop.debug_print(lltype.Void, i, index, 'zero')
+        #llop.debug_print(lltype.Void, i, index, 'zero')
 
 def ll_weakdict_resize(d):
-    llop.debug_print(lltype.Void, "weakdict resize")
+    #llop.debug_print(lltype.Void, "weakdict resize")
     old_entries = d.entries
     old_indexes = d.indexes
     old_size = len(old_indexes)
@@ -203,7 +203,6 @@
     new_item_size = new_size // 3 * 2 + 1
     d.entries = lltype.typeOf(old_entries).TO.allocate(new_item_size)
     d.indexes = lltype.typeOf(d).TO.indexes.TO.allocate(new_size)
-    d.resize_counter = new_item_size - num_items
     i = 0
     indexes = d.indexes
     j = 0
@@ -213,15 +212,16 @@
             hash = old_entries.hash(index)
             lookup_i = rdict.ll_dict_lookup_clean(d, hash)
             indexes[lookup_i] = j
-            llop.debug_print(lltype.Void, "inserting", hex(hash), i,
-                             "to", lookup_i, index, "=>", j)
-            llop.debug_print(lltype.Void, hex(old_entries[index].f_hash))
+            #llop.debug_print(lltype.Void, "inserting", hex(hash), i,
+            #                 "to", lookup_i, index, "=>", j)
+            #llop.debug_print(lltype.Void, hex(old_entries[index].f_hash))
             d.entries[j].key = old_entries[index].key
             d.entries[j].value = old_entries[index].value
             d.entries[j].f_hash = old_entries[index].f_hash
             j += 1
         i += 1
     d.num_items = j
+    d.resize_counter = new_item_size - j
 
 def ll_keyeq(d, weakkey1, realkey2):
     # only called by ll_dict_lookup() with the first arg coming from an
@@ -230,14 +230,14 @@
         assert bool(realkey2)
         return False
     realkey1 = weakref_deref(rclass.OBJECTPTR, weakkey1)
-    llop.debug_print(lltype.Void, "comparison", realkey1, realkey2)
+    #llop.debug_print(lltype.Void, "comparison", realkey1, realkey2)
     return realkey1 == realkey2
 
 @jit.dont_look_inside
 def ll_length(d):
     # xxx slow, but it's only for debugging
     d.resize()
-    llop.debug_print(lltype.Void, 'length:', d.num_items)
+    #llop.debug_print(lltype.Void, 'length:', d.num_items)
     return d.num_items
 
 dictmeths = {
diff --git a/pypy/rlib/_rweakvaldict.py b/pypy/rlib/_rweakvaldict.py
--- a/pypy/rlib/_rweakvaldict.py
+++ b/pypy/rlib/_rweakvaldict.py
@@ -18,8 +18,10 @@
         self.ll_keyhash = r_key.get_ll_hash_function()
         ll_keyeq = lltype.staticAdtMethod(r_key.get_ll_eq_function())
 
-        def ll_valid(entries, i):
-            value = entries[i].value
+        def ll_valid(entries, index):
+            if index < 0:
+                return False
+            value = entries[index].value
             return bool(value) and bool(weakref_deref(rclass.OBJECTPTR, value))
 
         def ll_everused(entries, i):
@@ -30,7 +32,6 @@
 
         entrymeths = {
             'allocate': lltype.typeMethod(rdict._ll_malloc_entries),
-            'delete': rdict._ll_free_entries,
             'valid': ll_valid,
             'everused': ll_everused,
             'hash': ll_hash,
@@ -41,6 +42,9 @@
         WEAKDICTENTRYARRAY = lltype.GcArray(WEAKDICTENTRY,
                                             adtmeths=entrymeths,
                                             hints={'weakarray': 'value'})
+
+        WEAKINDEXESARRAY = lltype.GcArray(lltype.Signed,
+           adtmeths={'allocate': lltype.typeMethod(rdict._ll_malloc_indexes)})
         # NB. the 'hints' is not used so far ^^^
 
         dictmeths = {
@@ -48,12 +52,14 @@
             'll_set': self.ll_set,
             'keyeq': ll_keyeq,
             'paranoia': False,
+            'resize': self.ll_weakdict_resize,
             }
 
         self.WEAKDICT = lltype.GcStruct(
             "weakvaldict",
             ("num_items", lltype.Signed),
             ("resize_counter", lltype.Signed),
+            ('indexes', lltype.Ptr(WEAKINDEXESARRAY)),
             ("entries", lltype.Ptr(WEAKDICTENTRYARRAY)),
             adtmeths=dictmeths)
 
@@ -62,7 +68,7 @@
 
     def convert_const(self, weakdict):
         if not isinstance(weakdict, RWeakValueDictionary):
-            raise TyperError("expected an RWeakValueDictionary: %r" % (
+            raise TypeError("expected an RWeakValueDictionary: %r" % (
                 weakdict,))
         try:
             key = Constant(weakdict)
@@ -105,9 +111,10 @@
     @jit.dont_look_inside
     def ll_new_weakdict(self):
         d = lltype.malloc(self.WEAKDICT)
-        d.entries = self.WEAKDICT.entries.TO.allocate(rdict.DICT_INITSIZE)
+        d.entries = 
self.WEAKDICT.entries.TO.allocate(rdict.DICT_ITEMS_INITSIZE)
+        d.indexes = self.WEAKDICT.indexes.TO.allocate(rdict.DICT_INITSIZE)
         d.num_items = 0
-        d.resize_counter = rdict.DICT_INITSIZE * 2
+        d.resize_counter = rdict.DICT_ITEMS_INITSIZE
         return d
 
     @jit.dont_look_inside
@@ -115,8 +122,11 @@
         hash = self.ll_keyhash(llkey)
         i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
         #llop.debug_print(lltype.Void, i, 'get')
-        valueref = d.entries[i].value
-        if valueref:
+        index = d.indexes[i]
+        if index >= 0:
+            valueref = d.entries[index].value
+            if not valueref:
+                return lltype.nullptr(rclass.OBJECTPTR.TO)
             return weakref_deref(rclass.OBJECTPTR, valueref)
         else:
             return lltype.nullptr(rclass.OBJECTPTR.TO)
@@ -127,18 +137,26 @@
             self.ll_set_nonnull(d, llkey, llvalue)
         else:
             self.ll_set_null(d, llkey)
-
+    
     @jit.dont_look_inside
     def ll_set_nonnull(self, d, llkey, llvalue):
         hash = self.ll_keyhash(llkey)
         valueref = weakref_create(llvalue)    # GC effects here, before the 
rest
         i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
-        everused = d.entries.everused(i)
-        d.entries[i].key = llkey
-        d.entries[i].value = valueref
-        #llop.debug_print(lltype.Void, i, 'stored')
+        index = d.indexes[i]
+        everused = index != rdict.FREE
+        if index < 0:
+            index = d.num_items
+            d.indexes[i] = index
+            d.num_items += 1
+        d.entries[index].key = llkey
+        d.entries[index].value = valueref
+        llop.debug_print(lltype.Void, "set nonnull", i, index)
+        #llop.debug_print(lltype.Void, i, 'stored', index, d.num_items, 
hex(hash),
+        #                 ll_debugrepr(llkey),
+        #                 ll_debugrepr(llvalue))
         if not everused:
-            d.resize_counter -= 3
+            d.resize_counter -= 1
             if d.resize_counter <= 0:
                 #llop.debug_print(lltype.Void, 'RESIZE')
                 self.ll_weakdict_resize(d)
@@ -147,26 +165,60 @@
     def ll_set_null(self, d, llkey):
         hash = self.ll_keyhash(llkey)
         i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
-        if d.entries.everused(i):
+        index = d.indexes[i]
+        if d.entries.valid(index):
             # If the entry was ever used, clean up its key and value.
             # We don't store a NULL value, but a dead weakref, because
             # the entry must still be marked as everused().
-            d.entries[i].value = llmemory.dead_wref
+            d.entries[index].value = llmemory.dead_wref
             if isinstance(self.r_key.lowleveltype, lltype.Ptr):
-                d.entries[i].key = self.r_key.convert_const(None)
+                d.entries[index].key = self.r_key.convert_const(None)
             else:
-                d.entries[i].key = self.r_key.convert_const(0)
+                d.entries[index].key = self.r_key.convert_const(0)
             #llop.debug_print(lltype.Void, i, 'zero')
 
     def ll_weakdict_resize(self, d):
-        # first set num_items to its correct, up-to-date value
-        entries = d.entries
+        #llop.debug_print(lltype.Void, "weakdict resize")
+        old_entries = d.entries
+        old_indexes = d.indexes
+        old_size = len(old_indexes)
+        # make a 'new_size' estimate and shrink it if there are many
+        # deleted entry markers.  See CPython for why it is a good idea to
+        # quadruple the dictionary size as long as it's not too big.
+        # count the number of valid entries
+        i = 0
         num_items = 0
-        for i in range(len(entries)):
-            if entries.valid(i):
+        while i < d.num_items:
+            if old_entries.valid(i):
                 num_items += 1
-        d.num_items = num_items
-        rdict.ll_dict_resize(d)
+            i += 1
+        if num_items > 50000: new_estimate = (num_items + 1) * 2
+        else:                 new_estimate = (num_items + 1) * 4
+        new_size = rdict.DICT_INITSIZE
+        while new_size <= new_estimate:
+            new_size *= 2
+        #
+        new_item_size = new_size // 3 * 2 + 1
+        d.entries = lltype.typeOf(old_entries).TO.allocate(new_item_size)
+        d.indexes = lltype.typeOf(d).TO.indexes.TO.allocate(new_size)
+        i = 0
+        indexes = d.indexes
+        j = 0
+        while i < old_size:
+            index = old_indexes[i]
+            if old_entries.valid(index):
+                hash = old_entries.hash(index)
+                lookup_i = rdict.ll_dict_lookup_clean(d, hash)
+                indexes[lookup_i] = j
+                #llop.debug_print(lltype.Void, "inserting", hex(hash), i,
+                #                 "to", lookup_i, index, "=>", j)
+                #llop.debug_print(lltype.Void, hex(old_entries[index].f_hash))
+                d.entries[j].key = old_entries[index].key
+                d.entries[j].value = old_entries[index].value
+                j += 1
+            i += 1
+        d.num_items = j
+        d.resize_counter = new_item_size - j
 
 def specialize_make_weakdict(hop):
     hop.exception_cannot_occur()
diff --git a/pypy/rpython/lltypesystem/rdict.py 
b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -523,7 +523,7 @@
             # correct hash, maybe the key is e.g. a different pointer to
             # an equal object
             found = d.keyeq(checkingkey, key)
-            llop.debug_print(lltype.Void, "comparing keys", 
ll_debugrepr(checkingkey), ll_debugrepr(key), found)
+            #llop.debug_print(lltype.Void, "comparing keys", 
ll_debugrepr(checkingkey), ll_debugrepr(key), found)
             if d.paranoia:
                 if (entries != d.entries or
                     not entries.valid(indexes[i]) or entries[index].key != 
checkingkey):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to