Author: Maciej Fijalkowski <[email protected]>
Branch: rdict-experiments-2
Changeset: r59827:1fb600db1932
Date: 2013-01-07 01:20 +0200
http://bitbucket.org/pypy/pypy/changeset/1fb600db1932/
Log: in-progress of rweakkeydict
diff --git a/pypy/rlib/_rweakkeydict.py b/pypy/rlib/_rweakkeydict.py
--- a/pypy/rlib/_rweakkeydict.py
+++ b/pypy/rlib/_rweakkeydict.py
@@ -1,13 +1,13 @@
from pypy.objspace.flow.model import Constant
from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rdict
+from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.lltypesystem.llmemory import weakref_create, weakref_deref
-from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.rclass import getinstancerepr
from pypy.rpython.rmodel import Repr
from pypy.rlib.rweakref import RWeakKeyDictionary
from pypy.rlib import jit
+from pypy.rlib.debug import ll_assert
from pypy.rlib.objectmodel import compute_identity_hash
-from pypy.rlib.objectmodel import we_are_translated
# Warning: this implementation of RWeakKeyDictionary is not exactly
@@ -25,7 +25,7 @@
def convert_const(self, weakdict):
if not isinstance(weakdict, RWeakKeyDictionary):
- raise TyperError("expected an RWeakKeyDictionary: %r" % (
+ raise TypeError("expected an RWeakKeyDictionary: %r" % (
weakdict,))
try:
key = Constant(weakdict)
@@ -33,7 +33,7 @@
except KeyError:
self.setup()
if weakdict.length() != 0:
- raise TyperError("got a non-empty prebuilt RWeakKeyDictionary")
+ raise TypeError("got a non-empty prebuilt RWeakKeyDictionary")
l_dict = ll_new_weakdict()
self.dict_cache[key] = l_dict
return l_dict
@@ -83,8 +83,10 @@
h = 0
return '<%x>' % (h,)
-def ll_valid(entries, i):
- key = entries[i].key
+def ll_valid(entries, index):
+ if index < 0:
+ return False
+ key = entries[index].key
if not key:
return False
elif weakref_deref(rclass.OBJECTPTR, key):
@@ -93,19 +95,16 @@
# The entry might be a dead weakref still holding a strong
# reference to the value; for this case, we clear the old
# value from the entry, if any.
- entries[i].value = NULLVALUE
+ entries[index].value = NULLVALUE
return False
-def ll_everused(entries, i):
- return bool(entries[i].key)
-
entrymeths = {
'allocate': lltype.typeMethod(rdict._ll_malloc_entries),
- 'delete': rdict._ll_free_entries,
- 'valid': ll_valid,
- 'everused': ll_everused,
'hash': rdict.ll_hash_from_cache,
'no_direct_compare': True,
+ 'clear_key': lambda : llmemory.dead_wref,
+ 'clear_value': lambda : lltype.nullptr(rclass.OBJECTPTR.TO),
+ 'valid': ll_valid,
}
WEAKDICTENTRYARRAY = lltype.GcArray(WEAKDICTENTRY,
adtmeths=entrymeths,
@@ -115,22 +114,30 @@
@jit.dont_look_inside
def ll_new_weakdict():
d = lltype.malloc(WEAKDICT)
- d.entries = WEAKDICT.entries.TO.allocate(rdict.DICT_INITSIZE)
+ d.entries = WEAKDICT.entries.TO.allocate(rdict.DICT_ITEMS_INITSIZE)
+ d.indexes = WEAKDICT.indexes.TO.allocate(rdict.DICT_INITSIZE)
d.num_items = 0
- d.resize_counter = rdict.DICT_INITSIZE * 2
+ d.resize_counter = rdict.DICT_ITEMS_INITSIZE
return d
@jit.dont_look_inside
def ll_get(d, llkey):
hash = compute_identity_hash(llkey)
+ llop.debug_print(lltype.Void, "computed key", ll_debugrepr(llkey),
+ hex(hash))
i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
- #llop.debug_print(lltype.Void, i, 'get', hex(hash),
- # ll_debugrepr(d.entries[i].key),
- # ll_debugrepr(d.entries[i].value))
+ index = d.indexes[i]
+ if index < 0:
+ llop.debug_print(lltype.Void, i, 'get', hex(hash), "null")
+ return NULLVALUE
+ llop.debug_print(lltype.Void, i, "getting", index)
+ llop.debug_print(lltype.Void, i, 'get', hex(hash),
+ ll_debugrepr(d.entries[index].key),
+ ll_debugrepr(d.entries[index].value))
# NB. ll_valid() above was just called at least on entry i, so if
# it is an invalid entry with a dead weakref, the value was reset
# to NULLVALUE.
- return d.entries[i].value
+ return d.entries[index].value
@jit.dont_look_inside
def ll_set(d, llkey, llvalue):
@@ -144,43 +151,77 @@
hash = compute_identity_hash(llkey)
keyref = weakref_create(llkey) # GC effects here, before the rest
i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
- everused = d.entries.everused(i)
- d.entries[i].key = keyref
- d.entries[i].value = llvalue
- d.entries[i].f_hash = hash
- #llop.debug_print(lltype.Void, i, 'stored', hex(hash),
- # ll_debugrepr(llkey),
- # ll_debugrepr(llvalue))
+ index = d.indexes[i]
+ everused = index != rdict.FREE
+ if index < 0:
+ index = d.num_items
+ d.indexes[i] = index
+ d.num_items += 1
+ d.entries[index].key = keyref
+ d.entries[index].value = llvalue
+ d.entries[index].f_hash = hash
+ llop.debug_print(lltype.Void, i, 'stored', index, d.num_items, hex(hash),
+ ll_debugrepr(llkey),
+ ll_debugrepr(llvalue))
if not everused:
- d.resize_counter -= 3
+ d.resize_counter -= 1
if d.resize_counter <= 0:
- #llop.debug_print(lltype.Void, 'RESIZE')
+ llop.debug_print(lltype.Void, 'RESIZE')
ll_weakdict_resize(d)
@jit.dont_look_inside
def ll_set_null(d, llkey):
hash = compute_identity_hash(llkey)
i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK
- if d.entries.everused(i):
- # If the entry was ever used, clean up its key and value.
- # We don't store a NULL value, but a dead weakref, because
- # the entry must still be marked as everused().
- d.entries[i].key = llmemory.dead_wref
- d.entries[i].value = NULLVALUE
- #llop.debug_print(lltype.Void, i, 'zero')
-
-def ll_update_num_items(d):
- entries = d.entries
- num_items = 0
- for i in range(len(entries)):
- if entries.valid(i):
- num_items += 1
- d.num_items = num_items
+ index = d.indexes[i]
+ if d.entries.valid(index):
+ d.entries[index].key = llmemory.dead_wref
+ d.entries[index].value = NULLVALUE
+ llop.debug_print(lltype.Void, i, index, 'zero')
def ll_weakdict_resize(d):
- # first set num_items to its correct, up-to-date value
- ll_update_num_items(d)
- rdict.ll_dict_resize(d)
+ llop.debug_print(lltype.Void, "weakdict resize")
+ old_entries = d.entries
+ old_indexes = d.indexes
+ old_size = len(old_indexes)
+ # make a 'new_size' estimate and shrink it if there are many
+ # deleted entry markers. See CPython for why it is a good idea to
+ # quadruple the dictionary size as long as it's not too big.
+ # count the number of valid entries
+ i = 0
+ num_items = 0
+ while i < d.num_items:
+ if old_entries.valid(i):
+ num_items += 1
+ i += 1
+ if num_items > 50000: new_estimate = (num_items + 1) * 2
+ else: new_estimate = (num_items + 1) * 4
+ new_size = rdict.DICT_INITSIZE
+ while new_size <= new_estimate:
+ new_size *= 2
+ #
+ new_item_size = new_size // 3 * 2 + 1
+ d.entries = lltype.typeOf(old_entries).TO.allocate(new_item_size)
+ d.indexes = lltype.typeOf(d).TO.indexes.TO.allocate(new_size)
+ d.resize_counter = new_item_size - num_items
+ i = 0
+ indexes = d.indexes
+ j = 0
+ while i < old_size:
+ index = old_indexes[i]
+ if old_entries.valid(index):
+ hash = old_entries.hash(index)
+ lookup_i = rdict.ll_dict_lookup_clean(d, hash)
+ indexes[lookup_i] = j
+ llop.debug_print(lltype.Void, "inserting", hex(hash), i,
+ "to", lookup_i, index, "=>", j)
+ llop.debug_print(lltype.Void, hex(old_entries[index].f_hash))
+ d.entries[j].key = old_entries[index].key
+ d.entries[j].value = old_entries[index].value
+ d.entries[j].f_hash = old_entries[index].f_hash
+ j += 1
+ i += 1
+ d.num_items = j
def ll_keyeq(d, weakkey1, realkey2):
# only called by ll_dict_lookup() with the first arg coming from an
@@ -188,13 +229,15 @@
if not weakkey1:
assert bool(realkey2)
return False
- return weakref_deref(rclass.OBJECTPTR, weakkey1) == realkey2
+ realkey1 = weakref_deref(rclass.OBJECTPTR, weakkey1)
+ llop.debug_print(lltype.Void, "comparison", realkey1, realkey2)
+ return realkey1 == realkey2
@jit.dont_look_inside
def ll_length(d):
# xxx slow, but it's only for debugging
- ll_update_num_items(d)
- #llop.debug_print(lltype.Void, 'length:', d.num_items)
+ d.resize()
+ llop.debug_print(lltype.Void, 'length:', d.num_items)
return d.num_items
dictmeths = {
@@ -202,10 +245,15 @@
'll_set': ll_set,
'keyeq': ll_keyeq,
'paranoia': False,
+ 'resize': ll_weakdict_resize,
}
+INDEXESARRAY = lltype.GcArray(lltype.Signed,
+ adtmeths={'allocate' :
lltype.typeMethod(rdict._ll_malloc_indexes)})
+
WEAKDICT = lltype.GcStruct("weakkeydict",
("num_items", lltype.Signed),
("resize_counter", lltype.Signed),
+ ("indexes", lltype.Ptr(INDEXESARRAY)),
("entries", lltype.Ptr(WEAKDICTENTRYARRAY)),
adtmeths=dictmeths)
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -410,6 +410,8 @@
lltypesystem.
"""
assert x is not None
+ if hasattr(x, '_obj'):
+ return compute_identity_hash(x._obj) # hack hack hack
result = object.__hash__(x)
try:
x.__dict__['__precomputed_identity_hash'] = result
diff --git a/pypy/rlib/rweakref.py b/pypy/rlib/rweakref.py
--- a/pypy/rlib/rweakref.py
+++ b/pypy/rlib/rweakref.py
@@ -68,7 +68,6 @@
from pypy.rpython import extregistry
from pypy.annotation import model as annmodel
-from pypy.annotation.bookkeeper import getbookkeeper
from pypy.tool.pairtype import pairtype
class Entry(extregistry.ExtRegistryEntry):
@@ -175,9 +174,9 @@
class __extend__(pairtype(SomeWeakKeyDict, SomeWeakKeyDict)):
def union((s_wkd1, s_wkd2)):
if s_wkd1.keyclassdef is not s_wkd2.keyclassdef:
- return SomeObject() # not the same key class! complain...
+ return annmodel.SomeObject() # not the same key class! complain...
if s_wkd1.valueclassdef is not s_wkd2.valueclassdef:
- return SomeObject() # not the same value class! complain...
+ return annmodel.SomeObject() # not the same value class!
complain...
return SomeWeakKeyDict(s_wkd1.keyclassdef, s_wkd1.valueclassdef)
class Entry(extregistry.ExtRegistryEntry):
diff --git a/pypy/rlib/test/test_rweakkeydict.py
b/pypy/rlib/test/test_rweakkeydict.py
--- a/pypy/rlib/test/test_rweakkeydict.py
+++ b/pypy/rlib/test/test_rweakkeydict.py
@@ -2,6 +2,8 @@
from pypy.rlib import rgc
from pypy.rlib.rweakref import RWeakKeyDictionary
from pypy.rpython.test.test_llinterp import interpret
+from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.rpython.lltypesystem import lltype
class KX(object):
pass
@@ -15,7 +17,6 @@
class VY(VX):
pass
-
def make_test(loop=100, prebuilt=None):
def g(d):
assert d.get(KX()) is None
@@ -35,19 +36,25 @@
d = prebuilt
if d is None:
d = RWeakKeyDictionary(KX, VX)
+ llop.debug_print(lltype.Void, "XXX 1")
k1, k3, v1, v2, v3 = g(d)
rgc.collect(); rgc.collect()
+ llop.debug_print(lltype.Void, "XXX 2")
assert d.get(k1) is v1
assert d.get(k3) is v3
assert d.get(k1) is not v2
assert d.get(k3) is not v2
+ llop.debug_print(lltype.Void, "XXX 3")
assert d.length() == 2
+ llop.debug_print(lltype.Void, "XXX 4")
d.set(k1, None)
assert d.get(k1) is None
assert d.get(k3) is v3
assert d.length() == 1
+ llop.debug_print(lltype.Void, "XXX 5")
# resizing should also work
lots_of_keys = [KX() for i in range(loop)]
+ llop.debug_print(lltype.Void, "XXX 6")
for k in lots_of_keys:
d.set(k, v1)
for k in lots_of_keys:
@@ -56,19 +63,26 @@
assert d.get(k3) is v3
assert d.length() == loop + 1
# a subclass
+ llop.debug_print(lltype.Void, "XXX 7")
ky = KY()
vy = VY()
d.set(ky, vy)
assert d.get(ky) is vy
assert d.length() == loop + 2
# deleting by storing Nones
+ llop.debug_print(lltype.Void, "XXX 8")
for k in lots_of_keys:
d.set(k, None)
+ llop.debug_print(lltype.Void, "XXX 9")
for k in lots_of_keys:
assert d.get(k) is None
+ llop.debug_print(lltype.Void, "XXX 10")
assert d.get(k1) is None
+ llop.debug_print(lltype.Void, "XXX 11")
assert d.get(k3) is v3
+ llop.debug_print(lltype.Void, "XXX 12")
assert d.get(ky) is vy
+ llop.debug_print(lltype.Void, "XXX 13")
assert d.length() == 2
return f
diff --git a/pypy/rpython/lltypesystem/rdict.py
b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -42,20 +42,19 @@
def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
ll_fasthash_function=None, ll_hash_function=None,
- ll_eq_function=None):
+ ll_eq_function=None, method_cache={}):
# get the actual DICT type. if DICT is None, it's created, otherwise
# forward reference is becoming DICT
if DICT is None:
DICT = lltype.GcForwardReference()
# compute the shape of the DICTENTRY structure
entryfields = []
- entrymeths = {
- 'allocate': lltype.typeMethod(_ll_malloc_entries),
- 'must_clear_key': (isinstance(DICTKEY, lltype.Ptr)
- and DICTKEY._needsgc()),
- 'must_clear_value': (isinstance(DICTVALUE, lltype.Ptr)
- and DICTVALUE._needsgc()),
- }
+ entrymeths = {'allocate': lltype.typeMethod(_ll_malloc_entries),
+ 'valid': ll_valid_entry,
+ 'clear_key': (isinstance(DICTKEY, lltype.Ptr) and
+ DICTKEY._needsgc()),
+ 'clear_value': (isinstance(DICTVALUE, lltype.Ptr) and
+ DICTVALUE._needsgc())}
# * the key
entryfields.append(("key", DICTKEY))
@@ -79,7 +78,7 @@
DICTENTRY = lltype.Struct("dictentry", *entryfields)
DICTENTRYARRAY = lltype.GcArray(DICTENTRY,
adtmeths=entrymeths)
- array_adtmeths = {'allocate': lltype.typeMethod(_ll_malloc_items)}
+ array_adtmeths = {'allocate': lltype.typeMethod(_ll_malloc_indexes)}
fields = [("num_items", lltype.Signed),
("resize_counter", lltype.Signed),
("entries", lltype.Ptr(DICTENTRYARRAY)),
@@ -111,6 +110,7 @@
adtmeths['KEY'] = DICTKEY
adtmeths['VALUE'] = DICTVALUE
adtmeths['allocate'] = lltype.typeMethod(_ll_malloc_dict)
+ adtmeths['resize'] = ll_dict_resize
DICT.become(lltype.GcStruct("dicttable", adtmeths=adtmeths,
*fields))
return DICT
@@ -337,6 +337,9 @@
def ll_hash_from_cache(entries, i):
return entries[i].f_hash
+def ll_valid_entry(entires, index):
+ return index >= 0
+
def ll_hash_recomputed(entries, i):
ENTRIES = lltype.typeOf(entries).TO
return ENTRIES.fasthashfn(entries[i].key)
@@ -390,7 +393,7 @@
ll_assert(not valid, "valid but not everused")
rc = d.resize_counter - 1
if rc <= 0: # if needed, resize the dict -- before the insertion
- ll_dict_resize(d)
+ d.resize()
i = ll_dict_lookup_clean(d, hash)
# then redo the lookup for 'key'
entry = d.entries[index]
@@ -442,9 +445,9 @@
d.indexes[i] = DELETED
d.num_items -= 1
entry = d.entries[d.num_items]
- if ENTRIES.must_clear_key:
+ if ENTRIES.clear_key:
entry.key = lltype.nullptr(ENTRY.key.TO)
- if ENTRIES.must_clear_value:
+ if ENTRIES.clear_value:
entry.value = lltype.nullptr(ENTRY.value.TO)
#
# The rest is commented out: like CPython we no longer shrink the
@@ -482,7 +485,7 @@
indexes = d.indexes
while i < old_size:
index = old_indexes[i]
- if index >= 0:
+ if old_entries.valid(index):
indexes[ll_dict_lookup_clean(d, old_entries.hash(index))] = index
i += 1
rgc.ll_arraycopy(old_entries, d.entries, 0, 0, min(len(old_entries),
@@ -493,6 +496,14 @@
PERTURB_SHIFT = 5
_look_inside_lookup = lambda d, key, hash: jit.isvirtual(d) and
jit.isconstant(key)
+from pypy.rlib.objectmodel import compute_identity_hash
+
+def ll_debugrepr(x):
+ if x:
+ h = compute_identity_hash(x)
+ else:
+ h = 0
+ return '<%x>' % (h,)
@jit.look_inside_iff(_look_inside_lookup)
def ll_dict_lookup(d, key, hash):
@@ -501,18 +512,21 @@
mask = len(indexes) - 1
i = hash & mask
# do the first try before any looping
+ ENTRIES = lltype.typeOf(entries).TO
+ direct_compare = not hasattr(ENTRIES, 'no_direct_compare')
index = indexes[i]
- if index >= 0:
+ if entries.valid(index):
checkingkey = entries[index].key
- if checkingkey == key:
+ if direct_compare and checkingkey == key:
return i # found the entry
if d.keyeq is not None and entries.hash(index) == hash:
# correct hash, maybe the key is e.g. a different pointer to
# an equal object
found = d.keyeq(checkingkey, key)
+ llop.debug_print(lltype.Void, "comparing keys",
ll_debugrepr(checkingkey), ll_debugrepr(key), found)
if d.paranoia:
if (entries != d.entries or
- not indexes[i] >= 0 or entries[index].key != checkingkey):
+ not entries.valid(indexes[i]) or entries[index].key !=
checkingkey):
# the compare did major nasty stuff to the dict: start over
return ll_dict_lookup(d, key, hash)
if found:
@@ -538,9 +552,9 @@
if freeslot == -1:
freeslot = i
return freeslot | HIGHEST_BIT
- elif index >= 0:
+ elif entries.valid(index):
checkingkey = entries[index].key
- if checkingkey == key:
+ if direct_compare and checkingkey == key:
return i
if d.keyeq is not None and entries.hash(index) == hash:
# correct hash, maybe the key is e.g. a different pointer to
@@ -548,7 +562,7 @@
found = d.keyeq(checkingkey, key)
if d.paranoia:
if (entries != d.entries or
- not indexes[i] >= 0 or
+ not entries.valid(indexes[i]) or
entries[index].key != checkingkey):
# the compare did major nasty stuff to the dict:
# start over
@@ -614,7 +628,7 @@
return lltype.malloc(DICT)
def _ll_malloc_entries(ENTRIES, n):
return lltype.malloc(ENTRIES, n, zero=True)
-def _ll_malloc_items(ITEMS, n):
+def _ll_malloc_indexes(ITEMS, n):
res = lltype.malloc(ITEMS, n)
i = 0
while i < n:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit