Author: Armin Rigo <[email protected]>
Branch: rdict-experiments-3
Changeset: r67248:0017edf4d84c
Date: 2013-10-09 16:52 +0200
http://bitbucket.org/pypy/pypy/changeset/0017edf4d84c/
Log: (arigo, fijal around) in-progress
diff --git a/rpython/rtyper/lltypesystem/rdict.py
b/rpython/rtyper/lltypesystem/rdict.py
--- a/rpython/rtyper/lltypesystem/rdict.py
+++ b/rpython/rtyper/lltypesystem/rdict.py
@@ -99,8 +99,12 @@
DICTENTRY = lltype.Struct("dictentry", *entryfields)
DICTENTRYARRAY = lltype.GcArray(DICTENTRY,
adtmeths=entrymeths)
- LOOKUP_FUNC = lltype.Ptr(lltype.FuncType([lltype.Ptr(DICT), DICTKEY,
lltype.Signed, lltype.Signed], lltype.Signed))
-
+ LOOKUP_FUNC = lltype.Ptr(lltype.FuncType([lltype.Ptr(DICT), DICTKEY,
+ lltype.Signed, lltype.Signed],
+ lltype.Signed))
+ LOOKCLEAN_FUNC = lltype.Ptr(lltype.FuncType([lltype.Ptr(DICT),
+ lltype.Signed],
+ lltype.Signed))
fields = [ ("num_items", lltype.Signed),
("num_used_items", lltype.Signed),
@@ -135,15 +139,18 @@
adtmeths['VALUE'] = DICTVALUE
adtmeths['allocate'] = lltype.typeMethod(_ll_malloc_dict)
adtmeths['empty_array'] = DICTENTRYARRAY.allocate(0)
- adtmeths['byte_lookup_function'] = new_lookup_function(LOOKUP_FUNC,
- T=rffi.UCHAR)
- adtmeths['short_lookup_function'] = new_lookup_function(LOOKUP_FUNC,
- T=rffi.USHORT)
- if IS_64BIT:
- adtmeths['int_lookup_function'] = new_lookup_function(LOOKUP_FUNC,
- T=rffi.UINT)
- adtmeths['long_lookup_function'] = new_lookup_function(LOOKUP_FUNC,
- T=lltype.Unsigned)
+
+ for name, T in [('byte', rffi.UCHAR),
+ ('short', rffi.USHORT),
+ ('int', rffi.UINT),
+ ('long', lltype.Unsigned)]:
+ if name == 'int' and not IS_64BIT:
+ continue
+ lookupfn, lookcleanfn = new_lookup_functions(LOOKUP_FUNC,
+ LOOKCLEAN_FUNC, T=T)
+ adtmeths['%s_lookup_function' % name] = lookupfn
+ adtmeths['%s_lookup_clean_function' % name] = lookcleanfn
+
DICT.become(lltype.GcStruct("dicttable", adtmeths=adtmeths,
*fields))
return DICT
@@ -397,21 +404,26 @@
lltype.malloc(DICTINDEX_BYTE.TO, n,
zero=True))
d.lookup_function = DICT.byte_lookup_function
+ return DICT.byte_lookup_clean_function
elif n <= 65536:
d.indexes = lltype.cast_opaque_ptr(llmemory.GCREF,
lltype.malloc(DICTINDEX_SHORT.TO, n,
zero=True))
d.lookup_function = DICT.short_lookup_function
+ return DICT.short_lookup_clean_function
elif IS_64BIT and n <= 2 ** 32:
d.indexes = lltype.cast_opaque_ptr(llmemory.GCREF,
lltype.malloc(DICTINDEX_INT.TO, n,
zero=True))
d.lookup_function = DICT.int_lookup_function
+ return DICT.int_lookup_clean_function
else:
d.indexes = lltype.cast_opaque_ptr(llmemory.GCREF,
lltype.malloc(DICTINDEX_LONG.TO, n,
zero=True))
d.lookup_function = DICT.long_lookup_function
+ return DICT.long_lookup_clean_function
+ll_malloc_indexes_and_choose_lookup._always_inline_ = True
def ll_valid_from_flag(entries, i):
return entries[i].f_valid
@@ -495,15 +507,14 @@
d.num_items += 1
rc = d.resize_counter - 3
if rc <= 0:
- XXX
ll_dict_resize(d)
- i = ll_dict_lookup_clean(d, hash) # then redo the lookup for 'key'
- entry = d.entries[i]
rc = d.resize_counter - 3
ll_assert(rc > 0, "ll_dict_resize failed?")
d.resize_counter = rc
def ll_dict_grow(d):
+ if d.num_items < d.num_used_items // 4:
+ xxxxxxxxx
# This over-allocates proportional to the list size, making room
# for additional growth. The over-allocation is mild, but is
# enough to give linear-time amortized behavior over a long
@@ -518,24 +529,37 @@
some += newsize >> 3
new_allocated = newsize + some
newitems = lltype.malloc(lltype.typeOf(d).TO.entries.TO, new_allocated)
- rgc.ll_arraycopy(d.entries, newitems, 0, 0, len(d.entries))
+ #
+ # XXX we should do this with rgc.ll_arraycopy()!!
+ ENTRY = lltype.typeOf(d).TO.entries.TO.OF
+ i = 0
+ while i < len(d.entries):
+ src = d.entries[i]
+ dst = newitems[i]
+ dst.key = src.key
+ dst.value = src.value
+ if hasattr(ENTRY, 'f_hash'):
+ dst.f_hash = src.f_hash
+ if hasattr(ENTRY, 'f_valid'):
+ dst.f_valid = src.f_valid
+ i += 1
d.entries = newitems
-def ll_dict_insertclean(d, key, value, hash):
+def ll_dict_insertclean(d, key, value, hash, lookcleanfn):
# Internal routine used by ll_dict_resize() to insert an item which is
# known to be absent from the dict. This routine also assumes that
# the dict contains no deleted entries. This routine has the advantage
# of never calling d.keyhash() and d.keyeq(), so it cannot call back
# to user code. ll_dict_insertclean() doesn't resize the dict, either.
- i = ll_dict_lookup_clean(d, hash)
+ index = lookcleanfn(d, hash)
ENTRY = lltype.typeOf(d.entries).TO.OF
- entry = d.entries[i]
+ entry = d.entries[index]
entry.value = value
entry.key = key
if hasattr(ENTRY, 'f_hash'): entry.f_hash = hash
if hasattr(ENTRY, 'f_valid'): entry.f_valid = True
- if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
d.num_items += 1
+ d.num_used_items += 1
d.resize_counter -= 3
def ll_dict_delitem(d, key):
@@ -571,29 +595,31 @@
# avoid extra branches.
def ll_dict_resize(d):
- old_entries = d.entries
- old_size = len(old_entries)
# make a 'new_size' estimate and shrink it if there are many
# deleted entry markers. See CPython for why it is a good idea to
# quadruple the dictionary size as long as it's not too big.
- num_items = d.num_items + 1
- if num_items > 50000: new_estimate = num_items * 2
- else: new_estimate = num_items * 4
+ num_items = d.num_used_items
+ if num_items > 50000:
+ new_estimate = num_items * 2
+ else:
+ new_estimate = num_items * 4
new_size = DICT_INITSIZE
while new_size <= new_estimate:
new_size *= 2
+ lookcleanfn = ll_malloc_indexes_and_choose_lookup(d, new_size)
+ d.num_items = 0
+ d.num_used_items = 0
+ d.resize_counter = new_size * 2
#
- d.entries = lltype.typeOf(old_entries).TO.allocate(new_size)
- d.num_items = 0
- d.resize_counter = new_size * 2
+ entries = d.entries
i = 0
- while i < old_size:
- if old_entries.valid(i):
- hash = old_entries.hash(i)
- entry = old_entries[i]
- ll_dict_insertclean(d, entry.key, entry.value, hash)
+ while i < num_items:
+ if entries.valid(i):
+ hash = entries.hash(i)
+ entry = entries[i]
+ ll_dict_insertclean(d, entry.key, entry.value, hash, lookcleanfn)
i += 1
- old_entries.delete()
+ #old_entries.delete() XXXX!
ll_dict_resize.oopspec = 'dict.resize(d)'
# ------- a port of CPython's dictobject.c's lookdict implementation -------
@@ -607,7 +633,7 @@
FLAG_STORE = 1
FLAG_DELETE = 2
-def new_lookup_function(LOOKUP_FUNC, T):
+def new_lookup_functions(LOOKUP_FUNC, LOOKCLEAN_FUNC, T):
INDEXES = lltype.Ptr(lltype.GcArray(T))
@jit.look_inside_iff(lambda d, key, hash, store_flag:
@@ -616,7 +642,7 @@
entries = d.entries
indexes = lltype.cast_opaque_ptr(INDEXES, d.indexes)
mask = len(indexes) - 1
- i = hash & mask
+ i = r_uint(hash & mask)
# do the first try before any looping
ENTRIES = lltype.typeOf(entries).TO
direct_compare = not hasattr(ENTRIES, 'no_direct_compare')
@@ -662,7 +688,6 @@
perturb = r_uint(hash)
while 1:
# compute the next index using unsigned arithmetic
- i = r_uint(i)
i = (i << 2) + i + perturb + 1
i = intmask(i) & mask
# keep 'i' as a signed number here, to consistently pass signed
@@ -706,21 +731,24 @@
deletedslot = i
perturb >>= PERTURB_SHIFT
- return llhelper(LOOKUP_FUNC, ll_dict_lookup)
+ def ll_dict_lookup_clean(d, hash):
+ # a simplified version of ll_dict_lookup() which assumes that the
+ # key is new, and the dictionary doesn't contain deleted entries.
+ # It only finds the next free slot for the given hash.
+ indexes = lltype.cast_opaque_ptr(INDEXES, d.indexes)
+ mask = len(indexes) - 1
+ i = r_uint(hash & mask)
+ perturb = r_uint(hash)
+ while rffi.cast(lltype.Signed, indexes[i]) != 0:
+ i = (i << 2) + i + perturb + 1
+ i = i & mask
+ perturb >>= PERTURB_SHIFT
+ index = d.num_used_items
+ indexes[i] = rffi.cast(T, index + VALID_OFFSET)
+ return index
-def ll_dict_lookup_clean(d, hash):
- # a simplified version of ll_dict_lookup() which assumes that the
- # key is new, and the dictionary doesn't contain deleted entries.
- # It only finds the next free slot for the given hash.
- entries = d.entries
- mask = len(entries) - 1
- i = r_uint(hash & mask)
- perturb = r_uint(hash)
- while entries.everused(i):
- i = (i << 2) + i + perturb + 1
- i = i & mask
- perturb >>= PERTURB_SHIFT
- return i
+ return (llhelper(LOOKUP_FUNC, ll_dict_lookup),
+ llhelper(LOOKCLEAN_FUNC, ll_dict_lookup_clean))
# ____________________________________________________________
#
diff --git a/rpython/rtyper/test/test_rdict.py
b/rpython/rtyper/test/test_rdict.py
--- a/rpython/rtyper/test/test_rdict.py
+++ b/rpython/rtyper/test/test_rdict.py
@@ -23,8 +23,11 @@
assert 0 <= x < 4
yield x
+def get_indexes(ll_d):
+ return ll_d.indexes._obj.container._as_ptr()
+
def foreach_index(ll_d):
- indexes = ll_d.indexes._obj.container._as_ptr()
+ indexes = get_indexes(ll_d)
for i in range(len(indexes)):
yield rffi.cast(lltype.Signed, indexes[i])
@@ -84,10 +87,10 @@
rdict.ll_dict_setitem(ll_d, llstr("b"), 2)
rdict.ll_dict_setitem(ll_d, llstr("c"), 3)
rdict.ll_dict_setitem(ll_d, llstr("d"), 4)
- assert ll_d.size == 8
+ assert len(get_indexes(ll_d)) == 8
rdict.ll_dict_setitem(ll_d, llstr("e"), 5)
rdict.ll_dict_setitem(ll_d, llstr("f"), 6)
- assert ll_d.size == 32
+ assert len(get_indexes(ll_d)) == 32
for item in ['a', 'b', 'c', 'd', 'e', 'f']:
assert rdict.ll_dict_getitem(ll_d, llstr(item)) == ord(item) -
ord('a') + 1
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit