Author: Armin Rigo <[email protected]>
Branch: all_ordered_dicts
Changeset: r75407:3f2c4305cbc5
Date: 2015-01-17 09:20 +0100
http://bitbucket.org/pypy/pypy/changeset/3f2c4305cbc5/
Log: Remove most of the quadratic complexity of
collections.OrderedDict.popitem(last=False).
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py
b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -34,7 +34,8 @@
# {byte, short, int, long} *indexes;
# dictentry *entries;
# lookup_function_no; # one of the four possible functions for different
-# # size dicts
+# # size dicts; the rest of the word is a counter for how
+# # many 'entries' at the start are known to be deleted
# (Function DICTKEY, DICTKEY -> bool) *fnkeyeq;
# (Function DICTKEY -> int) *fnkeyhash;
# }
@@ -44,7 +45,7 @@
@jit.look_inside_iff(lambda d, key, hash, flag: jit.isvirtual(d))
@jit.oopspec('ordereddict.lookup(d, key, hash, flag)')
def ll_call_lookup_function(d, key, hash, flag):
- fun = d.lookup_function_no
+ fun = d.lookup_function_no & FUNC_MASK
if fun == FUNC_BYTE:
return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE)
elif fun == FUNC_SHORT:
@@ -409,6 +410,8 @@
IS_64BIT = sys.maxint != 2 ** 31 - 1
+FUNC_SHIFT = 2
+FUNC_MASK = 0x03 # two bits
if IS_64BIT:
FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG = range(4)
else:
@@ -442,36 +445,42 @@
d.lookup_function_no = FUNC_LONG
def ll_clear_indexes(d, n):
- if n <= 256:
+ fun = d.lookup_function_no & FUNC_MASK
+ d.lookup_function_no = fun
+ if fun == FUNC_BYTE:
rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_BYTE, d.indexes))
- elif n <= 65536:
+ elif fun == FUNC_SHORT:
rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_SHORT, d.indexes))
- elif IS_64BIT and n <= 2 ** 32:
+ elif IS_64BIT and fun == FUNC_INT:
rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_INT, d.indexes))
+ elif fun == FUNC_LONG:
+ rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_LONG, d.indexes))
else:
- rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_LONG, d.indexes))
+ assert False
@jit.dont_look_inside
def ll_call_insert_clean_function(d, hash, i):
- if d.lookup_function_no == FUNC_BYTE:
+ fun = d.lookup_function_no & FUNC_MASK
+ if fun == FUNC_BYTE:
ll_dict_store_clean(d, hash, i, TYPE_BYTE)
- elif d.lookup_function_no == FUNC_SHORT:
+ elif fun == FUNC_SHORT:
ll_dict_store_clean(d, hash, i, TYPE_SHORT)
- elif IS_64BIT and d.lookup_function_no == FUNC_INT:
+ elif IS_64BIT and fun == FUNC_INT:
ll_dict_store_clean(d, hash, i, TYPE_INT)
- elif d.lookup_function_no == FUNC_LONG:
+ elif fun == FUNC_LONG:
ll_dict_store_clean(d, hash, i, TYPE_LONG)
else:
assert False
def ll_call_delete_by_entry_index(d, hash, i):
- if d.lookup_function_no == FUNC_BYTE:
+ fun = d.lookup_function_no & FUNC_MASK
+ if fun == FUNC_BYTE:
ll_dict_delete_by_entry_index(d, hash, i, TYPE_BYTE)
- elif d.lookup_function_no == FUNC_SHORT:
+ elif fun == FUNC_SHORT:
ll_dict_delete_by_entry_index(d, hash, i, TYPE_SHORT)
- elif IS_64BIT and d.lookup_function_no == FUNC_INT:
+ elif IS_64BIT and fun == FUNC_INT:
ll_dict_delete_by_entry_index(d, hash, i, TYPE_INT)
- elif d.lookup_function_no == FUNC_LONG:
+ elif fun == FUNC_LONG:
ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG)
else:
assert False
@@ -645,7 +654,7 @@
# full, so we know that 'd.num_live_items' should be at most 2/3 * 256
# (or 65536 or etc.) so after the ll_dict_remove_deleted_items() below
# at least 1/3rd items in 'd.entries' are free.
- fun = d.lookup_function_no
+ fun = d.lookup_function_no & FUNC_MASK
toobig = False
if fun == FUNC_BYTE:
assert d.num_live_items < ((1 << 8) - MIN_INDEXES_MINUS_ENTRIES)
@@ -783,7 +792,9 @@
else:
ll_malloc_indexes_and_choose_lookup(d, new_size)
d.resize_counter = new_size * 2 - d.num_live_items * 3
- assert d.resize_counter > 0
+ ll_assert(d.resize_counter > 0, "reindex: resize_counter <= 0")
+ ll_assert((d.lookup_function_no >> FUNC_SHIFT) == 0,
+ "reindex: lookup_fun >> SHIFT")
#
entries = d.entries
i = 0
@@ -999,7 +1010,8 @@
def ll_dictiter(ITERPTR, d):
iter = lltype.malloc(ITERPTR.TO)
iter.dict = d
- iter.index = 0
+ # initialize the index with usually 0, but occasionally a larger value
+ iter.index = d.lookup_function_no >> FUNC_SHIFT
return iter
@jit.look_inside_iff(lambda iter: jit.isvirtual(iter)
@@ -1018,6 +1030,17 @@
if entries.valid(index):
iter.index = nextindex
return index
+ else:
+ # In case of repeated iteration over the start of
+ # a dict where the items get removed, like
+ # collections.OrderedDict.popitem(last=False),
+ # the hack below will increase the value stored in
+ # the high bits of lookup_function_no and so the
+ # next iteration will start at a higher value.
+ # We should carefully reset these high bits to zero
+ # as soon as we do something like ll_dict_reindex().
+ if index == (dict.lookup_function_no >> FUNC_SHIFT):
+ dict.lookup_function_no += (1 << FUNC_SHIFT)
index = nextindex
# clear the reference to the dict and prevent restarts
iter.dict = lltype.nullptr(lltype.typeOf(iter).TO.dict.TO)
diff --git a/rpython/rtyper/test/test_rordereddict.py
b/rpython/rtyper/test/test_rordereddict.py
--- a/rpython/rtyper/test/test_rordereddict.py
+++ b/rpython/rtyper/test/test_rordereddict.py
@@ -160,6 +160,22 @@
assert ll_elem.item1 == 1
py.test.raises(KeyError, rordereddict.ll_dict_popitem, TUP, ll_d)
+ def test_popitem_first(self):
+ DICT = self._get_str_dict()
+ ll_d = rordereddict.ll_newdict(DICT)
+ rordereddict.ll_dict_setitem(ll_d, llstr("k"), 1)
+ rordereddict.ll_dict_setitem(ll_d, llstr("j"), 2)
+ rordereddict.ll_dict_setitem(ll_d, llstr("m"), 3)
+ ITER = rordereddict.get_ll_dictiter(lltype.Ptr(DICT))
+ for expected in ["k", "j", "m"]:
+ ll_iter = rordereddict.ll_dictiter(ITER, ll_d)
+ num = rordereddict._ll_dictnext(ll_iter)
+ ll_key = ll_d.entries[num].key
+ assert hlstr(ll_key) == expected
+ rordereddict.ll_dict_delitem(ll_d, ll_key)
+ ll_iter = rordereddict.ll_dictiter(ITER, ll_d)
+ py.test.raises(StopIteration, rordereddict._ll_dictnext, ll_iter)
+
def test_direct_enter_and_del(self):
def eq(a, b):
return a == b
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit