Author: Maciej Fijalkowski <[email protected]>
Branch: rdict-experiments-2
Changeset: r59957:67c1f89d912c
Date: 2013-01-11 19:29 +0200
http://bitbucket.org/pypy/pypy/changeset/67c1f89d912c/

Log:    Try to use different index sizes in a smarter way. a bit too much
        mess for my liking, but a bit no clue what to do

diff --git a/pypy/rpython/lltypesystem/rdict.py 
b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -1,3 +1,5 @@
+
+import sys
 from pypy.tool.pairtype import pairtype
 from pypy.objspace.flow.model import Constant
 from pypy.rpython.rdict import (AbstractDictRepr, AbstractDictIteratorRepr,
@@ -6,6 +8,7 @@
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib import objectmodel, jit, rgc
 from pypy.rlib.debug import ll_assert
+from pypy.rlib.objectmodel import specialize
 from pypy.rlib.rarithmetic import r_uint, intmask, LONG_BIT
 from pypy.rpython import rmodel
 from pypy.rpython.error import TyperError
@@ -352,58 +355,58 @@
 DICTINDEX_INT = lltype.Ptr(lltype.GcArray(rffi.INT_real))
 DICTINDEX_SHORT = lltype.Ptr(lltype.GcArray(rffi.SHORT))
 DICTINDEX_BYTE = lltype.Ptr(lltype.GcArray(rffi.SIGNEDCHAR))
-MAX_INT_MASK = ~(2 ** 31 - 1)
-MAX_SHORT_MASK = ~(2 ** 16 - 1)
-MAX_BYTE_MASK = ~(2 ** 8 - 1)
 
 def _ll_malloc_indexes(n):
-    # XXXX 64 bit only
-    #if n & MAX_INT_MASK:
-    res = lltype.cast_opaque_ptr(llmemory.GCREF,
-                                 lltype.malloc(DICTINDEX_SIGNED.TO, n,
-                                               zero=True))
-    #elif n & MAX_SHORT_MASK:
-    #    res = lltype.cast_opaque_ptr(llmemory.GCREF,
-    #                                 lltype.malloc(DICTINDEX_INT.TO, n))
-    #elif n & MAX_BYTE_MASK:
-    #    res = lltype.cast_opaque_ptr(llmemory.GCREF,
-    #                                 lltype.malloc(DICTINDEX_SHORT.TO, n))
-    #else:
-    #    res = lltype.cast_opaque_ptr(llmemory.GCREF,
-    #                                 lltype.malloc(DICTINDEX_BYTE.TO, n))
+    if IS_64BIT and n < 2 ** 31:
+        res = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                     lltype.malloc(DICTINDEX_INT.TO, n,
+                                                   zero=True))        
+    else:
+        res = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                     lltype.malloc(DICTINDEX_SIGNED.TO, n,
+                                                   zero=True))
     return res
 
-def ll_index_getitem(size, indexes, i):
-    #if size & MAX_INT_MASK:
+def ll_index_getitem_signed(size, indexes, i):
     return lltype.cast_opaque_ptr(DICTINDEX_SIGNED, indexes)[i] - 2
-    #elif size & MAX_SHORT_MASK:    
-    #    return rffi.cast(lltype.Signed,
-    #                     lltype.cast_opaque_ptr(DICTINDEX_INT, indexes)[i])
-    #elif size & MAX_BYTE_MASK:
-    #    return rffi.cast(lltype.Signed,
-    #                     lltype.cast_opaque_ptr(DICTINDEX_SHORT, indexes)[i])
-    #else:
-    #    return rffi.cast(lltype.Signed,
-    #                     lltype.cast_opaque_ptr(DICTINDEX_BYTE, indexes)[i])
 
-def ll_index_setitem(size, indexes, i, v):
-    #if size & MAX_INT_MASK:
+def ll_index_getitem_int(size, indexes, i):
+    res = lltype.cast_opaque_ptr(DICTINDEX_INT, indexes)[i]
+    return rffi.cast(lltype.Signed, res) - 2
+
+def ll_index_setitem_signed(size, indexes, i, v):
     lltype.cast_opaque_ptr(DICTINDEX_SIGNED, indexes)[i] = v + 2
-    #elif size & MAX_SHORT_MASK:    
-    #    lltype.cast_opaque_ptr(DICTINDEX_INT, indexes)[i] = rffi.cast(
-    #        rffi.INT_real, v)
-    #elif size & MAX_BYTE_MASK:
-    #    lltype.cast_opaque_ptr(DICTINDEX_SHORT, indexes)[i] = rffi.cast(
-    #        rffi.SHORT, v)
-    #else:
-    #    lltype.cast_opaque_ptr(DICTINDEX_BYTE, indexes)[i] = rffi.cast(
-    #        rffi.SIGNEDCHAR, v)
 
-def ll_dict_copy_indexes(size, from_indexes, to_indexes):
+def ll_index_setitem_int(size, indexes, i, v):
+    v = rffi.cast(rffi.INT_real, v + 2)
+    lltype.cast_opaque_ptr(DICTINDEX_INT, indexes)[i] = v
+
+IS_64BIT = sys.maxint != 2 ** 31 - 1
+
+def _pick_index_setitem(d):
+    if IS_64BIT and d.size < 2 ** 31:
+        return ll_index_setitem_int            
+    return ll_index_setitem_signed
+
+def _pick_index_getitem(d):
+    if IS_64BIT and d.size < 2 ** 31:
+        return ll_index_getitem_int            
+    return ll_index_getitem_signed
+
+def d_signed_indexes(d):
+    if IS_64BIT and d.size < 2 ** 31:
+        return False
+    return True
+
+def ll_dict_copy_indexes(size, from_d, to_d):
+    ll_index_getitem = _pick_index_getitem(from_d)
+    ll_index_setitem = _pick_index_setitem(to_d)
+    from_indexes = from_d.indexes
+    to_indexes = to_d.indexes
     i = 0
     while i < size:
-        ll_index_setitem(size, to_indexes, i,
-                         ll_index_getitem(size, from_indexes, i))
+        v = ll_index_getitem(size, from_indexes, i)
+        ll_index_setitem(size, to_indexes, i, v)
         i += 1
 
 # ---------------------- entries -----------------------
@@ -446,7 +449,8 @@
     ENTRIES = lltype.typeOf(entries).TO
     return ENTRIES.fasthashfn(entries[i].key)
 
-def ll_get_value(d, i):
[email protected]_and_arg(2)
+def ll_get_value(d, i, ll_index_getitem):
     return d.entries.getitem_clean(ll_index_getitem(d.size, d.indexes, 
i)).value
 
 def ll_keyhash_custom(d, key):
@@ -465,22 +469,45 @@
     return bool(d) and d.num_items != 0
 
 def ll_dict_getitem(d, key):
-    i = ll_dict_lookup(d, key, d.keyhash(key))
-    if not i & HIGHEST_BIT:
-        return ll_get_value(d, i)
+    if d_signed_indexes(d):
+        i = ll_dict_lookup(d, key, d.keyhash(key), ll_index_getitem_signed)
+        if not i & HIGHEST_BIT:
+            return ll_get_value(d, i, ll_index_getitem_signed)
+        else:
+            raise KeyError
     else:
-        raise KeyError
+        i = ll_dict_lookup(d, key, d.keyhash(key), ll_index_getitem_int)
+        if not i & HIGHEST_BIT:
+            return ll_get_value(d, i, ll_index_getitem_int)
+        else:
+            raise KeyError
 
 def ll_dict_setitem(d, key, value):
-    hash = d.keyhash(key)
-    i = ll_dict_lookup(d, key, hash)
-    return _ll_dict_setitem_lookup_done(d, key, value, hash, i)
+    if d_signed_indexes(d):
+        hash = d.keyhash(key)
+        i = ll_dict_lookup(d, key, hash, ll_index_getitem_signed)
+        _ll_dict_setitem_lookup_done(d, key, value, hash, i,
+                                     ll_index_getitem_signed,
+                                     ll_index_setitem_signed)
+    else:
+        hash = d.keyhash(key)
+        i = ll_dict_lookup(d, key, hash, ll_index_getitem_int)
+        _ll_dict_setitem_lookup_done(d, key, value, hash, i,
+                                     ll_index_getitem_int,
+                                     ll_index_setitem_int)
 
 def ll_dict_insertclean(d, key, value, hash):
-    i = ll_dict_lookup_clean(d, hash)
-    return _ll_dict_setitem_lookup_done(d, key, value, hash, i | HIGHEST_BIT)
+    if d_signed_indexes(d):
+        i = ll_dict_lookup_clean(d, hash, ll_index_getitem_signed)
+        _ll_dict_setitem_lookup_done(d, key, value, hash, i | HIGHEST_BIT,
+                                     ll_index_getitem_signed)
+    else:
+        i = ll_dict_lookup_clean(d, hash, ll_index_getitem_int)
+        _ll_dict_setitem_lookup_done(d, key, value, hash, i | HIGHEST_BIT,
+                                     ll_index_getitem_int)
 
-def ll_dict_lookup_clean(d, hash):
[email protected]_and_arg(2)
+def ll_dict_lookup_clean(d, hash, ll_index_getitem):
     # a simplified version of ll_dict_lookup() which assumes that the
     # key is new, and the dictionary doesn't contain deleted entries.
     # It only finds the next free slot for the given hash.
@@ -499,13 +526,15 @@
         perturb >>= PERTURB_SHIFT
     return i
 
-def _look_inside_setitem(d, key, value, hash, i):
+def _look_inside_setitem(d, key, value, hash, i, _1, _2):
     return jit.isvirtual(d) and jit.isconstant(key)
 
 # It may be safe to look inside always, it has a few branches though, and their
 # frequencies needs to be investigated.
 @jit.look_inside_iff(_look_inside_setitem)
-def _ll_dict_setitem_lookup_done(d, key, value, hash, i):
[email protected]_and_arg(5, 6)
+def _ll_dict_setitem_lookup_done(d, key, value, hash, i, ll_index_getitem,
+                                 ll_index_setitem):
     valid = (i & HIGHEST_BIT) == 0
     i = i & MASK
     ENTRY = lltype.typeOf(d.entries).TO.OF
@@ -518,7 +547,10 @@
         rc = d.resize_counter - 1
         if rc <= 0:       # if needed, resize the dict -- before the insertion
             d.resize()
-            i = ll_dict_lookup_clean(d, hash)
+            if d_signed_indexes(d):
+                i = ll_dict_lookup_clean(d, hash, ll_index_getitem_signed)
+            else:
+                i = ll_dict_lookup_clean(d, hash, ll_index_getitem_int)
             # then redo the lookup for 'key'
             entry = d.entries.getitem(d, index)
             rc = d.resize_counter - 1
@@ -543,20 +575,28 @@
     d.num_items += 1
 
 def ll_dict_delitem(d, key):
-    i = ll_dict_lookup(d, key, d.keyhash(key))
-    if i & HIGHEST_BIT:
-        raise KeyError
-    _ll_dict_del(d, i)
+    if d_signed_indexes(d):
+        i = ll_dict_lookup(d, key, d.keyhash(key), ll_index_getitem_signed)
+        if i & HIGHEST_BIT:
+            raise KeyError
+        _ll_dict_del(d, i, ll_index_getitem_signed, ll_index_setitem_signed)
+    else:
+        i = ll_dict_lookup(d, key, d.keyhash(key), ll_index_getitem_int)
+        if i & HIGHEST_BIT:
+            raise KeyError
+        _ll_dict_del(d, i, ll_index_getitem_int, ll_index_setitem_int)
 
[email protected]_inside_iff(lambda d, i: jit.isvirtual(d) and jit.isconstant(i))
-def _ll_dict_del(d, i):
[email protected]_inside_iff(lambda d, i, _1, _2: jit.isvirtual(d) and
+                     jit.isconstant(i))
[email protected]_and_arg(2, 3)
+def _ll_dict_del(d, i, ll_index_getitem, ll_index_setitem):
     index = ll_index_getitem(d.size, d.indexes, i)
     ENTRIES = lltype.typeOf(d.entries).TO
     ENTRY = ENTRIES.OF
     if index != d.num_items - 1:
         old_entry = d.entries.getitem(d, d.num_items - 1)
         key = old_entry.key
-        to_insert_i = ll_dict_lookup(d, key, d.keyhash(key))
+        to_insert_i = ll_dict_lookup(d, key, d.keyhash(key), ll_index_getitem)
         ll_assert(not to_insert_i & HIGHEST_BIT, "invalid entry")
         ll_index_setitem(d.size, d.indexes, to_insert_i, index)
         # copy the value
@@ -603,22 +643,31 @@
         new_size *= 2
     #
     new_item_size = new_size // 3 * 2 + 1
+    ll_index_getitem = _pick_index_getitem(d)
     d.indexes = _ll_malloc_indexes(new_size)
     d.size = new_size
     d.resize_counter = new_item_size - d.num_items
     i = 0
     indexes = d.indexes
+    ll_index_setitem = _pick_index_setitem(d)
     while i < old_size:
         index = ll_index_getitem(old_size, old_indexes, i)
         if old_entries.valid(index):
-            pos = ll_dict_lookup_clean(d, old_entries.hash(index))
+            # XXX we can do better here, but we have to remember about
+            #     paranoia
+            if d_signed_indexes(d):
+                pos = ll_dict_lookup_clean(d, old_entries.hash(index),
+                                           ll_index_getitem_signed)
+            else:
+                pos = ll_dict_lookup_clean(d, old_entries.hash(index),
+                                           ll_index_getitem_int)
             ll_index_setitem(d.size, indexes, pos, index)
         i += 1
 
 # ------- a port of CPython's dictobject.c's lookdict implementation -------
 PERTURB_SHIFT = 5
 
-_look_inside_lookup = lambda d, key, hash: jit.isvirtual(d) and 
jit.isconstant(key)
+_look_inside_lookup = lambda d, key, hash, _: jit.isvirtual(d) and 
jit.isconstant(key)
 from pypy.rlib.objectmodel import compute_identity_hash
 
 def ll_debugrepr(x):
@@ -629,7 +678,8 @@
     return '<%x>' % (h,)
 
 @jit.look_inside_iff(_look_inside_lookup)
-def ll_dict_lookup(d, key, hash):
[email protected]_and_arg(3)
+def ll_dict_lookup(d, key, hash, ll_index_getitem):
     entries = d.entries
     indexes = d.indexes
     mask = d.size - 1
@@ -648,11 +698,16 @@
             found = d.keyeq(checkingkey, key)
             #llop.debug_print(lltype.Void, "comparing keys", 
ll_debugrepr(checkingkey), ll_debugrepr(key), found)
             if d.paranoia:
-                if (entries != d.entries or
+                if (entries != d.entries or indexes != d.indexes or
                     not entries.valid(ll_index_getitem(d.size, indexes, i))
                     or entries.getitem_clean(index).key != checkingkey):
                     # the compare did major nasty stuff to the dict: start over
-                    return ll_dict_lookup(d, key, hash)
+                    if d_signed_indexes(d):
+                        return ll_dict_lookup(d, key, hash,
+                                              ll_index_getitem_signed)
+                    else:
+                        return ll_dict_lookup(d, key, hash,
+                                              ll_index_getitem_int)
             if found:
                 return i   # found the entry
         freeslot = -1
@@ -685,12 +740,17 @@
                 # an equal object
                 found = d.keyeq(checkingkey, key)
                 if d.paranoia:
-                    if (entries != d.entries or
+                    if (entries != d.entries or indexes != d.indexes or
                         not entries.valid(ll_index_getitem(d.size, indexes, 
i)) or
                         entries.getitem_clean(index).key != checkingkey):
                         # the compare did major nasty stuff to the dict:
                         # start over
-                        return ll_dict_lookup(d, key, hash)
+                        if d_signed_indexes(d):
+                            return ll_dict_lookup(d, key, hash,
+                                                  ll_index_getitem_signed)
+                        else:
+                            return ll_dict_lookup(d, key, hash,
+                                                  ll_index_getitem_int)
                 if found:
                     return i   # found the entry
         elif freeslot == -1:
@@ -814,20 +874,43 @@
 # methods
 
 def ll_get(dict, key, default):
-    i = ll_dict_lookup(dict, key, dict.keyhash(key))
-    if not i & HIGHEST_BIT:
-        return ll_get_value(dict, i)
+    if d_signed_indexes(dict):
+        i = ll_dict_lookup(dict, key, dict.keyhash(key),
+                           ll_index_getitem_signed)
+        if not i & HIGHEST_BIT:
+            return ll_get_value(dict, i, ll_index_getitem_signed)
+        else:
+            return default
     else:
-        return default
+        i = ll_dict_lookup(dict, key, dict.keyhash(key),
+                           ll_index_getitem_int)
+        if not i & HIGHEST_BIT:
+            return ll_get_value(dict, i, ll_index_getitem_int)
+        else:
+            return default        
 
 def ll_setdefault(dict, key, default):
-    hash = dict.keyhash(key)
-    i = ll_dict_lookup(dict, key, hash)
-    if not i & HIGHEST_BIT:
-        return ll_get_value(dict, i)
+    if d_signed_indexes(dict):
+        hash = dict.keyhash(key)
+        i = ll_dict_lookup(dict, key, hash, ll_index_getitem_signed)
+        if not i & HIGHEST_BIT:
+            return ll_get_value(dict, i, ll_index_getitem_signed)
+        else:
+            _ll_dict_setitem_lookup_done(dict, key, default, hash, i,
+                                         ll_index_getitem_signed,
+                                         ll_index_setitem_signed)
+            return default
     else:
-        _ll_dict_setitem_lookup_done(dict, key, default, hash, i)
-        return default
+        hash = dict.keyhash(key)
+        i = ll_dict_lookup(dict, key, hash, ll_index_getitem_int)
+        if not i & HIGHEST_BIT:
+            return ll_get_value(dict, i, ll_index_getitem_int)
+        else:
+            _ll_dict_setitem_lookup_done(dict, key, default, hash, i,
+                                         ll_index_getitem_int,
+                                         ll_index_setitem_int)
+            return default
+
 
 def ll_copy(dict):
     DICT = lltype.typeOf(dict).TO
@@ -840,8 +923,7 @@
     d.resize_counter = dict.resize_counter
     if hasattr(DICT, 'fnkeyeq'):   d.fnkeyeq   = dict.fnkeyeq
     if hasattr(DICT, 'fnkeyhash'): d.fnkeyhash = dict.fnkeyhash
-    ll_dict_copy_indexes(d.size, dict.indexes, d.indexes)
-    #rgc.ll_arraycopy(dict.indexes, d.indexes, 0, 0, len(dict.indexes))
+    ll_dict_copy_indexes(d.size, dict, d)
     rgc.ll_arraycopy(dict.entries, d.entries, 0, 0, dict.num_items)
     return d
 
@@ -864,8 +946,19 @@
         entry = entries.getitem_clean(i)
         hash = entries.hash(i)
         key = entry.key
-        j = ll_dict_lookup(dic1, key, hash)
-        _ll_dict_setitem_lookup_done(dic1, key, entry.value, hash, j)
+        if d_signed_indexes(dic1):
+            j = ll_dict_lookup(dic1, key, hash, ll_index_getitem_signed)
+        else:
+            j = ll_dict_lookup(dic1, key, hash, ll_index_getitem_int)
+        # a separate lookup since it might have changed
+        if d_signed_indexes(dic1):
+            _ll_dict_setitem_lookup_done(dic1, key, entry.value, hash, j,
+                                         ll_index_getitem_signed,
+                                         ll_index_setitem_signed)
+        else:
+            _ll_dict_setitem_lookup_done(dic1, key, entry.value, hash, j,
+                                         ll_index_getitem_int,
+                                         ll_index_setitem_int)
         i += 1
 
 # this is an implementation of keys(), values() and items()
@@ -908,7 +1001,10 @@
 ll_dict_items  = _make_ll_keys_values_items('items')
 
 def ll_contains(d, key):
-    i = ll_dict_lookup(d, key, d.keyhash(key))
+    if d_signed_indexes(d):
+        i = ll_dict_lookup(d, key, d.keyhash(key), ll_index_getitem_signed)
+    else:
+        i = ll_dict_lookup(d, key, d.keyhash(key), ll_index_getitem_int)
     return not i & HIGHEST_BIT
 
 def ll_popitem(ELEM, dic):
@@ -922,13 +1018,24 @@
     return r
 
 def ll_pop(dic, key):
-    i = ll_dict_lookup(dic, key, dic.keyhash(key))
-    if not i & HIGHEST_BIT:
-        value = ll_get_value(dic, i)
-        _ll_dict_del(dic, i)
-        return value
+    if d_signed_indexes(dic):
+        i = ll_dict_lookup(dic, key, dic.keyhash(key), ll_index_getitem_signed)
+        if not i & HIGHEST_BIT:
+            value = ll_get_value(dic, i, ll_index_getitem_signed)
+            _ll_dict_del(dic, i, ll_index_getitem_signed,
+                         ll_index_setitem_signed)
+            return value
+        else:
+            raise KeyError
     else:
-        raise KeyError
+        i = ll_dict_lookup(dic, key, dic.keyhash(key), ll_index_getitem_int)
+        if not i & HIGHEST_BIT:
+            value = ll_get_value(dic, i, ll_index_getitem_int)
+            _ll_dict_del(dic, i, ll_index_getitem_int,
+                         ll_index_setitem_int)
+            return value
+        else:
+            raise KeyError
 
 def ll_pop_default(dic, key, dfl):
     try:
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -24,8 +24,9 @@
         yield x
 
 def foreach_index(ll_d):
+    ll_index_getitem = rdict._pick_index_getitem(ll_d)
     for i in range(ll_d.size):
-        yield rdict.ll_index_getitem(ll_d.size, ll_d.indexes, i)
+        yield ll_index_getitem(ll_d.size, ll_d.indexes, i)
 
 def count_items(ll_d, ITEM):
     c = 0
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to