Author: Maciej Fijalkowski <[email protected]>
Branch: rdict-experiments
Changeset: r59658:c463f8834835
Date: 2013-01-03 10:50 +0200
http://bitbucket.org/pypy/pypy/changeset/c463f8834835/
Log: see how good is rhettingers dict in rpython
diff --git a/pypy/rlib/dict.py b/pypy/rlib/dict.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/dict.py
@@ -0,0 +1,198 @@
+
+from pypy.rlib.objectmodel import compute_identity_hash
+
+# Placeholder constants
+
+FREE = -1
+DUMMY = -2
+
+class Dict(object):
+ 'Space efficient dictionary with fast iteration and cheap resizes.'
+
+ def _lookup(self, key, hashvalue):
+ 'Same lookup logic as currently used in real dicts'
+ assert self.filled < len(self.indices) # At least one open slot
+ freeslot = -1
+ PERTURB_SHIFT = 5
+ if hashvalue < 0:
+ perturb = -hashvalue
+ else:
+ perturb = hashvalue
+ n = len(self.indices)
+ i = perturb & (n-1)
+ while True:
+ index = self.indices[i]
+ if index == FREE:
+ return (FREE, i) if freeslot == -1 else (DUMMY, freeslot)
+ elif index == DUMMY:
+ freeslot = i
+ elif (self.keylist[index] is key or
+ self.hashlist[index] == hashvalue
+ and self.keylist[index] == key):
+ return (index, i)
+ i = 5 * i + perturb + 1
+ i = i & (n-1)
+ perturb >>= PERTURB_SHIFT
+
+ @staticmethod
+ def _make_index(n):
+ 'New sequence of indices using the smallest possible datatype'
+ #if n <= 2**7: return array.array('b', [FREE]) * n # signed char
+ #if n <= 2**15: return array.array('h', [FREE]) * n # signed short
+ #if n <= 2**31: return array.array('l', [FREE]) * n # signed long
+ return [FREE] * n # python
integers
+
+ def _resize(self, n):
+ '''Reindex the existing hash/key/value entries.
+ Entries do not get moved, they only get new indices.
+ No calls are made to hash() or __eq__().
+
+ '''
+ new_size = 8
+ while new_size < n:
+ new_size <<= 1
+ n = new_size
+ self.indices = self._make_index(n)
+ PERTURB_SHIFT = 5
+ for index, hashvalue in enumerate(self.hashlist):
+ if hashvalue < 0:
+ perturb = -hashvalue
+ else:
+ perturb = hashvalue
+ i = hashvalue & (n-1)
+ while True:
+ if self.indices[i] == FREE:
+ break
+ i = 5 * i + perturb + 1
+ i = i & (n-1)
+ perturb >>= PERTURB_SHIFT
+ self.indices[i] = index
+ self.filled = self.used
+
+ def clear(self):
+ self.indices = self._make_index(8)
+ self.hashlist = []
+ self.keylist = []
+ self.valuelist = []
+ self.used = 0
+ self.filled = 0 # used +
dummies
+
+ def __init__(self):
+ self.clear()
+
+ def __getitem__(self, key):
+ hashvalue = hash(key)
+ index, i = self._lookup(key, hashvalue)
+ if index < 0:
+ raise KeyError(key)
+ return self.valuelist[index]
+
+ def __setitem__(self, key, value):
+ hashvalue = key # hash
+ index, i = self._lookup(key, hashvalue)
+ if index < 0:
+ self.indices[i] = self.used
+ self.hashlist.append(hashvalue)
+ self.keylist.append(key)
+ self.valuelist.append(value)
+ self.used += 1
+ if index == FREE:
+ self.filled += 1
+ if self.filled * 3 > len(self.indices) * 2:
+ self._resize(4 * self.__len__())
+ else:
+ self.valuelist[index] = value
+
+ def __delitem__(self, key):
+ hashvalue = hash(key)
+ index, i = self._lookup(key, hashvalue)
+ if index < 0:
+ raise KeyError(key)
+ self.indices[i] = DUMMY
+ self.used -= 1
+ # If needed, swap with the lastmost entry to avoid leaving a "hole"
+ if index != self.used:
+ lasthash = self.hashlist[-1]
+ lastkey = self.keylist[-1]
+ lastvalue = self.valuelist[-1]
+ lastindex, j = self._lookup(lastkey, lasthash)
+ assert lastindex >= 0 and i != j
+ self.indices[j] = index
+ self.hashlist[index] = lasthash
+ self.keylist[index] = lastkey
+ self.valuelist[index] = lastvalue
+ # Remove the lastmost entry
+ self.hashlist.pop()
+ self.keylist.pop()
+ self.valuelist.pop()
+
+ def __len__(self):
+ return self.used
+
+ def __iter__(self):
+ return iter(self.keylist)
+
+ def iterkeys(self):
+ return iter(self.keylist)
+
+ def keys(self):
+ return list(self.keylist)
+
+ def itervalues(self):
+ return iter(self.valuelist)
+
+ def values(self):
+ return list(self.valuelist)
+
+ def iteritems(self):
+ return itertools.izip(self.keylist, self.valuelist)
+
+ def items(self):
+ return zip(self.keylist, self.valuelist)
+
+ def __contains__(self, key):
+ index, i = self._lookup(key, hash(key))
+ return index >= 0
+
+ def get(self, key, default=None):
+ 'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.'
+ index, i = self._lookup(key, hash(key))
+ return self.valuelist[index] if index >= 0 else default
+
+ def popitem(self):
+ ''' D.popitem() -> (k, v), remove and return some (key, value) pair as
a
+ 2-tuple; but raise KeyError if D is empty.
+ '''
+ try:
+ key = self.keylist[-1]
+ value = self.valuelist[-1]
+ except IndexError:
+ raise KeyError( 'popitem(): dictionary is empty')
+ del self[key]
+ return key, value
+
+ def __repr__(self):
+ return 'Dict(%r)' % self.items()
+
+ def show_structure(self):
+ 'Diagnostic method. Not part of the API.'
+ print '=' * 50
+ print self
+ print 'Indices:', self.indices
+ for i, row in enumerate(zip(self.hashlist, self.keylist,
self.valuelist)):
+ print i, row
+ print '-' * 50
+
+
+if __name__ == '__main__':
+ import sys
+ def f():
+ if len(sys.argv) > 1:
+ d = {}
+ else:
+ d = Dict()
+ class A(object):
+ pass
+ for i in range(10000000):
+ d[i] = A()
+ f()
diff --git a/pypy/translator/goal/targetnopstandalone.py
b/pypy/translator/goal/targetnopstandalone.py
--- a/pypy/translator/goal/targetnopstandalone.py
+++ b/pypy/translator/goal/targetnopstandalone.py
@@ -7,13 +7,24 @@
actually implementing argv of the executable.
"""
-def debug(msg):
- print "debug:", msg
-
# __________ Entry point __________
+class A(object):
+ def __init__(self, a, b, c):
+ self.a = a
+ self.b = b
+ self.c = c
+
def entry_point(argv):
- debug("hello world")
+ if len(argv) > 2:
+ d = {}
+ for i in range(int(argv[1])):
+ d[i] = A(i, i, i)
+ else:
+ from pypy.rlib import dict
+ d = dict.Dict()
+ for i in range(int(argv[1])):
+ d.__setitem__(i, A(i, i, i))
return 0
# _____ Define and setup target ___
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit