I've just pushed the revised XHashTable to my github repo.
https://github.com/hemmecke/fricas/commit/9e60187f531876811cb47834cf8bf3f2360c5e28
Waldek, for your convenience, I've attached the old and new spad file.
The main differences are:
I've changed the representation to include a few more counters and the
two thresholds for ordinary load (0.7) and "virtual" load (0.9) (i.e.
occupied+deleted entries).
The counter and rehashing in the search routine is gone. I now only need
one single search function "localSearch".
The search is basically the same as before, but from the return value
one can distinguish whether a free entry is VACANT or DELETED.
This enables me to keep track of the deleted entries.
Rehashing and growing the table is now solely done in setelt.
While I revised the code, I had the impression, that I could probably
have all those counters as SingleInteger instead of Integer (as they are
now). I refrained from changing it, since (a) the numbers in
arrayLengths are bigger than 64bit and (b) fixnum on a 32bit machine is
probably too small. Anyway, that's certainly a direction for improvement.
Waldek, if you are satisfied, I'll reorder and squash some of the
commits and then commit to trunk.
Ralf
PS: Maybe it's also a good idea to give the user a chance to create
tables with preset load factor and virtual load factor.
--
You received this message because you are subscribed to the Google Groups
"FriCAS - computer algebra system" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/fricas-devel?hl=en.
rep x ==> (x@%) pretend Rep
per x ==> (x@Rep) pretend %
N ==> NonNegativeInteger
Z ==> Integer
I ==> SingleInteger
)abbrev domain XHASHTBL XHashTable
++ Author: Ralf Hemmecke
++ Keywords: hash table
++ Description:
++ An implementation of a hash table that uses equality of the key domain
++ to decide upon equality of keys.
XHashTable(Key: SetCategory, Entry: Type):
Join(TableAggregate(Key, Entry), finiteAggregate, shallowlyMutable) with
table: (Key -> SingleInteger) -> %
++ table(hashfunction) creates an empty hash table that uses
++ hashfunction instead of hash$Key.
== add
Marker ==> None
toMarker mk ==> mk@Marker -- note that MKey==Marker==UMKE
VACANT : Marker := (HASHTABLEVACANT$Lisp) pretend Marker -- pos never used
DELETED: Marker := (HASHTABLEDELETED$Lisp) pretend Marker -- pos is deleted
vacant?(mk) ==> EQ(toMarker mk, VACANT)$Lisp
deleted?(mk) ==> EQ(toMarker mk, DELETED)$Lisp
key?(mk) ==> not (vacant? mk or deleted? mk)
MKey ==> None
UMKE ==> None
Buckets ==> PrimitiveArray UMKE
numOfBuckets(a) ==> shift(#a, -1)
toUMKE x ==> x pretend UMKE
toKey k ==> (k@UMKE) pretend Key
getMKey(a, i) ==> ((a.i)@UMKE) pretend MKey
setKey!(a, i, k) ==> (a.i := toUMKE k) pretend Key
getEntry(a, n, i) ==> a(n+i) pretend Entry
setEntry!(a, n, i, e) ==> (a(n+i) := toUMKE e) pretend Entry
setKeyEntry!(a, n, i, k, e) ==> (setKey!(a, i, k); setEntry!(a, n, i, e))
-- deleteKeyEntry!(a, n, i) ==> setKeyEntry!(a, n, i, DELETED, VACANT)
deleteKeyEntry!(a, n, i) ==> (a.i := toUMKE DELETED; a(n+i) := toUMKE
VACANT)
KE ==> Record(key: Key, entry: Entry)
UE ==> Union(Entry, "failed")
maxLoad n ==> n*7 quo 10 -- load factor
maxVirtualLoad n ==> n*9 quo 10 -- virtual load factor
Rep == Record(_
numOfEntries: Z,_
maxNumOfEntries: Z,_
numOfDeletedEntries: Z,_
maxNumOfVirtualEntries: Z,_
idx: Z,_
arr: Buckets,_
hashCode: Key -> I)
localSearch(a: Buckets, k: Key, h: Key -> I): Z ==
n: Z := numOfBuckets a
h1: Z := (h k)::Z
p: Z := positiveRemainder(h1, n) -- position in array
h2: Z := 1 + positiveRemainder(h1, n-2)
mk: MKey := getMKey(a, p)
deletedPosition?: Boolean := false
while not vacant? mk repeat
deleted? mk => (deletedPosition? := true; break)
k = toKey mk => return p -- key found
p := p + h2
if p>=n then p := p-n
mk := getMKey(a, p)
q := p -- first position of a free entry
-- We ignore DELETED entries when looking for the key.
while not vacant? mk repeat
not deleted? mk and k = toKey mk =>
setKeyEntry!(a, n, q, k, getEntry(a, n, p))
deleteKeyEntry!(a, n, p)
return q -- entry has been copied to previous DELETED position
p := p + h2
if p>=n then p := p-n
mk := getMKey(a, p)
if deletedPosition? then q := q-n
q-n -- KEY not found.
newArr(n: N): Buckets == new(2*n, toUMKE VACANT)
arrayLengths: PrimitiveArray N := [[_
7, 13, 31, 61, 109, 241, 463, 1021, 2029, 4093, 8089, 16363,_
32719, 65521, 131011, 262111, 524221, 1048573, 2097133,_
4193803, 8388451, 16777141, 33554011, 67108669, 134217439,_
268435009, 536870839, 1073741719, 2147482951, 4294965841,_
8589934291, 17179868809, 34359737299, 68719476391,_
137438953273, 274877906629, 549755813359, 1099511626399]]
rehashx!(x: %, ix: Z): % ==
m: N := arrayLengths ix
r: Rep := rep x
h: Key -> I := r.hashCode
a: Buckets := r.arr
n: Z := numOfBuckets a
c: Buckets := newArr m
for i in 0..n-1 | key?(mk: MKey := getMKey(a, i)) repeat
k: Key := toKey mk
-- Note that k is not in c, and there are no DELETED positions.
-- Thus, -m<=p<0.
p := m + localSearch(c, k, h)
setKeyEntry!(c, m, p, k, getEntry(a, n, i))
r.arr := c --destructively set new array
r.idx := ix
r.maxNumOfEntries := maxLoad m
r.numOfDeletedEntries := 0
r.maxNumOfVirtualEntries := maxVirtualLoad m
x
grow!(x: %): % == rehashx!(x, rep(x).idx + 1)
rehash!(x: %): % == rehashx!(x, rep(x).idx)
table(hashfunction: Key -> SingleInteger): % ==
n: N := arrayLengths 0
maxEntries: Z := maxLoad n
maxVirtualEntries: Z := maxVirtualLoad n
per [0, maxEntries, 0, maxVirtualEntries, 0, newArr n, hashfunction]
empty(): % ==
table(forceLazySlot((hash$Key)@(Key -> I))$Lisp pretend (Key->I))
inspect(x: %): KE ==
a := rep(x).arr
n: Z := numOfBuckets a
for i in 0..n - 1 | key?(mk: MKey := getMKey(a, i)) repeat
return [toKey mk, getEntry(a, n, i)]
error "table must be non-empty"
#(x: %): N == rep(x).numOfEntries :: N
search(k: Key, x: %): Union(Entry, "failed") ==
a: Buckets := rep(x).arr
h: Key -> I := rep(x).hashCode
p: Z := localSearch(a, k, h)
p<0 => "failed"
getEntry(a, numOfBuckets a, p)::UE
elt(x: %, k: Key): Entry ==
a: Buckets := rep(x).arr
h: Key -> I := rep(x).hashCode
p := localSearch(a, k, h)
p<0 => error "key not in table"
getEntry(a, numOfBuckets a, p)
elt(x: %, k: Key, e: Entry): Entry ==
a: Buckets := rep(x).arr
h: Key -> I := rep(x).hashCode
p := localSearch(a, k, h)
p<0 => e
getEntry(a, numOfBuckets a, p)
setelt(x: %, k: Key, e: Entry): Entry ==
if rep(x).numOfEntries >= rep(x).maxNumOfEntries then grow! x
a: Buckets := rep(x).arr
h: Key -> I := rep(x).hashCode
p := localSearch(a, k, h)
n: Z := numOfBuckets a
p>=0 => setEntry!(a, n, p, e)
r: Rep := rep x
r.numOfEntries := inc(r.numOfEntries)
p := n+p
p<0 => -- fill DELETED position
r.numOfDeletedEntries := dec(r.numOfDeletedEntries)
setKeyEntry!(a, n, n+p, k, e)
setKeyEntry!(a, n, p, k, e) -- fill VACANT position
if r.numOfEntries + r.numOfDeletedEntries > r.maxNumOfVirtualEntries
then
rehash! x
e
remove!(k: Key, x: %): Union(Entry, "failed") ==
a: Buckets := rep(x).arr
h: Key -> I := rep(x).hashCode
p := localSearch(a, k, h)
p<0 => "failed" -- key has not been found
n: Z := numOfBuckets a
e: Entry := getEntry(a, n, p) -- to be returned
deleteKeyEntry!(a, n, p)
rep(x).numOfEntries := dec(rep(x).numOfEntries)
rep(x).numOfDeletedEntries := inc(rep(x).numOfDeletedEntries)
e::UE
copy(x: %): % ==
r: Rep := rep x
per [r.numOfEntries, r.maxNumOfEntries,_
r.numOfDeletedEntries, r.maxNumOfVirtualEntries,_
r.idx, copy(r.arr), r.hashCode]
fill!(x: %, e: Entry): % ==
a := rep(x).arr
n: N := numOfBuckets a
for i in 0..n-1 | key? getMKey(a, i) repeat setEntry!(a, n, i, e)
x
map!(f: Entry -> Entry, x: %): % ==
a := rep(x).arr
n: N := numOfBuckets a
for i in 0..n-1 | key? getMKey(a, i) repeat
setEntry!(a, n, i, f getEntry(a, n, i))
x
keys(x: %): List Key ==
a := rep(x).arr
l: List Key := empty()
for i in 0..numOfBuckets a - 1 | key?(mk: MKey := getMKey(a, i)) repeat
l := cons(toKey mk, l)
l
parts(x: %): List Entry ==
a := rep(x).arr
n: N := numOfBuckets a
l: List Entry := empty()
for i in 0..n-1 | key? getMKey(a, i) repeat
l := cons(getEntry(a, n, i), l)
l
parts(x: %): List KE ==
a := rep(x).arr
n: N := numOfBuckets a
l: List KE := empty()
for i in 0..n-1 | key?(mk: MKey := getMKey(a, i)) repeat
l := cons([toKey mk, getEntry(a, n, i)], l)
l
removeDuplicates(x: %): % == x
if Entry has BasicType then
((x: %) = (y: %)): Boolean ==
#x ~= #y => false
xa := rep(x).arr; xn := numOfBuckets xa
ya := rep(y).arr; yn := numOfBuckets ya
h := rep(y).hashCode
for i in 0..xn - 1 | key?(mk: MKey := getMKey(xa, i)) repeat
p := localSearch(ya, toKey mk, h)
p < 0 => return false
getEntry(xa, xn, i) ~= getEntry(ya, yn, p) => return false
true
rep x ==> (x@%) pretend Rep
per x ==> (x@Rep) pretend %
N ==> NonNegativeInteger
Z ==> Integer
I ==> SingleInteger
)abbrev domain XHASHTBL XHashTable
++ Author: Ralf Hemmecke
++ Keywords: hash table
++ Description:
++ An implementation of a hash table that uses equality of the key domain
++ to decide upon equality of keys.
XHashTable(Key: SetCategory, Entry: Type):
Join(TableAggregate(Key, Entry), finiteAggregate, shallowlyMutable) with
table: (Key -> SingleInteger) -> %
++ table(hashfunction) creates an empty hash table that uses
++ hashfunction instead of hash$Key.
== add
Marker ==> None
toMarker mk ==> mk@Marker -- note that MKey==Marker==UMKE
VACANT : Marker := (HASHTABLE_-VACANT$Lisp) pretend Marker -- pos never
used
DELETED: Marker := (HASHTABLE_-DELETED$Lisp) pretend Marker -- pos is
deleted
vacant?(mk) ==> EQ(toMarker mk, VACANT)$Lisp
deleted?(mk) ==> EQ(toMarker mk, DELETED)$Lisp
key?(mk) ==> not (vacant? mk or deleted? mk)
MKey ==> None
UMKE ==> None
Buckets ==> PrimitiveArray UMKE
numOfBuckets(a) ==> shift(#a, -1)
toUMKE x ==> x pretend UMKE
toKey k ==> (k@UMKE) pretend Key
getMKey(a, i) ==> ((a.i)@UMKE) pretend MKey
setKey!(a, i, k) ==> (a.i := toUMKE k) pretend Key
getEntry(a, n, i) ==> a(n+i) pretend Entry
setEntry!(a, n, i, e) ==> (a(n+i) := toUMKE e) pretend Entry
setKeyEntry!(a, n, i, k, e) ==> (setKey!(a, i, k); setEntry!(a, n, i, e))
-- deleteKeyEntry!(a, n, i) ==> setKeyEntry!(a, n, i, DELETED, VACANT)
deleteKeyEntry!(a, n, i) ==> (a.i := toUMKE DELETED; a(n+i) := toUMKE
VACANT)
KE ==> Record(key: Key, entry: Entry)
UE ==> Union(Entry, "failed")
Rep == Record(numOfEntries: Z, idx: Z, arr: Buckets, hashCode: Key -> I)
localBucketSearch(a: Buckets, k: Key, h: Key -> I): Z ==
n: Z := numOfBuckets a
h1: Z := (h k)::Z
p: Z := positiveRemainder(h1, n) -- position in array
h2: Z := 1 + positiveRemainder(h1, n-2)
mk: MKey := getMKey(a, p)
i := n -- initialize with the maximal number of iterations
while key? mk repeat
k = toKey mk => return p -- key found
i := i-1
p := p + h2
if p>=n then p := p-n
mk := getMKey(a, p)
q := p -- first position of a free entry
-- We ignore DELETED entries when looking for the key.
while i>0 and not vacant? mk repeat
not deleted? mk and k = toKey mk =>
setKeyEntry!(a, n, q, k, getEntry(a, n, p))
deleteKeyEntry!(a, n, p)
return q -- entry has been copied to previous DELETED position
i := i-1
p := p + h2
if p>=n then p := p-n
mk := getMKey(a, p)
q-n -- KEY not found. Return a shifted version of the first free
position.
newArr(n: N): Buckets == new(2*n, toUMKE VACANT)
arrayLengths: PrimitiveArray N := [[_
7, 13, 31, 61, 109, 241, 463, 1021, 2029, 4093, 8089, 16363,_
32719, 65521, 131011, 262111, 524221, 1048573, 2097133,_
4193803, 8388451, 16777141, 33554011, 67108669, 134217439,_
268435009, 536870839, 1073741719, 2147482951, 4294965841,_
8589934291, 17179868809, 34359737299, 68719476391,_
137438953273, 274877906629, 549755813359, 1099511626399]]
rehashx!(x: %, ix: Z): % ==
m: N := arrayLengths ix
r: Rep := rep x
h: Key -> I := r.hashCode
a: Buckets := r.arr
n: Z := numOfBuckets a
c: Buckets := newArr m
for i in 0..n-1 | key?(mk: MKey := getMKey(a, i)) repeat
k: Key := toKey mk
-- note that k is not in c, so the search returns a negative value
p := m + localBucketSearch(c, k, h)
setKeyEntry!(c, m, p, k, getEntry(a, n, i))
r.arr := c --destructively set new array
r.idx := ix
x
grow!(x: %): % == rehashx!(x, rep(x).idx + 1)
rehash!(x: %): % == rehashx!(x, rep(x).idx)
localSearch!(x: %, k: Key): Z ==
a: Buckets := rep(x).arr
h: Key -> I := rep(x).hashCode
n: Z := numOfBuckets a
h1: Z := (h k)::Z
p: Z := positiveRemainder(h1, n) -- position in array
h2: Z := 1 + positiveRemainder(h1, n-2)
mk: MKey := getMKey(a, p)
i := n -- initialize with the maximal number of iterations
while key? mk repeat
k = toKey mk => return p -- key found
i := i-1
p := p + h2
if p>=n then p := p-n
mk := getMKey(a, p)
q := p -- first position of a free entry
-- We ignore DELETED entries when looking for the key.
while i>0 and not vacant? mk repeat
not deleted? mk and k = toKey mk =>
setKeyEntry!(a, n, q, k, getEntry(a, n, p))
deleteKeyEntry!(a, n, p)
return q -- entry has been copied to previous DELETED position
i := i-1
p := p + h2
if p>=n then p := p-n
mk := getMKey(a, p)
not zero? i => q-n -- KEY not found.
rehash! x -- internal buckets array will change
localBucketSearch(rep(x).arr, k, h) -- search a second in a rehashed
table
table(hashfunction: Key -> SingleInteger): % ==
per [0, 0, newArr arrayLengths 0, hashfunction]
empty(): % ==
table(forceLazySlot((hash$Key)@(Key -> I))$Lisp pretend (Key->I))
inspect(x: %): KE ==
a := rep(x).arr
n: Z := numOfBuckets a
for i in 0..n - 1 | key?(mk: MKey := getMKey(a, i)) repeat
return [toKey mk, getEntry(a, n, i)]
error "table must be non-empty"
#(x: %): N == rep(x).numOfEntries :: N
search(k: Key, x: %): Union(Entry, "failed") ==
p: Z := localSearch!(x, k)
p<0 => "failed"
a := rep(x).arr
getEntry(a, numOfBuckets a, p)::UE
elt(x: %, k: Key): Entry ==
p := localSearch!(x, k)
p<0 => error "key not in table"
a := rep(x).arr
getEntry(a, numOfBuckets a, p)
elt(x: %, k: Key, e: Entry): Entry ==
p := localSearch!(x, k)
p<0 => e
a := rep(x).arr
getEntry(a, numOfBuckets a, p)
setelt(x: %, k: Key, e: Entry): Entry ==
if 20*#x > 7*#(rep(x).arr) then grow! x
p := localSearch!(x, k)
a := rep(x).arr
n: Z := numOfBuckets a
p>=0 => setEntry!(a, n, p, e)
rep(x).numOfEntries := inc(rep(x).numOfEntries)
p := n+p
setKeyEntry!(a, n, p, k, e)
remove!(k: Key, x: %): Union(Entry, "failed") ==
p := localSearch!(x, k)
p<0 => "failed" -- key has not been found
a := rep(x).arr
n: Z := numOfBuckets a
e: Entry := getEntry(a, n, p) -- to be returned
deleteKeyEntry!(a, n, p)
rep(x).numOfEntries := dec(rep(x).numOfEntries)
e::UE
copy(x: %): % == per [rep(x).numOfEntries, rep(x).idx,_
copy(rep(x).arr), rep(x).hashCode]
fill!(x: %, e: Entry): % ==
a := rep(x).arr
n: N := numOfBuckets a
for i in 0..n-1 | key? getMKey(a, i) repeat setEntry!(a, n, i, e)
x
map!(f: Entry -> Entry, x: %): % ==
a := rep(x).arr
n: N := numOfBuckets a
for i in 0..n-1 | key? getMKey(a, i) repeat
setEntry!(a, n, i, f getEntry(a, n, i))
x
keys(x: %): List Key ==
a := rep(x).arr
l: List Key := empty()
for i in 0..numOfBuckets a - 1 | key?(mk: MKey := getMKey(a, i)) repeat
l := cons(toKey mk, l)
l
parts(x: %): List Entry ==
a := rep(x).arr
n: N := numOfBuckets a
l: List Entry := empty()
for i in 0..n-1 | key? getMKey(a, i) repeat
l := cons(getEntry(a, n, i), l)
l
parts(x: %): List KE ==
a := rep(x).arr
n: N := numOfBuckets a
l: List KE := empty()
for i in 0..n-1 | key?(mk: MKey := getMKey(a, i)) repeat
l := cons([toKey mk, getEntry(a, n, i)], l)
l
removeDuplicates(x: %): % == x
if Entry has BasicType then
((x: %) = (y: %)): Boolean ==
#x ~= #y => false
xa := rep(x).arr; xn := numOfBuckets xa
ya := rep(y).arr; yn := numOfBuckets ya
h := rep(y).hashCode
for i in 0..xn - 1 | key?(mk: MKey := getMKey(xa, i)) repeat
p := localBucketSearch(ya, toKey mk, h)
p < 0 => return false
getEntry(xa, xn, i) ~= getEntry(ya, yn, p) => return false
true