I'm not checking this in yet, I'd like some comments first. This changes IdentityHashMap to use linear probing, aka PR 28987. This ought to be more cache friendly. Also it lets us handle remove() more efficiently, and removes the need for a tombstone object.
It also incorporates Martin's idea of using a special value to represent a null key or value, and using 'null' to represent an empty slot. This speeds certain operations; see Martin's original message: http://developer.classpath.org/pipermail/classpath-patches/2006-April/001430.html This passes the IdentityHashMap test cases in Mauve. Comments? Tom 2006-09-28 Tom Tromey <[EMAIL PROTECTED]> PR classpath/28987: * java/util/IdentityHashMap.java (tombstone): Removed. (emptyslot): Removed. (nullslot): New field. (IdentityHashMap): Don't fill array. (clear): Fill with null. (hash): Now final. Use linear probing. (xform): New method. (unxform): Likewise. (removeAtIndex): Likewise. (clone, containsKey, containsValue, entrySet, get, hashCode, keySet, put, remove, values): Updated. (IdentityIterator, IdentityEntry): Likewise. (writeObject): Likewise. Index: IdentityHashMap.java =================================================================== RCS file: /cvsroot/classpath/classpath/java/util/IdentityHashMap.java,v retrieving revision 1.19 diff -u -r1.19 IdentityHashMap.java --- IdentityHashMap.java 5 Apr 2006 18:32:29 -0000 1.19 +++ IdentityHashMap.java 29 Sep 2006 01:19:45 -0000 @@ -97,16 +97,13 @@ private static final int DEFAULT_CAPACITY = 21; /** - * This object is used to mark deleted items. Package visible for use by - * nested classes. + * This object is used to mark a slot whose key or value is 'null'. + * This is more efficient than using a special value to mark an empty + * slot, because null entries are rare, empty slots are common, and + * the JVM will clear new arrays for us. + * Package visible for use by nested classes. */ - static final Object tombstone = new Object(); - - /** - * This object is used to mark empty slots. We need this because - * using null is ambiguous. Package visible for use by nested classes. - */ - static final Object emptyslot = new Object(); + static final Object nullslot = new Object(); /** * Compatible with JDK 1.4. @@ -166,7 +163,6 @@ if (max < 2) max = 2; table = new Object[max << 1]; - Arrays.fill(table, emptyslot); threshold = (max >> 2) * 3; } @@ -191,7 +187,7 @@ if (size != 0) { modCount++; - Arrays.fill(table, emptyslot); + Arrays.fill(table, null); size = 0; } } @@ -227,6 +223,7 @@ */ public boolean containsKey(Object key) { + key = xform(key); return key == table[hash(key)]; } @@ -241,6 +238,7 @@ */ public boolean containsValue(Object value) { + value = xform(value); for (int i = table.length - 1; i > 0; i -= 2) if (table[i] == value) return true; @@ -299,7 +297,9 @@ if (! (o instanceof Map.Entry)) return false; Map.Entry m = (Map.Entry) o; - return m.getValue() == table[hash(m.getKey()) + 1]; + Object value = xform(m.getValue()); + Object key = xform(m.getKey()); + return value == table[hash(key) + 1]; } public int hashCode() @@ -311,14 +311,13 @@ { if (! (o instanceof Map.Entry)) return false; - Object key = ((Map.Entry) o).getKey(); + Object key = xform(((Map.Entry) o).getKey()); int h = hash(key); if (table[h] == key) { size--; modCount++; - table[h] = tombstone; - table[h + 1] = tombstone; + IdentityHashMap.this.removeAtIndex(h); return true; } return false; @@ -360,8 +359,9 @@ */ public Object get(Object key) { + key = xform(key); int h = hash(key); - return table[h] == key ? table[h + 1] : null; + return table[h] == key ? unxform(table[h + 1]) : null; } /** @@ -378,10 +378,11 @@ for (int i = table.length - 2; i >= 0; i -= 2) { Object key = table[i]; - if (key == emptyslot || key == tombstone) + if (key == null) continue; - hash += (System.identityHashCode(key) - ^ System.identityHashCode(table[i + 1])); + // FIXME: this is a lame computation. + hash += (System.identityHashCode(unxform(key)) + ^ System.identityHashCode(unxform(table[i + 1]))); } return hash; } @@ -445,23 +446,22 @@ for (int i = table.length - 2; i >= 0; i -= 2) { Object key = table[i]; - if (key == emptyslot || key == tombstone) + if (key == null) continue; - hash += System.identityHashCode(key); + hash += System.identityHashCode(unxform(key)); } return hash; - } public boolean remove(Object o) { + o = xform(o); int h = hash(o); if (table[h] == o) { size--; modCount++; - table[h] = tombstone; - table[h + 1] = tombstone; + removeAtIndex(h); return true; } return false; @@ -486,6 +486,18 @@ */ public Object put(Object key, Object value) { + key = xform(key); + value = xform(value); + + // We don't want to rehash if we're overwriting an existing slot. + int h = hash(key); + if (table[h] == key) + { + Object r = unxform(table[h + 1]); + table[h + 1] = value; + return r; + } + // Rehash if the load factor is too high. if (size > threshold) { @@ -493,25 +505,25 @@ // This isn't necessarily prime, but it is an odd number of key/value // slots, which has a higher probability of fewer collisions. table = new Object[(old.length * 2) + 2]; - Arrays.fill(table, emptyslot); size = 0; threshold = (table.length >>> 3) * 3; for (int i = old.length - 2; i >= 0; i -= 2) { Object oldkey = old[i]; - if (oldkey != tombstone && oldkey != emptyslot) - // Just use put. This isn't very efficient, but it is ok. - put(oldkey, old[i + 1]); + if (oldkey != null) + { + h = hash(oldkey); + table[h] = oldkey; + table[h + 1] = old[i + 1]; + ++size; + // No need to update modCount here, we'll do it + // just after the loop. + } } - } - int h = hash(key); - if (table[h] == key) - { - Object r = table[h + 1]; - table[h + 1] = value; - return r; + // Now that we've resize, recompute the hash value. + h = hash(key); } // At this point, we add a new mapping. @@ -536,6 +548,40 @@ } /** + * Remove the element at index and update the table to compensate. + * This is package-private for use by inner classes. + * @param i index of the removed element + */ + final void removeAtIndex(int i) + { + // This is Algorithm R from Knuth, section 64. + // Variable names are taken directly from the text. + while (true) + { + table[i] = null; + table[i + 1] = null; + int j = i; + int r; + do + { + i -= 2; + if (i < 0) + i = table.length - 2; + Object key = table[i]; + if (key == null) + return; + r = Math.abs(System.identityHashCode(key) + % (table.length >> 1)) << 1; + } + while ((i <= r && r < j) + || (r < j && j < i) + || (j < i && i <= r)); + table[j] = table[i]; + table[j + 1] = table[i + 1]; + } + } + + /** * Removes from the HashMap and returns the value which is mapped by * the supplied key. If the key maps to nothing, then the HashMap * remains unchanged, and <code>null</code> is returned. @@ -551,14 +597,14 @@ */ public Object remove(Object key) { + key = xform(key); int h = hash(key); if (table[h] == key) { modCount++; size--; - Object r = table[h + 1]; - table[h] = tombstone; - table[h + 1] = tombstone; + Object r = unxform(table[h + 1]); + removeAtIndex(h); return r; } return null; @@ -613,13 +659,14 @@ public boolean remove(Object o) { + o = xform(o); + // This approach may look strange, but it is ok. for (int i = table.length - 1; i > 0; i -= 2) if (table[i] == o) { modCount++; - table[i - 1] = tombstone; - table[i] = tombstone; size--; + IdentityHashMap.this.removeAtIndex(i - 1); return true; } return false; @@ -629,8 +676,31 @@ } /** + * Transform a reference from its external form to its internal form. + * This is package-private for use by inner classes. + */ + final Object xform(Object o) + { + if (o == null) + o = nullslot; + return o; + } + + /** + * Transform a reference from its internal form to its external form. + * This is package-private for use by inner classes. + */ + final Object unxform(Object o) + { + if (o == nullslot) + o = null; + return o; + } + + /** * Helper method which computes the hash code, then traverses the table - * until it finds the key, or the spot where the key would go. + * until it finds the key, or the spot where the key would go. the key + * must already be in its internal form. * * @param key the key to check * @return the index where the key belongs @@ -638,36 +708,23 @@ * @see #put(Object, Object) */ // Package visible for use by nested classes. - int hash(Object key) + final int hash(Object key) { - // Implementation note: it is feasible for the table to have no - // emptyslots, if it is full with entries and tombstones, so we must - // remember where we started. If we encounter the key or an emptyslot, - // we are done. If we encounter a tombstone, the key may still be in - // the array. If we don't encounter the key, we use the first emptyslot - // or tombstone we encountered as the location where the key would go. - // By requiring at least 2 key/value slots, and rehashing at 75% - // capacity, we guarantee that there will always be either an emptyslot - // or a tombstone somewhere in the table. int h = Math.abs(System.identityHashCode(key) % (table.length >> 1)) << 1; - int del = -1; - int save = h; - do + while (true) { - if (table[h] == key) + // By requiring at least 2 key/value slots, and rehashing at 75% + // capacity, we guarantee that there will always be either an empty + // slot somewhere in the table. + if (table[h] == key || table[h] == null) return h; - if (table[h] == emptyslot) - break; - if (table[h] == tombstone && del < 0) - del = h; + // We use linear probing as it is friendlier to the cache and + // it lets us efficiently remove entries. h -= 2; if (h < 0) h = table.length - 2; } - while (h != save); - - return del < 0 ? h : del; } /** @@ -731,10 +788,11 @@ loc -= 2; key = table[loc]; } - while (key == emptyslot || key == tombstone); + while (key == null); - return type == KEYS ? key : (type == VALUES ? table[loc + 1] - : new IdentityEntry(loc)); + return type == KEYS ? unxform(key) + : (type == VALUES ? unxform(table[loc + 1]) + : new IdentityEntry(loc)); } /** @@ -748,12 +806,11 @@ { if (knownMod != modCount) throw new ConcurrentModificationException(); - if (loc == table.length || table[loc] == tombstone) + if (loc == table.length) throw new IllegalStateException(); modCount++; size--; - table[loc] = tombstone; - table[loc + 1] = tombstone; + removeAtIndex(loc); knownMod++; } } // class IdentityIterator @@ -797,12 +854,13 @@ */ public boolean equals(Object o) { - if (knownMod != modCount || table[loc] == tombstone) + if (knownMod != modCount) throw new ConcurrentModificationException(); if (! (o instanceof Map.Entry)) return false; Map.Entry e = (Map.Entry) o; - return table[loc] == e.getKey() && table[loc + 1] == e.getValue(); + return table[loc] == xform(e.getKey()) + && table[loc + 1] == xform(e.getValue()); } /** @@ -814,9 +872,9 @@ */ public Object getKey() { - if (knownMod != modCount || table[loc] == tombstone) + if (knownMod != modCount) throw new ConcurrentModificationException(); - return table[loc]; + return unxform(table[loc]); } /** @@ -828,9 +886,9 @@ */ public Object getValue() { - if (knownMod != modCount || table[loc] == tombstone) + if (knownMod != modCount) throw new ConcurrentModificationException(); - return table[loc + 1]; + return unxform(table[loc + 1]); } /** @@ -844,10 +902,10 @@ */ public int hashCode() { - if (knownMod != modCount || table[loc] == tombstone) + if (knownMod != modCount) throw new ConcurrentModificationException(); - return (System.identityHashCode(table[loc]) - ^ System.identityHashCode(table[loc + 1])); + return (System.identityHashCode(unxform(table[loc])) + ^ System.identityHashCode(unxform(table[loc + 1]))); } /** @@ -860,10 +918,10 @@ */ public Object setValue(Object value) { - if (knownMod != modCount || table[loc] == tombstone) + if (knownMod != modCount) throw new ConcurrentModificationException(); - Object r = table[loc + 1]; - table[loc + 1] = value; + Object r = unxform(table[loc + 1]); + table[loc + 1] = xform(value); return r; } @@ -877,9 +935,9 @@ */ public String toString() { - if (knownMod != modCount || table[loc] == tombstone) + if (knownMod != modCount) throw new ConcurrentModificationException(); - return table[loc] + "=" + table[loc + 1]; + return unxform(table[loc]) + "=" + unxform(table[loc + 1]); } } // class IdentityEntry @@ -922,10 +980,10 @@ for (int i = table.length - 2; i >= 0; i -= 2) { Object key = table[i]; - if (key != tombstone && key != emptyslot) + if (key != null) { - s.writeObject(key); - s.writeObject(table[i + 1]); + s.writeObject(unxform(key)); + s.writeObject(unxform(table[i + 1])); } } }
