http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java index 115571e..84eac5f 100644 --- a/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java +++ b/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java @@ -26,6 +26,8 @@ import java.io.ObjectInput; import java.io.ObjectOutput; import java.util.Arrays; +import javax.annotation.Nonnull; + /** * An open-addressing hash table using double hashing. * @@ -45,12 +47,24 @@ public final class Long2DoubleOpenHashTable implements Externalizable { private static final float DEFAULT_LOAD_FACTOR = 0.75f; private static final float DEFAULT_GROW_FACTOR = 2.0f; - protected final transient float _loadFactor; - protected final transient float _growFactor; + private static final float SHRINK_FACTOR = 0.1f; // at least 10% of table must be FREE + private static final float GROW_FACTOR_AT_SHRINK = 1.7f; + + protected/* final */float _loadFactor; + protected/* final */float _growFactor; + + protected int _used; + protected int _freeEntries; + + /** Used entry threshold to grow table */ + protected int _growThreshold; + /** + * Free entry threshold to shrink table. Shrink threshold will be set in the first expansion to + * avoid shrink at very early remove(). + */ + protected int _shrinkThreshold; - protected int _used = 0; - protected int _threshold; - protected double defaultReturnValue = 0.d; + protected double _defaultReturnValue = 0.d; protected long[] _keys; protected double[] _values; @@ -67,7 +81,10 @@ public final class Long2DoubleOpenHashTable implements Externalizable { this._keys = new long[actualSize]; this._values = new double[actualSize]; this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); + this._used = 0; + this._freeEntries = actualSize; + this._growThreshold = Math.round(actualSize * _loadFactor); + this._shrinkThreshold = Math.round(actualSize * SHRINK_FACTOR); } public Long2DoubleOpenHashTable(int size, int loadFactor, int growFactor) { @@ -87,7 +104,7 @@ public final class Long2DoubleOpenHashTable implements Externalizable { } public void defaultReturnValue(double v) { - this.defaultReturnValue = v; + this._defaultReturnValue = v; } public boolean containsKey(final long key) { @@ -98,12 +115,12 @@ public final class Long2DoubleOpenHashTable implements Externalizable { * @return defaultReturnValue if not found */ public double get(final long key) { - return get(key, defaultReturnValue); + return get(key, _defaultReturnValue); } public double get(final long key, final double defaultValue) { final int i = _findKey(key); - if (i < 0) { + if (i == -1) { return defaultValue; } return _values[i]; @@ -111,7 +128,7 @@ public final class Long2DoubleOpenHashTable implements Externalizable { public double _get(final int index) { if (index < 0) { - return defaultReturnValue; + return _defaultReturnValue; } return _values[index]; } @@ -129,7 +146,7 @@ public final class Long2DoubleOpenHashTable implements Externalizable { } public double put(final long key, final double value) { - return put(key, value, defaultReturnValue); + return put(key, value, _defaultReturnValue); } public double put(final long key, final double value, final double defaultValue) { @@ -147,26 +164,39 @@ public final class Long2DoubleOpenHashTable implements Externalizable { final double[] values = _values; final byte[] states = _states; - if (states[keyIdx] == FULL) {// double hashing + byte state = states[keyIdx]; + if (state == FULL) {// double hashing if (keys[keyIdx] == key) { double old = values[keyIdx]; values[keyIdx] = value; return old; } // try second hash - int decr = 1 + (hash % (keyLength - 2)); + final int loopIndex = keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); for (;;) { keyIdx -= decr; if (keyIdx < 0) { keyIdx += keyLength; } - if (isFree(keyIdx, key)) { + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } + + state = states[keyIdx]; + if (state == FREE) { break; } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - double old = values[keyIdx]; - values[keyIdx] = value; - return old; + if (keys[keyIdx] == key) { + if (state == FULL) { + double old = values[keyIdx]; + values[keyIdx] = value; + return old; + } else { + assert (state == REMOVED); + break; + } } } } @@ -174,24 +204,21 @@ public final class Long2DoubleOpenHashTable implements Externalizable { values[keyIdx] = value; states[keyIdx] = FULL; ++_used; - return defaultValue; - } - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final long key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; + if (state == FREE) { + _freeEntries--; + if (_freeEntries < _shrinkThreshold) { + int newCapacity = Math.max(keys.length, Math.round(_used * GROW_FACTOR_AT_SHRINK)); + ensureCapacity(newCapacity); + } } - return false; + + return defaultValue; } /** @return expanded or not */ protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled + if ((_used + 1) >= _growThreshold) {// too filled int newCapacity = Math.round(_keys.length * _growFactor); ensureCapacity(newCapacity); return true; @@ -207,64 +234,44 @@ public final class Long2DoubleOpenHashTable implements Externalizable { final byte[] states = _states; final int keyLength = keys.length; + // double hashing final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); + final int startIndex = hash % keyLength; + for (int keyIdx = startIndex;;) { + final byte state = states[keyIdx]; + if (state == FREE) { + return -1; } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { + if (keys[keyIdx] == key) { + if (state == FULL) { return keyIdx; + } else { + assert (state == REMOVED); + return -1; } } + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (keyIdx == startIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } } - return -1; } public double remove(final long key) { - final long[] keys = _keys; - final double[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - double old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - double old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } + final int keyIdx = _findKey(key); + if (keyIdx == -1) { + return _defaultReturnValue; } - return defaultReturnValue; + + double old = _values[keyIdx]; + _states[keyIdx] = REMOVED; + --_used; + return old; } public int size() { @@ -274,6 +281,7 @@ public final class Long2DoubleOpenHashTable implements Externalizable { public void clear() { Arrays.fill(_states, FREE); this._used = 0; + this._freeEntries = _states.length; } public IMapIterator entries() { @@ -301,95 +309,58 @@ public final class Long2DoubleOpenHashTable implements Externalizable { protected void ensureCapacity(final int newCapacity) { int prime = Primes.findLeastPrimeNumber(newCapacity); rehash(prime); - this._threshold = Math.round(prime * _loadFactor); } private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } + final long[] oldKeys = _keys; + final double[] oldValues = _values; + final byte[] oldStates = _states; + final int oldCapacity = _keys.length; + final long[] newkeys = new long[newCapacity]; final double[] newValues = new double[newCapacity]; final byte[] newStates = new byte[newCapacity]; int used = 0; for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - long k = _keys[i]; - double v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } + if (oldStates[i] != FULL) { + continue; + } + final long k = oldKeys[i]; + final double v = oldValues[i]; + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (newStates[keyIdx] != FREE); } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + used++; } this._keys = newkeys; this._values = newValues; this._states = newStates; this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); } private static int keyHash(final long key) { return (int) (key ^ (key >>> 32)) & 0x7FFFFFFF; } - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeLong(i.getKey()); - out.writeDouble(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - final int keylen = in.readInt(); - final long[] keys = new long[keylen]; - final double[] values = new double[keylen]; - final byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - long k = in.readLong(); - double v = in.readDouble(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - public interface IMapIterator { public boolean hasNext(); @@ -450,4 +421,62 @@ public final class Long2DoubleOpenHashTable implements Externalizable { return _values[lastEntry]; } } + + @Override + public void writeExternal(@Nonnull ObjectOutput out) throws IOException { + out.writeFloat(_loadFactor); + out.writeFloat(_growFactor); + out.writeInt(_used); + out.writeDouble(_defaultReturnValue); + + final IMapIterator i = entries(); + while (i.next() != -1) { + out.writeLong(i.getKey()); + out.writeDouble(i.getValue()); + } + } + + @Override + public void readExternal(@Nonnull ObjectInput in) throws IOException, ClassNotFoundException { + this._loadFactor = in.readFloat(); + this._growFactor = in.readFloat(); + final int used = in.readInt(); + this._defaultReturnValue = in.readDouble(); + + final int newCapacity = Primes.findLeastPrimeNumber(Math.round(used * 1.7f)); + final long[] keys = new long[newCapacity]; + final double[] values = new double[newCapacity]; + final byte[] states = new byte[newCapacity]; + for (int i = 0; i < used; i++) { + final long k = in.readLong(); + final double v = in.readDouble(); + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (states[keyIdx] != FREE) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (states[keyIdx] != FREE); + } + keys[keyIdx] = k; + values[keyIdx] = v; + states[keyIdx] = FULL; + } + this._keys = keys; + this._values = values; + this._states = states; + this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); + } + }
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Long2FloatOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Long2FloatOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Long2FloatOpenHashTable.java index ba2de76..ca8c4ee 100644 --- a/core/src/main/java/hivemall/utils/collections/maps/Long2FloatOpenHashTable.java +++ b/core/src/main/java/hivemall/utils/collections/maps/Long2FloatOpenHashTable.java @@ -45,12 +45,24 @@ public final class Long2FloatOpenHashTable implements Externalizable { private static final float DEFAULT_LOAD_FACTOR = 0.75f; private static final float DEFAULT_GROW_FACTOR = 2.0f; - protected final transient float _loadFactor; - protected final transient float _growFactor; + private static final float SHRINK_FACTOR = 0.1f; // at least 10% of table must be FREE + private static final float GROW_FACTOR_AT_SHRINK = 1.7f; - protected int _used = 0; - protected int _threshold; - protected float defaultReturnValue = 0.f; + protected/* final */float _loadFactor; + protected/* final */float _growFactor; + + protected int _used; + protected int _freeEntries; + + /** Used entry threshold to grow table */ + protected int _growThreshold; + /** + * Free entry threshold to shrink table. Shrink threshold will be set in the first expansion to + * avoid shrink at very early remove(). + */ + protected int _shrinkThreshold; + + protected float _defaultReturnValue = 0.f; protected long[] _keys; protected float[] _values; @@ -67,7 +79,10 @@ public final class Long2FloatOpenHashTable implements Externalizable { this._keys = new long[actualSize]; this._values = new float[actualSize]; this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); + this._used = 0; + this._freeEntries = actualSize; + this._growThreshold = Math.round(actualSize * _loadFactor); + this._shrinkThreshold = Math.round(actualSize * SHRINK_FACTOR); } public Long2FloatOpenHashTable(int size, int loadFactor, int growFactor) { @@ -87,7 +102,7 @@ public final class Long2FloatOpenHashTable implements Externalizable { } public void defaultReturnValue(float v) { - this.defaultReturnValue = v; + this._defaultReturnValue = v; } public boolean containsKey(final long key) { @@ -98,12 +113,12 @@ public final class Long2FloatOpenHashTable implements Externalizable { * @return defaultReturnValue if not found */ public float get(final long key) { - return get(key, defaultReturnValue); + return get(key, _defaultReturnValue); } public float get(final long key, final float defaultValue) { final int i = _findKey(key); - if (i < 0) { + if (i == -1) { return defaultValue; } return _values[i]; @@ -111,7 +126,7 @@ public final class Long2FloatOpenHashTable implements Externalizable { public float _get(final int index) { if (index < 0) { - return defaultReturnValue; + return _defaultReturnValue; } return _values[index]; } @@ -129,7 +144,7 @@ public final class Long2FloatOpenHashTable implements Externalizable { } public float put(final long key, final float value) { - return put(key, value, defaultReturnValue); + return put(key, value, _defaultReturnValue); } public float put(final long key, final float value, final float defaultValue) { @@ -147,26 +162,39 @@ public final class Long2FloatOpenHashTable implements Externalizable { final float[] values = _values; final byte[] states = _states; - if (states[keyIdx] == FULL) {// float hashing + byte state = states[keyIdx]; + if (state == FULL) {// double hashing if (keys[keyIdx] == key) { float old = values[keyIdx]; values[keyIdx] = value; return old; } // try second hash - int decr = 1 + (hash % (keyLength - 2)); + final int loopIndex = keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); for (;;) { keyIdx -= decr; if (keyIdx < 0) { keyIdx += keyLength; } - if (isFree(keyIdx, key)) { + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } + + state = states[keyIdx]; + if (state == FREE) { break; } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - values[keyIdx] = value; - return old; + if (keys[keyIdx] == key) { + if (state == FULL) { + float old = values[keyIdx]; + values[keyIdx] = value; + return old; + } else { + assert (state == REMOVED); + break; + } } } } @@ -174,24 +202,21 @@ public final class Long2FloatOpenHashTable implements Externalizable { values[keyIdx] = value; states[keyIdx] = FULL; ++_used; - return defaultValue; - } - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final long key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; + if (state == FREE) { + _freeEntries--; + if (_freeEntries < _shrinkThreshold) { + int newCapacity = Math.max(keys.length, Math.round(_used * GROW_FACTOR_AT_SHRINK)); + ensureCapacity(newCapacity); + } } - return false; + + return defaultValue; } /** @return expanded or not */ protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled + if ((_used + 1) >= _growThreshold) {// too filled int newCapacity = Math.round(_keys.length * _growFactor); ensureCapacity(newCapacity); return true; @@ -207,64 +232,44 @@ public final class Long2FloatOpenHashTable implements Externalizable { final byte[] states = _states; final int keyLength = keys.length; + // double hashing final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); + final int startIndex = hash % keyLength; + for (int keyIdx = startIndex;;) { + final byte state = states[keyIdx]; + if (state == FREE) { + return -1; } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { + if (keys[keyIdx] == key) { + if (state == FULL) { return keyIdx; + } else { + assert (state == REMOVED); + return -1; } } + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (keyIdx == startIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } } - return -1; } public float remove(final long key) { - final long[] keys = _keys; - final float[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } + final int keyIdx = _findKey(key); + if (keyIdx == -1) { + return _defaultReturnValue; } - return defaultReturnValue; + + float old = _values[keyIdx]; + _states[keyIdx] = REMOVED; + --_used; + return old; } public int size() { @@ -274,6 +279,7 @@ public final class Long2FloatOpenHashTable implements Externalizable { public void clear() { Arrays.fill(_states, FREE); this._used = 0; + this._freeEntries = _states.length; } public IMapIterator entries() { @@ -301,95 +307,58 @@ public final class Long2FloatOpenHashTable implements Externalizable { protected void ensureCapacity(final int newCapacity) { int prime = Primes.findLeastPrimeNumber(newCapacity); rehash(prime); - this._threshold = Math.round(prime * _loadFactor); } private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } + final long[] oldKeys = _keys; + final float[] oldValues = _values; + final byte[] oldStates = _states; + final int oldCapacity = _keys.length; + final long[] newkeys = new long[newCapacity]; final float[] newValues = new float[newCapacity]; final byte[] newStates = new byte[newCapacity]; int used = 0; for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - long k = _keys[i]; - float v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } + if (oldStates[i] != FULL) { + continue; + } + final long k = oldKeys[i]; + final float v = oldValues[i]; + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (newStates[keyIdx] != FREE); } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + used++; } this._keys = newkeys; this._values = newValues; this._states = newStates; this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); } private static int keyHash(final long key) { return (int) (key ^ (key >>> 32)) & 0x7FFFFFFF; } - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeLong(i.getKey()); - out.writeFloat(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - final int keylen = in.readInt(); - final long[] keys = new long[keylen]; - final float[] values = new float[keylen]; - final byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - long k = in.readLong(); - float v = in.readFloat(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - public interface IMapIterator { public boolean hasNext(); @@ -450,4 +419,62 @@ public final class Long2FloatOpenHashTable implements Externalizable { return _values[lastEntry]; } } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeFloat(_loadFactor); + out.writeFloat(_growFactor); + out.writeInt(_used); + out.writeFloat(_defaultReturnValue); + + final IMapIterator i = entries(); + while (i.next() != -1) { + out.writeLong(i.getKey()); + out.writeFloat(i.getValue()); + } + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._loadFactor = in.readFloat(); + this._growFactor = in.readFloat(); + final int used = in.readInt(); + this._defaultReturnValue = in.readFloat(); + + final int newCapacity = Primes.findLeastPrimeNumber(Math.round(used * 1.7f)); + final long[] keys = new long[newCapacity]; + final float[] values = new float[newCapacity]; + final byte[] states = new byte[newCapacity]; + for (int i = 0; i < used; i++) { + final long k = in.readLong(); + final float v = in.readFloat(); + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (states[keyIdx] != FREE) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (states[keyIdx] != FREE); + } + keys[keyIdx] = k; + values[keyIdx] = v; + states[keyIdx] = FULL; + } + this._keys = keys; + this._values = values; + this._states = states; + this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Long2IntOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Long2IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Long2IntOpenHashTable.java index 6445231..360e806 100644 --- a/core/src/main/java/hivemall/utils/collections/maps/Long2IntOpenHashTable.java +++ b/core/src/main/java/hivemall/utils/collections/maps/Long2IntOpenHashTable.java @@ -45,12 +45,24 @@ public final class Long2IntOpenHashTable implements Externalizable { private static final float DEFAULT_LOAD_FACTOR = 0.75f; private static final float DEFAULT_GROW_FACTOR = 2.0f; - protected final transient float _loadFactor; - protected final transient float _growFactor; + private static final float SHRINK_FACTOR = 0.1f; // at least 10% of table must be FREE + private static final float GROW_FACTOR_AT_SHRINK = 1.7f; - protected int _used = 0; - protected int _threshold; - protected int defaultReturnValue = -1; + protected/* final */float _loadFactor; + protected/* final */float _growFactor; + + protected int _used; + protected int _freeEntries; + + /** Used entry threshold to grow table */ + protected int _growThreshold; + /** + * Free entry threshold to shrink table. Shrink threshold will be set in the first expansion to + * avoid shrink at very early remove(). + */ + protected int _shrinkThreshold; + + protected int _defaultReturnValue = -1; protected long[] _keys; protected int[] _values; @@ -66,7 +78,10 @@ public final class Long2IntOpenHashTable implements Externalizable { this._keys = new long[actualSize]; this._values = new int[actualSize]; this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); + this._used = 0; + this._freeEntries = actualSize; + this._growThreshold = Math.round(actualSize * _loadFactor); + this._shrinkThreshold = Math.round(actualSize * SHRINK_FACTOR); } public Long2IntOpenHashTable(int size, int loadFactor, int growFactor) { @@ -86,7 +101,7 @@ public final class Long2IntOpenHashTable implements Externalizable { } public void defaultReturnValue(int v) { - this.defaultReturnValue = v; + this._defaultReturnValue = v; } public boolean containsKey(final long key) { @@ -97,12 +112,12 @@ public final class Long2IntOpenHashTable implements Externalizable { * @return defaultReturnValue if not found */ public int get(final long key) { - return get(key, defaultReturnValue); + return get(key, _defaultReturnValue); } public int get(final long key, final int defaultValue) { final int i = _findKey(key); - if (i < 0) { + if (i == -1) { return defaultValue; } return _values[i]; @@ -110,7 +125,7 @@ public final class Long2IntOpenHashTable implements Externalizable { public int _get(final int index) { if (index < 0) { - return defaultReturnValue; + return _defaultReturnValue; } return _values[index]; } @@ -130,26 +145,39 @@ public final class Long2IntOpenHashTable implements Externalizable { final int[] values = _values; final byte[] states = _states; - if (states[keyIdx] == FULL) {// double hashing + byte state = states[keyIdx]; + if (state == FULL) {// double hashing if (keys[keyIdx] == key) { int old = values[keyIdx]; values[keyIdx] = value; return old; } // try second hash - int decr = 1 + (hash % (keyLength - 2)); + final int loopIndex = keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); for (;;) { keyIdx -= decr; if (keyIdx < 0) { keyIdx += keyLength; } - if (isFree(keyIdx, key)) { + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } + + state = states[keyIdx]; + if (state == FREE) { break; } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - values[keyIdx] = value; - return old; + if (keys[keyIdx] == key) { + if (state == FULL) { + int old = values[keyIdx]; + values[keyIdx] = value; + return old; + } else { + assert (state == REMOVED); + break; + } } } } @@ -157,7 +185,16 @@ public final class Long2IntOpenHashTable implements Externalizable { values[keyIdx] = value; states[keyIdx] = FULL; ++_used; - return defaultReturnValue; + + if (state == FREE) { + _freeEntries--; + if (_freeEntries < _shrinkThreshold) { + int newCapacity = Math.max(keys.length, Math.round(_used * GROW_FACTOR_AT_SHRINK)); + ensureCapacity(newCapacity); + } + } + + return _defaultReturnValue; } public int incr(final long key, final int delta) { @@ -175,26 +212,39 @@ public final class Long2IntOpenHashTable implements Externalizable { final int[] values = _values; final byte[] states = _states; - if (states[keyIdx] == FULL) {// double hashing + byte state = states[keyIdx]; + if (state == FULL) {// double hashing if (keys[keyIdx] == key) { int old = values[keyIdx]; values[keyIdx] += delta; return old; } // try second hash - int decr = 1 + (hash % (keyLength - 2)); + final int loopIndex = keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); for (;;) { keyIdx -= decr; if (keyIdx < 0) { keyIdx += keyLength; } - if (isFree(keyIdx, key)) { + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } + + state = states[keyIdx]; + if (state == FREE) { break; } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - values[keyIdx] += delta; - return old; + if (keys[keyIdx] == key) { + if (state == FULL) { + int old = values[keyIdx]; + values[keyIdx] += delta; + return old; + } else { + assert (state == REMOVED); + break; + } } } } @@ -202,24 +252,21 @@ public final class Long2IntOpenHashTable implements Externalizable { values[keyIdx] += delta; states[keyIdx] = FULL; ++_used; - return defaultReturnValue; - } - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final long key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; + if (state == FREE) { + _freeEntries--; + if (_freeEntries < _shrinkThreshold) { + int newCapacity = Math.max(keys.length, Math.round(_used * GROW_FACTOR_AT_SHRINK)); + ensureCapacity(newCapacity); + } } - return false; + + return _defaultReturnValue; } /** @return expanded or not */ protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled + if ((_used + 1) >= _growThreshold) {// too filled int newCapacity = Math.round(_keys.length * _growFactor); ensureCapacity(newCapacity); return true; @@ -235,64 +282,45 @@ public final class Long2IntOpenHashTable implements Externalizable { final byte[] states = _states; final int keyLength = keys.length; + // double hashing final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); + final int startIndex = hash % keyLength; + for (int keyIdx = startIndex;;) { + final byte state = states[keyIdx]; + if (state == FREE) { + return -1; } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { + if (keys[keyIdx] == key) { + if (state == FULL) { return keyIdx; + } else { + assert (state == REMOVED); + return -1; } } + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (keyIdx == startIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } } - return -1; } public int remove(final long key) { - final long[] keys = _keys; - final int[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } + final int keyIdx = _findKey(key); + if (keyIdx == -1) { + return _defaultReturnValue; } - return defaultReturnValue; + + int old = _values[keyIdx]; + _states[keyIdx] = REMOVED; + _values[keyIdx] = 0; // required for incr() + --_used; + return old; } public int size() { @@ -302,6 +330,7 @@ public final class Long2IntOpenHashTable implements Externalizable { public void clear() { Arrays.fill(_states, FREE); this._used = 0; + this._freeEntries = _states.length; } public IMapIterator entries() { @@ -329,95 +358,58 @@ public final class Long2IntOpenHashTable implements Externalizable { protected void ensureCapacity(final int newCapacity) { int prime = Primes.findLeastPrimeNumber(newCapacity); rehash(prime); - this._threshold = Math.round(prime * _loadFactor); } private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } + final long[] oldKeys = _keys; + final int[] oldValues = _values; + final byte[] oldStates = _states; + final int oldCapacity = oldKeys.length; + final long[] newkeys = new long[newCapacity]; final int[] newValues = new int[newCapacity]; final byte[] newStates = new byte[newCapacity]; int used = 0; for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - long k = _keys[i]; - int v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } + if (oldStates[i] != FULL) { + continue; + } + final long k = oldKeys[i]; + final int v = oldValues[i]; + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (newStates[keyIdx] != FREE); } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + used++; } this._keys = newkeys; this._values = newValues; this._states = newStates; this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); } private static int keyHash(final long key) { return (int) (key ^ (key >>> 32)) & 0x7FFFFFFF; } - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeLong(i.getKey()); - out.writeInt(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - final int keylen = in.readInt(); - final long[] keys = new long[keylen]; - final int[] values = new int[keylen]; - final byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - long k = in.readLong(); - int v = in.readInt(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - public interface IMapIterator { public boolean hasNext(); @@ -478,4 +470,60 @@ public final class Long2IntOpenHashTable implements Externalizable { return _values[lastEntry]; } } + + public void writeExternal(ObjectOutput out) throws IOException { + out.writeFloat(_loadFactor); + out.writeFloat(_growFactor); + out.writeInt(_used); + out.writeInt(_defaultReturnValue); + + final IMapIterator itor = entries(); + while (itor.next() != -1) { + out.writeLong(itor.getKey()); + out.writeInt(itor.getValue()); + } + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._loadFactor = in.readFloat(); + this._growFactor = in.readFloat(); + final int used = in.readInt(); + this._defaultReturnValue = in.readInt(); + + final int newCapacity = Primes.findLeastPrimeNumber(Math.round(used * 1.7f)); + final long[] keys = new long[newCapacity]; + final int[] values = new int[newCapacity]; + final byte[] states = new byte[newCapacity]; + for (int i = 0; i < used; i++) { + final long k = in.readLong(); + final int v = in.readInt(); + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (states[keyIdx] != FREE) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (states[keyIdx] != FREE); + } + keys[keyIdx] = k; + values[keyIdx] = v; + states[keyIdx] = FULL; + } + this._keys = keys; + this._values = values; + this._states = states; + this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/OpenHashMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/OpenHashMap.java b/core/src/main/java/hivemall/utils/collections/maps/OpenHashMap.java deleted file mode 100644 index f5ee1e6..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/OpenHashMap.java +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -// -// Copyright (C) 2010 catchpole.net -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -package hivemall.utils.collections.maps; - -import hivemall.utils.collections.IMapIterator; -import hivemall.utils.lang.Copyable; -import hivemall.utils.math.MathUtils; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import javax.annotation.CheckForNull; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -/** - * A space efficient open-addressing HashMap implementation. - * - * Unlike {@link OpenHashTable}, it maintains single arrays for keys and object references. - * - * It uses single open hashing arrays sized to binary powers (256, 512 etc) rather than those - * divisible by prime numbers. This allows the hash offset calculation to be a simple binary masking - * operation. - * - * The index into the arrays is determined by masking a portion of the key and shifting it to - * provide a series of small buckets within the array. To insert an entry the a sweep is searched - * until an empty key space is found. A sweep is 4 times the length of a bucket, to reduce the need - * to rehash. If no key space is found within a sweep, the table size is doubled. - * - * While performance is high, the slowest situation is where lookup occurs for entries that do not - * exist, as an entire sweep area must be searched. However, this HashMap is more space efficient - * than other open-addressing HashMap implementations as in fastutil. - * - * Note that this HashMap does not allow nulls to be used as keys. - */ -public final class OpenHashMap<K, V> implements Map<K, V>, Externalizable { - private K[] keys; - private V[] values; - - // total number of entries in this table - private int size; - // number of bits for the value table (eg. 8 bits = 256 entries) - private int bits; - // the number of bits in each sweep zone. - private int sweepbits; - // the size of a sweep (2 to the power of sweepbits) - private int sweep; - // the sweepmask used to create sweep zone offsets - private int sweepmask; - - public OpenHashMap() {}// for Externalizable - - public OpenHashMap(int size) { - resize(MathUtils.bitsRequired(size < 256 ? 256 : size)); - } - - @Nullable - public V put(@CheckForNull final K key, @Nullable final V value) { - if (key == null) { - throw new NullPointerException(this.getClass().getName() + " key"); - } - - for (;;) { - int off = getBucketOffset(key); - final int end = off + sweep; - for (; off < end; off++) { - final K searchKey = keys[off]; - if (searchKey == null) { - // insert - keys[off] = key; - size++; - V previous = values[off]; - values[off] = value; - return previous; - } else if (compare(searchKey, key)) { - // replace - V previous = values[off]; - values[off] = value; - return previous; - } - } - resize(this.bits + 1); - } - } - - @Nullable - public V putIfAbsent(@CheckForNull final K key, @Nullable final V value) { - if (key == null) { - throw new NullPointerException(this.getClass().getName() + " key"); - } - - for (;;) { - int off = getBucketOffset(key); - final int end = off + sweep; - for (; off < end; off++) { - final K searchKey = keys[off]; - if (searchKey == null) { - // insert - keys[off] = key; - size++; - V previous = values[off]; - values[off] = value; - return previous; - } else if (compare(searchKey, key)) { - return values[off]; - } - } - resize(this.bits + 1); - } - } - - @Nullable - public V get(@Nonnull final Object key) { - int off = getBucketOffset(key); - final int end = sweep + off; - for (; off < end; off++) { - if (keys[off] != null && compare(keys[off], key)) { - return values[off]; - } - } - return null; - } - - @Nullable - public V remove(@Nonnull final Object key) { - int off = getBucketOffset(key); - final int end = sweep + off; - for (; off < end; off++) { - if (keys[off] != null && compare(keys[off], key)) { - keys[off] = null; - V previous = values[off]; - values[off] = null; - size--; - return previous; - } - } - return null; - } - - public int size() { - return size; - } - - public void putAll(@Nonnull final Map<? extends K, ? extends V> m) { - for (K key : m.keySet()) { - put(key, m.get(key)); - } - } - - public boolean isEmpty() { - return size == 0; - } - - public boolean containsKey(@Nonnull final Object key) { - return get(key) != null; - } - - public boolean containsValue(@Nonnull final Object value) { - for (V v : values) { - if (v != null && compare(v, value)) { - return true; - } - } - return false; - } - - public void clear() { - Arrays.fill(keys, null); - Arrays.fill(values, null); - this.size = 0; - } - - @Nonnull - public Set<K> keySet() { - final Set<K> set = new HashSet<K>(); - for (K key : keys) { - if (key != null) { - set.add(key); - } - } - return set; - } - - @Nonnull - public Collection<V> values() { - final Collection<V> list = new ArrayList<V>(); - for (V value : values) { - if (value != null) { - list.add(value); - } - } - return list; - } - - @Nonnull - public Set<Entry<K, V>> entrySet() { - final Set<Entry<K, V>> set = new HashSet<Entry<K, V>>(); - for (K key : keys) { - if (key != null) { - set.add(new MapEntry<K, V>(this, key)); - } - } - return set; - } - - private static final class MapEntry<K, V> implements Map.Entry<K, V> { - private final Map<K, V> map; - private final K key; - - public MapEntry(Map<K, V> map, K key) { - this.map = map; - this.key = key; - } - - @Override - public K getKey() { - return key; - } - - @Override - public V getValue() { - return map.get(key); - } - - @Override - public V setValue(V value) { - return map.put(key, value); - } - } - - @Override - public void writeExternal(ObjectOutput out) throws IOException { - // remember the number of bits - out.writeInt(this.bits); - // remember the total number of entries - out.writeInt(this.size); - // write all entries - for (int x = 0; x < this.keys.length; x++) { - if (keys[x] != null) { - out.writeObject(keys[x]); - out.writeObject(values[x]); - } - } - } - - @SuppressWarnings("unchecked") - @Override - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - // resize to old bit size - int bitSize = in.readInt(); - if (bitSize != bits) { - resize(bitSize); - } - // read all entries - int size = in.readInt(); - for (int x = 0; x < size; x++) { - this.put((K) in.readObject(), (V) in.readObject()); - } - } - - @Override - public String toString() { - return this.getClass().getSimpleName() + ' ' + size; - } - - @SuppressWarnings("unchecked") - private void resize(final int bits) { - this.bits = bits; - this.sweepbits = bits / 4; - this.sweep = MathUtils.powerOf(2, sweepbits) * 4; - this.sweepmask = MathUtils.bitMask(bits - sweepbits) << sweepbits; - - // remember old values so we can recreate the entries - final K[] existingKeys = this.keys; - final V[] existingValues = this.values; - - // create the arrays - this.values = (V[]) new Object[MathUtils.powerOf(2, bits) + sweep]; - this.keys = (K[]) new Object[values.length]; - this.size = 0; - - // re-add the previous entries if resizing - if (existingKeys != null) { - for (int x = 0; x < existingKeys.length; x++) { - final K k = existingKeys[x]; - if (k != null) { - put(k, existingValues[x]); - } - } - } - } - - private int getBucketOffset(@Nonnull final Object key) { - return (key.hashCode() << sweepbits) & sweepmask; - } - - private static boolean compare(@Nonnull final Object v1, @Nonnull final Object v2) { - return v1 == v2 || v1.equals(v2); - } - - public IMapIterator<K, V> entries() { - return new MapIterator(false); - } - - public IMapIterator<K, V> entries(boolean releaseSeen) { - return new MapIterator(releaseSeen); - } - - private final class MapIterator implements IMapIterator<K, V> { - - final boolean releaseSeen; - int nextEntry; - int lastEntry = -1; - - MapIterator(boolean releaseSeen) { - this.releaseSeen = releaseSeen; - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < keys.length && keys[index] == null) { - index++; - } - return index; - } - - @Override - public boolean hasNext() { - return nextEntry < keys.length; - } - - @Override - public int next() { - if (releaseSeen) { - free(lastEntry); - } - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - @Override - public K getKey() { - return keys[lastEntry]; - } - - @Override - public V getValue() { - return values[lastEntry]; - } - - @Override - public <T extends Copyable<V>> void getValue(T probe) { - probe.copyFrom(getValue()); - } - - private void free(int index) { - if (index >= 0) { - keys[index] = null; - values[index] = null; - } - } - - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/OpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/OpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/OpenHashTable.java index c16567a..28a677e 100644 --- a/core/src/main/java/hivemall/utils/collections/maps/OpenHashTable.java +++ b/core/src/main/java/hivemall/utils/collections/maps/OpenHashTable.java @@ -20,6 +20,7 @@ package hivemall.utils.collections.maps; import hivemall.utils.collections.IMapIterator; import hivemall.utils.lang.Copyable; +import hivemall.utils.lang.Preconditions; import hivemall.utils.math.Primes; import java.io.Externalizable; @@ -28,7 +29,10 @@ import java.io.ObjectInput; import java.io.ObjectOutput; import java.util.Arrays; +import javax.annotation.CheckForNull; +import javax.annotation.Nonnegative; import javax.annotation.Nonnull; +import javax.annotation.Nullable; /** * An open-addressing hash table using double-hashing. @@ -45,6 +49,9 @@ public final class OpenHashTable<K, V> implements Externalizable { public static final float DEFAULT_LOAD_FACTOR = 0.75f; public static final float DEFAULT_GROW_FACTOR = 2.0f; + private static final float SHRINK_FACTOR = 0.1f; // at least 10% of table must be FREE + private static final float GROW_FACTOR_AT_SHRINK = 1.7f; + protected static final byte FREE = 0; protected static final byte FULL = 1; protected static final byte REMOVED = 2; @@ -52,8 +59,16 @@ public final class OpenHashTable<K, V> implements Externalizable { protected/* final */float _loadFactor; protected/* final */float _growFactor; - protected int _used = 0; - protected int _threshold; + protected int _used; + protected int _freeEntries; + + /** Used entry threshold to grow table */ + protected int _growThreshold; + /** + * Free entry threshold to shrink table. Shrink threshold will be set in the first expansion to + * avoid shrink at very early remove(). + */ + protected int _shrinkThreshold; protected K[] _keys; protected V[] _values; @@ -79,15 +94,10 @@ public final class OpenHashTable<K, V> implements Externalizable { this._keys = (K[]) new Object[actualSize]; this._values = (V[]) new Object[actualSize]; this._states = new byte[actualSize]; - this._threshold = Math.round(actualSize * _loadFactor); - } - - public OpenHashTable(@Nonnull K[] keys, @Nonnull V[] values, @Nonnull byte[] states, int used) { - this._used = used; - this._threshold = keys.length; - this._keys = keys; - this._values = values; - this._states = states; + this._used = 0; + this._freeEntries = actualSize; + this._growThreshold = Math.round(actualSize * _loadFactor); + this._shrinkThreshold = Math.round(actualSize * SHRINK_FACTOR); } public Object[] getKeys() { @@ -102,20 +112,22 @@ public final class OpenHashTable<K, V> implements Externalizable { return _states; } - public boolean containsKey(final K key) { + public boolean containsKey(@CheckForNull final K key) { return findKey(key) >= 0; } - public V get(final K key) { + public V get(@CheckForNull final K key) { final int i = findKey(key); - if (i < 0) { + if (i == -1) { return null; } return _values[i]; } - public V put(final K key, final V value) { - int hash = keyHash(key); + public V put(@CheckForNull final K key, @Nullable final V value) { + Preconditions.checkNotNull(key); + + final int hash = keyHash(key); int keyLength = _keys.length; int keyIdx = hash % keyLength; @@ -125,59 +137,70 @@ public final class OpenHashTable<K, V> implements Externalizable { keyIdx = hash % keyLength; } - K[] keys = _keys; - V[] values = _values; - byte[] states = _states; + final K[] keys = _keys; + final V[] values = _values; + final byte[] states = _states; - if (states[keyIdx] == FULL) { + byte state = states[keyIdx]; + if (state == FULL) {// double hashing if (equals(keys[keyIdx], key)) { V old = values[keyIdx]; values[keyIdx] = value; return old; } // try second hash - int decr = 1 + (hash % (keyLength - 2)); + final int loopIndex = keyIdx; + final int decr = 1 + (hash % (keyLength - 2)); for (;;) { keyIdx -= decr; if (keyIdx < 0) { keyIdx += keyLength; } - if (isFree(keyIdx, key)) { + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } + + state = states[keyIdx]; + if (state == FREE) { break; } - if (states[keyIdx] == FULL && equals(keys[keyIdx], key)) { - V old = values[keyIdx]; - values[keyIdx] = value; - return old; + if (equals(keys[keyIdx], key)) { + if (state == FULL) { + V old = values[keyIdx]; + values[keyIdx] = value; + return old; + } else { + assert (state == REMOVED); + break; + } } } } + keys[keyIdx] = key; values[keyIdx] = value; states[keyIdx] = FULL; ++_used; + + if (state == FREE) { + _freeEntries--; + if (_freeEntries < _shrinkThreshold) { + int newCapacity = Math.max(keys.length, Math.round(_used * GROW_FACTOR_AT_SHRINK)); + ensureCapacity(newCapacity); + } + } + return null; } - private static boolean equals(final Object k1, final Object k2) { + private static boolean equals(@Nonnull final Object k1, @Nonnull final Object k2) { return k1 == k2 || k1.equals(k2); } - /** Return weather the required slot is free for new entry */ - protected boolean isFree(int index, K key) { - byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && equals(_keys[index], key)) { - return true; - } - return false; - } - /** @return expanded or not */ protected boolean preAddEntry(int index) { - if ((_used + 1) >= _threshold) {// filled enough + if ((_used + 1) >= _growThreshold) {// filled enough int newCapacity = Math.round(_keys.length * _growFactor); ensureCapacity(newCapacity); return true; @@ -185,69 +208,51 @@ public final class OpenHashTable<K, V> implements Externalizable { return false; } - protected int findKey(final K key) { - K[] keys = _keys; - byte[] states = _states; - int keyLength = keys.length; + protected int findKey(@CheckForNull final K key) { + Preconditions.checkNotNull(key); - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && equals(keys[keyIdx], key)) { - return keyIdx; + final K[] keys = _keys; + final byte[] states = _states; + final int keyLength = keys.length; + + // double hashing + final int hash = keyHash(key); + final int decr = 1 + (hash % (keyLength - 2)); + final int startIndex = hash % keyLength; + for (int keyIdx = startIndex;;) { + final byte state = states[keyIdx]; + if (state == FREE) { + return -1; } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && equals(keys[keyIdx], key)) { + if (equals(keys[keyIdx], key)) { + if (state == FULL) { return keyIdx; + } else { + assert (state == REMOVED); + return -1; } } + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (keyIdx == startIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + key + + ", keyIdx=" + keyIdx); + } } - return -1; } - public V remove(final K key) { - K[] keys = _keys; - V[] values = _values; - byte[] states = _states; - int keyLength = keys.length; - - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && equals(keys[keyIdx], key)) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return null; - } - if (states[keyIdx] == FULL && equals(keys[keyIdx], key)) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } + public V remove(@CheckForNull final K key) { + final int keyIdx = findKey(key); + if (keyIdx == -1) { + return null; } - return null; + + V old = _values[keyIdx]; + _states[keyIdx] = REMOVED; + --_used; + return old; } public int size() { @@ -257,10 +262,15 @@ public final class OpenHashTable<K, V> implements Externalizable { public void clear() { Arrays.fill(_states, FREE); this._used = 0; + this._freeEntries = _states.length; } public IMapIterator<K, V> entries() { - return new MapIterator(); + return new MapIterator(false); + } + + public IMapIterator<K, V> entries(boolean releaseSeen) { + return new MapIterator(releaseSeen); } @Override @@ -282,60 +292,71 @@ public final class OpenHashTable<K, V> implements Externalizable { return buf.toString(); } - protected void ensureCapacity(int newCapacity) { + protected void ensureCapacity(@Nonnegative int newCapacity) { int prime = Primes.findLeastPrimeNumber(newCapacity); rehash(prime); - this._threshold = Math.round(prime * _loadFactor); } @SuppressWarnings("unchecked") - private void rehash(int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } + private void rehash(@Nonnegative final int newCapacity) { + final K[] oldKeys = _keys; + final V[] oldValues = _values; + final byte[] oldStates = _states; + final int oldCapacity = oldKeys.length; + final K[] newkeys = (K[]) new Object[newCapacity]; final V[] newValues = (V[]) new Object[newCapacity]; final byte[] newStates = new byte[newCapacity]; int used = 0; for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - K k = _keys[i]; - V v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } + if (oldStates[i] != FULL) { + continue; + } + final K k = oldKeys[i]; + final V v = oldValues[i]; + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; } - } - newStates[keyIdx] = FULL; - newkeys[keyIdx] = k; - newValues[keyIdx] = v; + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (newStates[keyIdx] != FREE); } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + used++; } this._keys = newkeys; this._values = newValues; this._states = newStates; this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); } - private static int keyHash(final Object key) { + private static int keyHash(@Nonnull final Object key) { int hash = key.hashCode(); return hash & 0x7fffffff; } private final class MapIterator implements IMapIterator<K, V> { + final boolean releaseSeen; int nextEntry; int lastEntry = -1; - MapIterator() { + MapIterator(boolean releaseSeen) { + this.releaseSeen = releaseSeen; this.nextEntry = nextEntry(0); } @@ -352,12 +373,15 @@ public final class OpenHashTable<K, V> implements Externalizable { } public int next() { + if (releaseSeen) { + free(lastEntry); + } if (!hasNext()) { return -1; } int curEntry = nextEntry; - this.lastEntry = nextEntry; - this.nextEntry = nextEntry(nextEntry + 1); + this.lastEntry = curEntry; + this.nextEntry = nextEntry(curEntry + 1); return curEntry; } @@ -379,6 +403,15 @@ public final class OpenHashTable<K, V> implements Externalizable { public <T extends Copyable<V>> void getValue(T probe) { probe.copyFrom(getValue()); } + + private void free(int index) { + if (index < 0) { + return; // should not happen + } + _keys[index] = null; + _values[index] = null; + _states[index] = FREE; + } } @Override @@ -387,13 +420,10 @@ public final class OpenHashTable<K, V> implements Externalizable { out.writeFloat(_growFactor); out.writeInt(_used); - final int size = _keys.length; - out.writeInt(size); - - for (int i = 0; i < size; i++) { - out.writeObject(_keys[i]); - out.writeObject(_values[i]); - out.writeByte(_states[i]); + final IMapIterator<K, V> itor = entries(); + while (itor.next() != -1) { + out.writeObject(itor.getKey()); + out.writeObject(itor.getValue()); } } @@ -402,21 +432,42 @@ public final class OpenHashTable<K, V> implements Externalizable { public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { this._loadFactor = in.readFloat(); this._growFactor = in.readFloat(); - this._used = in.readInt(); - - final int size = in.readInt(); - final Object[] keys = new Object[size]; - final Object[] values = new Object[size]; - final byte[] states = new byte[size]; - for (int i = 0; i < size; i++) { - keys[i] = in.readObject(); - values[i] = in.readObject(); - states[i] = in.readByte(); + final int used = in.readInt(); + + final int newCapacity = Primes.findLeastPrimeNumber(Math.round(used * 1.7f)); + final K[] keys = (K[]) new Object[newCapacity]; + final V[] values = (V[]) new Object[newCapacity]; + final byte[] states = new byte[newCapacity]; + for (int i = 0; i < used; i++) { + final K k = (K) in.readObject(); + final V v = (V) in.readObject(); + final int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (states[keyIdx] == FULL) {// second hashing + final int decr = 1 + (hash % (newCapacity - 2)); + final int loopIndex = keyIdx; + do { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + if (keyIdx == loopIndex) { + throw new IllegalStateException("Detected infinite loop where key=" + k + + ", keyIdx=" + keyIdx); + } + } while (states[keyIdx] != FREE); + } + keys[keyIdx] = k; + values[keyIdx] = v; + states[keyIdx] = FULL; } - this._threshold = size; - this._keys = (K[]) keys; - this._values = (V[]) values; + this._keys = keys; + this._values = values; this._states = states; + this._used = used; + this._freeEntries = newCapacity - used; + this._growThreshold = Math.round(newCapacity * _loadFactor); + this._shrinkThreshold = Math.round(newCapacity * SHRINK_FACTOR); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/lambda/Throwing.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/lambda/Throwing.java b/core/src/main/java/hivemall/utils/lambda/Throwing.java new file mode 100644 index 0000000..795314e --- /dev/null +++ b/core/src/main/java/hivemall/utils/lambda/Throwing.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.lambda; + +import java.util.function.Consumer; + +import javax.annotation.Nonnull; + +public final class Throwing { + + private Throwing() {} + + @Nonnull + public static <T> Consumer<T> rethrow(@Nonnull final ThrowingConsumer<T> consumer) { + return consumer; + } + + /** + * The compiler sees the signature with the throws T inferred to a RuntimeException type, so it + * allows the unchecked exception to propagate. + * + * http://www.baeldung.com/java-sneaky-throws + */ + @SuppressWarnings("unchecked") + @Nonnull + public static <E extends Throwable> void sneakyThrow(@Nonnull Throwable ex) throws E { + throw (E) ex; + } + +}