http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Int2DoubleOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2DoubleOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2DoubleOpenHashTable.java deleted file mode 100644 index 3b5585e..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/Int2DoubleOpenHashTable.java +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -/** - * An open-addressing hash table using double hashing. - * - * <pre> - * Primary hash function: h1(k) = k mod m - * Secondary hash function: h2(k) = 1 + (k mod(m-2)) - * </pre> - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public class Int2DoubleOpenHashTable implements Externalizable { - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - private static final float DEFAULT_LOAD_FACTOR = 0.75f; - private static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int _used = 0; - protected int _threshold; - protected double defaultReturnValue = -1.d; - - protected int[] _keys; - protected double[] _values; - protected byte[] _states; - - protected Int2DoubleOpenHashTable(int size, float loadFactor, float growFactor, - boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = new double[actualSize]; - this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); - } - - public Int2DoubleOpenHashTable(int size, float loadFactor, float growFactor) { - this(size, loadFactor, growFactor, true); - } - - public Int2DoubleOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - /** - * Only for {@link Externalizable} - */ - public Int2DoubleOpenHashTable() {// required for serialization - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public void defaultReturnValue(double v) { - this.defaultReturnValue = v; - } - - public boolean containsKey(final int key) { - return findKey(key) >= 0; - } - - /** - * @return -1.d if not found - */ - public double get(final int key) { - return get(key, defaultReturnValue); - } - - public double get(final int key, final double defaultValue) { - final int i = findKey(key); - if (i < 0) { - return defaultValue; - } - return _values[i]; - } - - public double put(final int key, final double value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final double[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - double old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - double old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return defaultReturnValue; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final int key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(final int key) { - final int[] keys = _keys; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public double remove(final int key) { - final int[] keys = _keys; - final double[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - double old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - double old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return defaultReturnValue; - } - - public int size() { - return _used; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - public IMapIterator entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - StringBuilder buf = new StringBuilder(len); - buf.append('{'); - IMapIterator i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - protected void ensureCapacity(final int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] newkeys = new int[newCapacity]; - final double[] newValues = new double[newCapacity]; - final byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - final int k = _keys[i]; - final double v = _values[i]; - final int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(int key) { - return key & 0x7fffffff; - } - - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeInt(i.getKey()); - out.writeDouble(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - int keylen = in.readInt(); - int[] keys = new int[keylen]; - double[] values = new double[keylen]; - byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - int k = in.readInt(); - double v = in.readDouble(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - - public interface IMapIterator { - - public boolean hasNext(); - - /** - * @return -1 if not found - */ - public int next(); - - public int getKey(); - - public double getValue(); - - } - - private final class MapIterator implements IMapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public double getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } -}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java deleted file mode 100644 index 22de115..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -/** - * An open-addressing hash table using double hashing. - * - * <pre> - * Primary hash function: h1(k) = k mod m - * Secondary hash function: h2(k) = 1 + (k mod(m-2)) - * </pre> - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public class Int2FloatOpenHashTable implements Externalizable { - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - private static final float DEFAULT_LOAD_FACTOR = 0.75f; - private static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int _used = 0; - protected int _threshold; - protected float defaultReturnValue = -1.f; - - protected int[] _keys; - protected float[] _values; - protected byte[] _states; - - protected Int2FloatOpenHashTable(int size, float loadFactor, float growFactor, - boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = new float[actualSize]; - this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); - } - - public Int2FloatOpenHashTable(int size, float loadFactor, float growFactor) { - this(size, loadFactor, growFactor, true); - } - - public Int2FloatOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - /** - * Only for {@link Externalizable} - */ - public Int2FloatOpenHashTable() {// required for serialization - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public void defaultReturnValue(float v) { - this.defaultReturnValue = v; - } - - public boolean containsKey(final int key) { - return findKey(key) >= 0; - } - - /** - * @return -1.f if not found - */ - public float get(final int key) { - return get(key, defaultReturnValue); - } - - public float get(final int key, final float defaultValue) { - final int i = findKey(key); - if (i < 0) { - return defaultValue; - } - return _values[i]; - } - - public float put(final int key, final float value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final float[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - float old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return defaultReturnValue; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final int key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(final int key) { - final int[] keys = _keys; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public float remove(final int key) { - final int[] keys = _keys; - final float[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return defaultReturnValue; - } - - public int size() { - return _used; - } - - public void clear() { - if (_used == 0) { - return; // no need to clear - } - Arrays.fill(_states, FREE); - this._used = 0; - } - - public IMapIterator entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - StringBuilder buf = new StringBuilder(len); - buf.append('{'); - IMapIterator i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - protected void ensureCapacity(int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] newkeys = new int[newCapacity]; - final float[] newValues = new float[newCapacity]; - final byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - int k = _keys[i]; - float v = _values[i]; - final int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(final int key) { - return key & 0x7fffffff; - } - - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeInt(i.getKey()); - out.writeFloat(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - int keylen = in.readInt(); - int[] keys = new int[keylen]; - float[] values = new float[keylen]; - byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - int k = in.readInt(); - float v = in.readFloat(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - - public interface IMapIterator { - - public boolean hasNext(); - - /** - * @return -1 if not found - */ - public int next(); - - public int getKey(); - - public float getValue(); - - } - - private final class MapIterator implements IMapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public float getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java deleted file mode 100644 index 73431d1..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -/** - * An open-addressing hash table using double hashing. - * - * <pre> - * Primary hash function: h1(k) = k mod m - * Secondary hash function: h2(k) = 1 + (k mod(m-2)) - * </pre> - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public final class Int2IntOpenHashTable implements Externalizable { - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - private static final float DEFAULT_LOAD_FACTOR = 0.75f; - private static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int _used = 0; - protected int _threshold; - protected int defaultReturnValue = -1; - - protected int[] _keys; - protected int[] _values; - protected byte[] _states; - - protected Int2IntOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = new int[actualSize]; - this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); - } - - public Int2IntOpenHashTable(int size, int loadFactor, int growFactor) { - this(size, loadFactor, growFactor, true); - } - - public Int2IntOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - /** - * Only for {@link Externalizable} - */ - public Int2IntOpenHashTable() { - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public void defaultReturnValue(int v) { - this.defaultReturnValue = v; - } - - public boolean containsKey(final int key) { - return findKey(key) >= 0; - } - - /** - * @return -1.f if not found - */ - public int get(final int key) { - final int i = findKey(key); - if (i < 0) { - return defaultReturnValue; - } - return _values[i]; - } - - public int put(final int key, final int value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final int[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - int old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return defaultReturnValue; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final int key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(final int key) { - final int[] keys = _keys; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public int remove(final int key) { - final int[] keys = _keys; - final int[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return defaultReturnValue; - } - - public int size() { - return _used; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - public IMapIterator entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - StringBuilder buf = new StringBuilder(len); - buf.append('{'); - IMapIterator i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - protected void ensureCapacity(final int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] newkeys = new int[newCapacity]; - final int[] newValues = new int[newCapacity]; - final byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - int k = _keys[i]; - int v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(final int key) { - return key & 0x7fffffff; - } - - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeInt(i.getKey()); - out.writeInt(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - final int keylen = in.readInt(); - final int[] keys = new int[keylen]; - final int[] values = new int[keylen]; - final byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - int k = in.readInt(); - int v = in.readInt(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - - public interface IMapIterator { - - public boolean hasNext(); - - /** - * @return -1 if not found - */ - public int next(); - - public int getKey(); - - public int getValue(); - - } - - private final class MapIterator implements IMapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public int getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashMap.java b/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashMap.java deleted file mode 100644 index ffa80d0..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashMap.java +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -// -// Copyright (C) 2010 catchpole.net -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -package hivemall.utils.collections.maps; - -import hivemall.utils.hashing.HashUtils; -import hivemall.utils.math.MathUtils; - -import java.util.Arrays; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.NotThreadSafe; - -/** - * A space efficient open-addressing HashMap implementation with integer keys and long values. - * - * Unlike {@link Int2LongOpenHashTable}, it maintains single arrays for keys and object references. - * - * It uses single open hashing arrays sized to binary powers (256, 512 etc) rather than those - * divisible by prime numbers. This allows the hash offset calculation to be a simple binary masking - * operation. - * - * The index into the arrays is determined by masking a portion of the key and shifting it to - * provide a series of small buckets within the array. To insert an entry the a sweep is searched - * until an empty key space is found. A sweep is 4 times the length of a bucket, to reduce the need - * to rehash. If no key space is found within a sweep, the table size is doubled. - * - * While performance is high, the slowest situation is where lookup occurs for entries that do not - * exist, as an entire sweep area must be searched. However, this HashMap is more space efficient - * than other open-addressing HashMap implementations as in fastutil. - */ -@NotThreadSafe -public final class Int2LongOpenHashMap { - - // special treatment for key=0 - private boolean hasKey0 = false; - private long value0 = 0L; - - private int[] keys; - private long[] values; - - // total number of entries in this table - private int size; - // number of bits for the value table (eg. 8 bits = 256 entries) - private int bits; - // the number of bits in each sweep zone. - private int sweepbits; - // the size of a sweep (2 to the power of sweepbits) - private int sweep; - // the sweepmask used to create sweep zone offsets - private int sweepmask; - - public Int2LongOpenHashMap(int size) { - resize(MathUtils.bitsRequired(size < 256 ? 256 : size)); - } - - public long put(final int key, final long value) { - if (key == 0) { - if (!hasKey0) { - this.hasKey0 = true; - size++; - } - long old = value0; - this.value0 = value; - return old; - } - - for (;;) { - int off = getBucketOffset(key); - final int end = off + sweep; - for (; off < end; off++) { - final int searchKey = keys[off]; - if (searchKey == 0) { // insert - keys[off] = key; - size++; - long previous = values[off]; - values[off] = value; - return previous; - } else if (searchKey == key) {// replace - long previous = values[off]; - values[off] = value; - return previous; - } - } - resize(this.bits + 1); - } - } - - public long putIfAbsent(final int key, final long value) { - if (key == 0) { - if (hasKey0) { - return value0; - } - this.hasKey0 = true; - long old = value0; - this.value0 = value; - size++; - return old; - } - - for (;;) { - int off = getBucketOffset(key); - final int end = off + sweep; - for (; off < end; off++) { - final int searchKey = keys[off]; - if (searchKey == 0) { // insert - keys[off] = key; - size++; - long previous = values[off]; - values[off] = value; - return previous; - } else if (searchKey == key) {// replace - return values[off]; - } - } - resize(this.bits + 1); - } - } - - public long get(final int key) { - return get(key, 0L); - } - - public long get(final int key, final long defaultValue) { - if (key == 0) { - return hasKey0 ? value0 : defaultValue; - } - - int off = getBucketOffset(key); - final int end = sweep + off; - for (; off < end; off++) { - if (keys[off] == key) { - return values[off]; - } - } - return defaultValue; - } - - public long remove(final int key, final long defaultValue) { - if (key == 0) { - if (hasKey0) { - this.hasKey0 = false; - long old = value0; - this.value0 = 0L; - size--; - return old; - } else { - return defaultValue; - } - } - - int off = getBucketOffset(key); - final int end = sweep + off; - for (; off < end; off++) { - if (keys[off] == key) { - keys[off] = 0; - long previous = values[off]; - values[off] = 0L; - size--; - return previous; - } - } - return defaultValue; - } - - public int size() { - return size; - } - - public boolean isEmpty() { - return size == 0; - } - - public boolean containsKey(final int key) { - if (key == 0) { - return hasKey0; - } - - int off = getBucketOffset(key); - final int end = sweep + off; - for (; off < end; off++) { - if (keys[off] == key) { - return true; - } - } - return false; - } - - public void clear() { - this.hasKey0 = false; - this.value0 = 0L; - Arrays.fill(keys, 0); - Arrays.fill(values, 0L); - this.size = 0; - } - - @Override - public String toString() { - return this.getClass().getSimpleName() + ' ' + size; - } - - private void resize(final int bits) { - this.bits = bits; - this.sweepbits = bits / 4; - this.sweep = MathUtils.powerOf(2, sweepbits) * 4; - this.sweepmask = MathUtils.bitMask(bits - sweepbits) << sweepbits; - - // remember old values so we can recreate the entries - final int[] existingKeys = this.keys; - final long[] existingValues = this.values; - - // create the arrays - this.values = new long[MathUtils.powerOf(2, bits) + sweep]; - this.keys = new int[values.length]; - this.size = hasKey0 ? 1 : 0; - - // re-add the previous entries if resizing - if (existingKeys != null) { - for (int i = 0; i < existingKeys.length; i++) { - final int k = existingKeys[i]; - if (k != 0) { - put(k, existingValues[i]); - } - } - } - } - - private int getBucketOffset(final int key) { - return (HashUtils.fnv1a(key) << sweepbits) & sweepmask; - } - - @Nonnull - public MapIterator entries() { - return new MapIterator(); - } - - public final class MapIterator { - - int nextEntry; - int lastEntry = -2; - - MapIterator() { - this.nextEntry = nextEntry(-1); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - if (index == -1) { - if (hasKey0) { - return -1; - } else { - index = 0; - } - } - while (index < keys.length && keys[index] == 0) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < keys.length; - } - - public boolean next() { - free(lastEntry); - if (!hasNext()) { - return false; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return true; - } - - public int getKey() { - if (lastEntry >= 0 && lastEntry < keys.length) { - return keys[lastEntry]; - } else if (lastEntry == -1) { - return 0; - } else { - throw new IllegalStateException( - "next() should be called before getKey(). lastEntry=" + lastEntry - + ", keys.length=" + keys.length); - } - } - - public long getValue() { - if (lastEntry >= 0 && lastEntry < keys.length) { - return values[lastEntry]; - } else if (lastEntry == -1) { - return value0; - } else { - throw new IllegalStateException( - "next() should be called before getKey(). lastEntry=" + lastEntry - + ", keys.length=" + keys.length); - } - } - - private void free(int index) { - if (index >= 0) { - if (index >= keys.length) { - throw new IllegalStateException("index=" + index + ", keys.length=" - + keys.length); - } - keys[index] = 0; - values[index] = 0L; - } else if (index == -1) { - hasKey0 = false; - value0 = 0L; - } - // index may be -2 - } - - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java deleted file mode 100644 index 22acdb4..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.codec.VariableByteCodec; -import hivemall.utils.codec.ZigZagLEB128Codec; -import hivemall.utils.math.Primes; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -import javax.annotation.Nonnull; - -/** - * An open-addressing hash table using double hashing. - * - * <pre> - * Primary hash function: h1(k) = k mod m - * Secondary hash function: h2(k) = 1 + (k mod(m-2)) - * </pre> - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public class Int2LongOpenHashTable implements Externalizable { - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - public static final int DEFAULT_SIZE = 65536; - public static final float DEFAULT_LOAD_FACTOR = 0.75f; - public static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int[] _keys; - protected long[] _values; - protected byte[] _states; - - protected int _used; - protected int _threshold; - protected long defaultReturnValue = -1L; - - /** - * Constructor for Externalizable. Should not be called otherwise. - */ - public Int2LongOpenHashTable() {// for Externalizable - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public Int2LongOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - public Int2LongOpenHashTable(int size, float loadFactor, float growFactor) { - this(size, loadFactor, growFactor, true); - } - - protected Int2LongOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = new long[actualSize]; - this._states = new byte[actualSize]; - this._used = 0; - this._threshold = (int) (actualSize * _loadFactor); - } - - public Int2LongOpenHashTable(@Nonnull int[] keys, @Nonnull long[] values, - @Nonnull byte[] states, int used) { - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - this._keys = keys; - this._values = values; - this._states = states; - this._used = used; - this._threshold = keys.length; - } - - @Nonnull - public static Int2LongOpenHashTable newInstance() { - return new Int2LongOpenHashTable(DEFAULT_SIZE); - } - - public void defaultReturnValue(long v) { - this.defaultReturnValue = v; - } - - @Nonnull - public int[] getKeys() { - return _keys; - } - - @Nonnull - public long[] getValues() { - return _values; - } - - @Nonnull - public byte[] getStates() { - return _states; - } - - public boolean containsKey(final int key) { - return findKey(key) >= 0; - } - - /** - * @return -1.f if not found - */ - public long get(final int key) { - final int i = findKey(key); - if (i < 0) { - return defaultReturnValue; - } - return _values[i]; - } - - public long put(final int key, final long value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final long[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - long old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - long old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return defaultReturnValue; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final int key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(final int key) { - final int[] keys = _keys; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public long remove(final int key) { - final int[] keys = _keys; - final long[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - long old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - long old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return defaultReturnValue; - } - - public int size() { - return _used; - } - - public int capacity() { - return _keys.length; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - @Nonnull - public MapIterator entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - final StringBuilder buf = new StringBuilder(len); - buf.append('{'); - final MapIterator itor = entries(); - while (itor.next() != -1) { - buf.append(itor.getKey()); - buf.append('='); - buf.append(itor.getValue()); - if (itor.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - protected void ensureCapacity(final int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] newkeys = new int[newCapacity]; - final long[] newValues = new long[newCapacity]; - final byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - final int k = _keys[i]; - final long v = _values[i]; - final int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - final int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(final int key) { - return key & 0x7fffffff; - } - - @Override - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - final int[] keys = _keys; - final int size = keys.length; - out.writeInt(size); - - final byte[] states = _states; - writeStates(states, out); - - final long[] values = _values; - for (int i = 0; i < size; i++) { - if (states[i] != FULL) { - continue; - } - ZigZagLEB128Codec.writeSignedInt(keys[i], out); - ZigZagLEB128Codec.writeSignedLong(values[i], out); - } - } - - @Nonnull - private static void writeStates(@Nonnull final byte[] status, @Nonnull final DataOutput out) - throws IOException { - // write empty states's indexes differentially - final int size = status.length; - int cardinarity = 0; - for (int i = 0; i < size; i++) { - if (status[i] != FULL) { - cardinarity++; - } - } - out.writeInt(cardinarity); - if (cardinarity == 0) { - return; - } - int prev = 0; - for (int i = 0; i < size; i++) { - if (status[i] != FULL) { - int diff = i - prev; - assert (diff >= 0); - VariableByteCodec.encodeUnsignedInt(diff, out); - prev = i; - } - } - } - - @Override - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - final int size = in.readInt(); - final int[] keys = new int[size]; - final long[] values = new long[size]; - final byte[] states = new byte[size]; - readStates(in, states); - - for (int i = 0; i < size; i++) { - if (states[i] != FULL) { - continue; - } - keys[i] = ZigZagLEB128Codec.readSignedInt(in); - values[i] = ZigZagLEB128Codec.readSignedLong(in); - } - - this._keys = keys; - this._values = values; - this._states = states; - } - - @Nonnull - private static void readStates(@Nonnull final DataInput in, @Nonnull final byte[] status) - throws IOException { - // read non-empty states differentially - final int cardinarity = in.readInt(); - Arrays.fill(status, IntOpenHashTable.FULL); - int prev = 0; - for (int j = 0; j < cardinarity; j++) { - int i = VariableByteCodec.decodeUnsignedInt(in) + prev; - status[i] = IntOpenHashTable.FREE; - prev = i; - } - } - - public final class MapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - /** - * @return -1 if not found - */ - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public long getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java deleted file mode 100644 index 1c90ae0..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java +++ /dev/null @@ -1,485 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -import javax.annotation.Nonnull; - -/** - * An open-addressing hash table using double hashing. - * - * <pre> - * Primary hash function: h1(k) = k mod m - * Secondary hash function: h2(k) = 1 + (k mod(m-2)) - * </pre> - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public final class IntOpenHashTable<V> implements Externalizable { - private static final long serialVersionUID = -8162355845665353513L; - - public static final float DEFAULT_LOAD_FACTOR = 0.75f; - public static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - protected/* final */float _loadFactor; - protected/* final */float _growFactor; - - protected int _used; - protected int _threshold; - - protected int[] _keys; - protected V[] _values; - protected byte[] _states; - - /** - * Only for {@link Externalizable} - */ - public IntOpenHashTable() {} - - public IntOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - public IntOpenHashTable(int size, float loadFactor, float growFactor) { - this(size, loadFactor, growFactor, true); - } - - @SuppressWarnings("unchecked") - protected IntOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - this._used = 0; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._threshold = Math.round(actualSize * _loadFactor); - this._keys = new int[actualSize]; - this._values = (V[]) new Object[actualSize]; - this._states = new byte[actualSize]; - } - - public IntOpenHashTable(@Nonnull int[] keys, @Nonnull V[] values, @Nonnull byte[] states, - int used) { - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - this._used = used; - this._threshold = keys.length; - this._keys = keys; - this._values = values; - this._states = states; - } - - @Nonnull - public int[] getKeys() { - return _keys; - } - - @Nonnull - public Object[] getValues() { - return _values; - } - - @Nonnull - public byte[] getStates() { - return _states; - } - - public boolean containsKey(final int key) { - return findKey(key) >= 0; - } - - public V get(final int key) { - final int i = findKey(key); - if (i < 0) { - return null; - } - return _values[i]; - } - - public V put(final int key, final V value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - final boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final V[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - V old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return null; - } - - public V putIfAbsent(final int key, final V value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - final boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final V[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// second hashing - if (keys[keyIdx] == key) { - return values[keyIdx]; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return values[keyIdx]; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - _used++; - return null; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final int key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - private int findKey(final int key) { - final int[] keys = _keys; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public V remove(final int key) { - final int[] keys = _keys; - final V[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return null; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return null; - } - - @Nonnull - public IMapIterator<V> entries() { - return new MapIterator(); - } - - public int size() { - return _used; - } - - public int capacity() { - return _keys.length; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - @Override - public String toString() { - int len = size() * 10 + 2; - final StringBuilder buf = new StringBuilder(len); - buf.append('{'); - final IMapIterator<V> i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - private void ensureCapacity(final int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - @SuppressWarnings("unchecked") - private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] oldKeys = _keys; - final V[] oldValues = _values; - final byte[] oldStates = _states; - final int[] newkeys = new int[newCapacity]; - final V[] newValues = (V[]) new Object[newCapacity]; - final byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (oldStates[i] == FULL) { - used++; - final int k = oldKeys[i]; - final V v = oldValues[i]; - final int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(final int key) { - return key & 0x7fffffff; - } - - @Override - public void writeExternal(@Nonnull final ObjectOutput out) throws IOException { - out.writeFloat(_loadFactor); - out.writeFloat(_growFactor); - out.writeInt(_used); - - final int size = _keys.length; - out.writeInt(size); - - for (int i = 0; i < size; i++) { - out.writeInt(_keys[i]); - out.writeObject(_values[i]); - out.writeByte(_states[i]); - } - } - - @SuppressWarnings("unchecked") - public void readExternal(@Nonnull final ObjectInput in) throws IOException, - ClassNotFoundException { - this._loadFactor = in.readFloat(); - this._growFactor = in.readFloat(); - this._used = in.readInt(); - - final int size = in.readInt(); - final int[] keys = new int[size]; - final Object[] values = new Object[size]; - final byte[] states = new byte[size]; - for (int i = 0; i < size; i++) { - keys[i] = in.readInt(); - values[i] = in.readObject(); - states[i] = in.readByte(); - } - this._threshold = size; - this._keys = keys; - this._values = (V[]) values; - this._states = states; - } - - public interface IMapIterator<V> { - - public boolean hasNext(); - - /** - * @return -1 if not found - */ - public int next(); - - public int getKey(); - - public V getValue(); - - } - - private final class MapIterator implements IMapIterator<V> { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public V getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad15923a/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java b/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java deleted file mode 100644 index 84679c7..0000000 --- a/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import java.util.LinkedHashMap; -import java.util.Map; - -public class LRUMap<K, V> extends LinkedHashMap<K, V> { - private static final long serialVersionUID = -7708264099645977733L; - - private final int cacheSize; - - public LRUMap(int cacheSize) { - this(cacheSize, 0.75f, cacheSize); - } - - public LRUMap(int capacity, float loadFactor, int cacheSize) { - super(capacity, loadFactor, true); - this.cacheSize = cacheSize; - } - - protected boolean removeEldestEntry(Map.Entry<K, V> eldest) { - return size() > cacheSize; - } -}