http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java new file mode 100644 index 0000000..f847b15 --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java @@ -0,0 +1,418 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.math.Primes; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; + +/** + * An open-addressing hash table with double hashing + * + * @see http://en.wikipedia.org/wiki/Double_hashing + */ +public class Int2FloatOpenHashTable implements Externalizable { + + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; + + private static final float DEFAULT_LOAD_FACTOR = 0.7f; + private static final float DEFAULT_GROW_FACTOR = 2.0f; + + protected final transient float _loadFactor; + protected final transient float _growFactor; + + protected int _used = 0; + protected int _threshold; + protected float defaultReturnValue = -1.f; + + protected int[] _keys; + protected float[] _values; + protected byte[] _states; + + protected Int2FloatOpenHashTable(int size, float loadFactor, float growFactor, + boolean forcePrime) { + if (size < 1) { + throw new IllegalArgumentException(); + } + this._loadFactor = loadFactor; + this._growFactor = growFactor; + int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; + this._keys = new int[actualSize]; + this._values = new float[actualSize]; + this._states = new byte[actualSize]; + this._threshold = (int) (actualSize * _loadFactor); + } + + public Int2FloatOpenHashTable(int size, float loadFactor, float growFactor) { + this(size, loadFactor, growFactor, true); + } + + public Int2FloatOpenHashTable(int size) { + this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); + } + + /** + * Only for {@link Externalizable} + */ + public Int2FloatOpenHashTable() {// required for serialization + this._loadFactor = DEFAULT_LOAD_FACTOR; + this._growFactor = DEFAULT_GROW_FACTOR; + } + + public void defaultReturnValue(float v) { + this.defaultReturnValue = v; + } + + public boolean containsKey(int key) { + return findKey(key) >= 0; + } + + /** + * @return -1.f if not found + */ + public float get(int key) { + int i = findKey(key); + if (i < 0) { + return defaultReturnValue; + } + return _values[i]; + } + + public float put(int key, float value) { + int hash = keyHash(key); + int keyLength = _keys.length; + int keyIdx = hash % keyLength; + + boolean expanded = preAddEntry(keyIdx); + if (expanded) { + keyLength = _keys.length; + keyIdx = hash % keyLength; + } + + int[] keys = _keys; + float[] values = _values; + byte[] states = _states; + + if (states[keyIdx] == FULL) {// double hashing + if (keys[keyIdx] == key) { + float old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + break; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + float old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + } + } + keys[keyIdx] = key; + values[keyIdx] = value; + states[keyIdx] = FULL; + ++_used; + return defaultReturnValue; + } + + /** Return weather the required slot is free for new entry */ + protected boolean isFree(int index, int key) { + byte stat = _states[index]; + if (stat == FREE) { + return true; + } + if (stat == REMOVED && _keys[index] == key) { + return true; + } + return false; + } + + /** @return expanded or not */ + protected boolean preAddEntry(int index) { + if ((_used + 1) >= _threshold) {// too filled + int newCapacity = Math.round(_keys.length * _growFactor); + ensureCapacity(newCapacity); + return true; + } + return false; + } + + protected int findKey(int key) { + int[] keys = _keys; + byte[] states = _states; + int keyLength = keys.length; + + int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + return -1; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + } + } + return -1; + } + + public float remove(int key) { + int[] keys = _keys; + float[] values = _values; + byte[] states = _states; + int keyLength = keys.length; + + int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + float old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + // second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (states[keyIdx] == FREE) { + return defaultReturnValue; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + float old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + } + } + return defaultReturnValue; + } + + public int size() { + return _used; + } + + public void clear() { + Arrays.fill(_states, FREE); + this._used = 0; + } + + public IMapIterator entries() { + return new MapIterator(); + } + + @Override + public String toString() { + int len = size() * 10 + 2; + StringBuilder buf = new StringBuilder(len); + buf.append('{'); + IMapIterator i = entries(); + while (i.next() != -1) { + buf.append(i.getKey()); + buf.append('='); + buf.append(i.getValue()); + if (i.hasNext()) { + buf.append(','); + } + } + buf.append('}'); + return buf.toString(); + } + + protected void ensureCapacity(int newCapacity) { + int prime = Primes.findLeastPrimeNumber(newCapacity); + rehash(prime); + this._threshold = Math.round(prime * _loadFactor); + } + + private void rehash(int newCapacity) { + int oldCapacity = _keys.length; + if (newCapacity <= oldCapacity) { + throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); + } + int[] newkeys = new int[newCapacity]; + float[] newValues = new float[newCapacity]; + byte[] newStates = new byte[newCapacity]; + int used = 0; + for (int i = 0; i < oldCapacity; i++) { + if (_states[i] == FULL) { + used++; + int k = _keys[i]; + float v = _values[i]; + int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + int decr = 1 + (hash % (newCapacity - 2)); + while (newStates[keyIdx] != FREE) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + } + } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + } + } + this._keys = newkeys; + this._values = newValues; + this._states = newStates; + this._used = used; + } + + private static int keyHash(int key) { + return key & 0x7fffffff; + } + + public void writeExternal(ObjectOutput out) throws IOException { + out.writeInt(_threshold); + out.writeInt(_used); + + out.writeInt(_keys.length); + IMapIterator i = entries(); + while (i.next() != -1) { + out.writeInt(i.getKey()); + out.writeFloat(i.getValue()); + } + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._threshold = in.readInt(); + this._used = in.readInt(); + + int keylen = in.readInt(); + int[] keys = new int[keylen]; + float[] values = new float[keylen]; + byte[] states = new byte[keylen]; + for (int i = 0; i < _used; i++) { + int k = in.readInt(); + float v = in.readFloat(); + int hash = keyHash(k); + int keyIdx = hash % keylen; + if (states[keyIdx] != FREE) {// second hash + int decr = 1 + (hash % (keylen - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keylen; + } + if (states[keyIdx] == FREE) { + break; + } + } + } + states[keyIdx] = FULL; + keys[keyIdx] = k; + values[keyIdx] = v; + } + this._keys = keys; + this._values = values; + this._states = states; + } + + public interface IMapIterator { + + public boolean hasNext(); + + /** + * @return -1 if not found + */ + public int next(); + + public int getKey(); + + public float getValue(); + + } + + private final class MapIterator implements IMapIterator { + + int nextEntry; + int lastEntry = -1; + + MapIterator() { + this.nextEntry = nextEntry(0); + } + + /** find the index of next full entry */ + int nextEntry(int index) { + while (index < _keys.length && _states[index] != FULL) { + index++; + } + return index; + } + + public boolean hasNext() { + return nextEntry < _keys.length; + } + + public int next() { + if (!hasNext()) { + return -1; + } + int curEntry = nextEntry; + this.lastEntry = curEntry; + this.nextEntry = nextEntry(curEntry + 1); + return curEntry; + } + + public int getKey() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _keys[lastEntry]; + } + + public float getValue() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _values[lastEntry]; + } + } +}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java new file mode 100644 index 0000000..5e9e812 --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java @@ -0,0 +1,414 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.math.Primes; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; + +/** + * An open-addressing hash table with double hashing + * + * @see http://en.wikipedia.org/wiki/Double_hashing + */ +public final class Int2IntOpenHashTable implements Externalizable { + + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; + + private static final float DEFAULT_LOAD_FACTOR = 0.7f; + private static final float DEFAULT_GROW_FACTOR = 2.0f; + + protected final transient float _loadFactor; + protected final transient float _growFactor; + + protected int _used = 0; + protected int _threshold; + protected int defaultReturnValue = -1; + + protected int[] _keys; + protected int[] _values; + protected byte[] _states; + + protected Int2IntOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) { + if (size < 1) { + throw new IllegalArgumentException(); + } + this._loadFactor = loadFactor; + this._growFactor = growFactor; + int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; + this._keys = new int[actualSize]; + this._values = new int[actualSize]; + this._states = new byte[actualSize]; + this._threshold = (int) (actualSize * _loadFactor); + } + + public Int2IntOpenHashTable(int size, int loadFactor, int growFactor) { + this(size, loadFactor, growFactor, true); + } + + public Int2IntOpenHashTable(int size) { + this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); + } + + public Int2IntOpenHashTable() {// required for serialization + this._loadFactor = DEFAULT_LOAD_FACTOR; + this._growFactor = DEFAULT_GROW_FACTOR; + } + + public void defaultReturnValue(int v) { + this.defaultReturnValue = v; + } + + public boolean containsKey(final int key) { + return findKey(key) >= 0; + } + + /** + * @return -1.f if not found + */ + public int get(final int key) { + final int i = findKey(key); + if (i < 0) { + return defaultReturnValue; + } + return _values[i]; + } + + public int put(final int key, final int value) { + final int hash = keyHash(key); + int keyLength = _keys.length; + int keyIdx = hash % keyLength; + + boolean expanded = preAddEntry(keyIdx); + if (expanded) { + keyLength = _keys.length; + keyIdx = hash % keyLength; + } + + final int[] keys = _keys; + final int[] values = _values; + final byte[] states = _states; + + if (states[keyIdx] == FULL) {// double hashing + if (keys[keyIdx] == key) { + int old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + break; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + int old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + } + } + keys[keyIdx] = key; + values[keyIdx] = value; + states[keyIdx] = FULL; + ++_used; + return defaultReturnValue; + } + + /** Return weather the required slot is free for new entry */ + protected boolean isFree(final int index, final int key) { + final byte stat = _states[index]; + if (stat == FREE) { + return true; + } + if (stat == REMOVED && _keys[index] == key) { + return true; + } + return false; + } + + /** @return expanded or not */ + protected boolean preAddEntry(final int index) { + if ((_used + 1) >= _threshold) {// too filled + int newCapacity = Math.round(_keys.length * _growFactor); + ensureCapacity(newCapacity); + return true; + } + return false; + } + + protected int findKey(final int key) { + final int[] keys = _keys; + final byte[] states = _states; + final int keyLength = keys.length; + + final int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + return -1; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + } + } + return -1; + } + + public int remove(final int key) { + final int[] keys = _keys; + final int[] values = _values; + final byte[] states = _states; + final int keyLength = keys.length; + + final int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + int old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + // second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (states[keyIdx] == FREE) { + return defaultReturnValue; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + int old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + } + } + return defaultReturnValue; + } + + public int size() { + return _used; + } + + public void clear() { + Arrays.fill(_states, FREE); + this._used = 0; + } + + public IMapIterator entries() { + return new MapIterator(); + } + + @Override + public String toString() { + int len = size() * 10 + 2; + StringBuilder buf = new StringBuilder(len); + buf.append('{'); + IMapIterator i = entries(); + while (i.next() != -1) { + buf.append(i.getKey()); + buf.append('='); + buf.append(i.getValue()); + if (i.hasNext()) { + buf.append(','); + } + } + buf.append('}'); + return buf.toString(); + } + + protected void ensureCapacity(final int newCapacity) { + int prime = Primes.findLeastPrimeNumber(newCapacity); + rehash(prime); + this._threshold = Math.round(prime * _loadFactor); + } + + private void rehash(final int newCapacity) { + int oldCapacity = _keys.length; + if (newCapacity <= oldCapacity) { + throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); + } + final int[] newkeys = new int[newCapacity]; + final int[] newValues = new int[newCapacity]; + final byte[] newStates = new byte[newCapacity]; + int used = 0; + for (int i = 0; i < oldCapacity; i++) { + if (_states[i] == FULL) { + used++; + int k = _keys[i]; + int v = _values[i]; + int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + int decr = 1 + (hash % (newCapacity - 2)); + while (newStates[keyIdx] != FREE) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + } + } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + } + } + this._keys = newkeys; + this._values = newValues; + this._states = newStates; + this._used = used; + } + + private static int keyHash(final int key) { + return key & 0x7fffffff; + } + + public void writeExternal(ObjectOutput out) throws IOException { + out.writeInt(_threshold); + out.writeInt(_used); + + out.writeInt(_keys.length); + IMapIterator i = entries(); + while (i.next() != -1) { + out.writeInt(i.getKey()); + out.writeInt(i.getValue()); + } + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._threshold = in.readInt(); + this._used = in.readInt(); + + final int keylen = in.readInt(); + final int[] keys = new int[keylen]; + final int[] values = new int[keylen]; + final byte[] states = new byte[keylen]; + for (int i = 0; i < _used; i++) { + int k = in.readInt(); + int v = in.readInt(); + int hash = keyHash(k); + int keyIdx = hash % keylen; + if (states[keyIdx] != FREE) {// second hash + int decr = 1 + (hash % (keylen - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keylen; + } + if (states[keyIdx] == FREE) { + break; + } + } + } + states[keyIdx] = FULL; + keys[keyIdx] = k; + values[keyIdx] = v; + } + this._keys = keys; + this._values = values; + this._states = states; + } + + public interface IMapIterator { + + public boolean hasNext(); + + /** + * @return -1 if not found + */ + public int next(); + + public int getKey(); + + public int getValue(); + + } + + private final class MapIterator implements IMapIterator { + + int nextEntry; + int lastEntry = -1; + + MapIterator() { + this.nextEntry = nextEntry(0); + } + + /** find the index of next full entry */ + int nextEntry(int index) { + while (index < _keys.length && _states[index] != FULL) { + index++; + } + return index; + } + + public boolean hasNext() { + return nextEntry < _keys.length; + } + + public int next() { + if (!hasNext()) { + return -1; + } + int curEntry = nextEntry; + this.lastEntry = curEntry; + this.nextEntry = nextEntry(curEntry + 1); + return curEntry; + } + + public int getKey() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _keys[lastEntry]; + } + + public int getValue() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _values[lastEntry]; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java new file mode 100644 index 0000000..68eb42f --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java @@ -0,0 +1,500 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.codec.VariableByteCodec; +import hivemall.utils.codec.ZigZagLEB128Codec; +import hivemall.utils.math.Primes; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; + +import javax.annotation.Nonnull; + +/** + * An open-addressing hash table with double hashing + * + * @see http://en.wikipedia.org/wiki/Double_hashing + */ +public class Int2LongOpenHashTable implements Externalizable { + + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; + + public static final int DEFAULT_SIZE = 65536; + public static final float DEFAULT_LOAD_FACTOR = 0.7f; + public static final float DEFAULT_GROW_FACTOR = 2.0f; + + protected final transient float _loadFactor; + protected final transient float _growFactor; + + protected int[] _keys; + protected long[] _values; + protected byte[] _states; + + protected int _used; + protected int _threshold; + protected long defaultReturnValue = -1L; + + /** + * Constructor for Externalizable. Should not be called otherwise. + */ + public Int2LongOpenHashTable() {// for Externalizable + this._loadFactor = DEFAULT_LOAD_FACTOR; + this._growFactor = DEFAULT_GROW_FACTOR; + } + + public Int2LongOpenHashTable(int size) { + this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); + } + + public Int2LongOpenHashTable(int size, float loadFactor, float growFactor) { + this(size, loadFactor, growFactor, true); + } + + protected Int2LongOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) { + if (size < 1) { + throw new IllegalArgumentException(); + } + this._loadFactor = loadFactor; + this._growFactor = growFactor; + int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; + this._keys = new int[actualSize]; + this._values = new long[actualSize]; + this._states = new byte[actualSize]; + this._used = 0; + this._threshold = (int) (actualSize * _loadFactor); + } + + public Int2LongOpenHashTable(@Nonnull int[] keys, @Nonnull long[] values, + @Nonnull byte[] states, int used) { + this._loadFactor = DEFAULT_LOAD_FACTOR; + this._growFactor = DEFAULT_GROW_FACTOR; + this._keys = keys; + this._values = values; + this._states = states; + this._used = used; + this._threshold = keys.length; + } + + @Nonnull + public static Int2LongOpenHashTable newInstance() { + return new Int2LongOpenHashTable(DEFAULT_SIZE); + } + + public void defaultReturnValue(long v) { + this.defaultReturnValue = v; + } + + @Nonnull + public int[] getKeys() { + return _keys; + } + + @Nonnull + public long[] getValues() { + return _values; + } + + @Nonnull + public byte[] getStates() { + return _states; + } + + public boolean containsKey(int key) { + return findKey(key) >= 0; + } + + /** + * @return -1.f if not found + */ + public long get(int key) { + int i = findKey(key); + if (i < 0) { + return defaultReturnValue; + } + return _values[i]; + } + + public long put(int key, long value) { + int hash = keyHash(key); + int keyLength = _keys.length; + int keyIdx = hash % keyLength; + + boolean expanded = preAddEntry(keyIdx); + if (expanded) { + keyLength = _keys.length; + keyIdx = hash % keyLength; + } + + int[] keys = _keys; + long[] values = _values; + byte[] states = _states; + + if (states[keyIdx] == FULL) {// double hashing + if (keys[keyIdx] == key) { + long old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + break; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + long old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + } + } + keys[keyIdx] = key; + values[keyIdx] = value; + states[keyIdx] = FULL; + ++_used; + return defaultReturnValue; + } + + /** Return weather the required slot is free for new entry */ + protected boolean isFree(int index, int key) { + byte stat = _states[index]; + if (stat == FREE) { + return true; + } + if (stat == REMOVED && _keys[index] == key) { + return true; + } + return false; + } + + /** @return expanded or not */ + protected boolean preAddEntry(int index) { + if ((_used + 1) >= _threshold) {// too filled + int newCapacity = Math.round(_keys.length * _growFactor); + ensureCapacity(newCapacity); + return true; + } + return false; + } + + protected int findKey(int key) { + int[] keys = _keys; + byte[] states = _states; + int keyLength = keys.length; + + int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + return -1; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + } + } + return -1; + } + + public long remove(int key) { + int[] keys = _keys; + long[] values = _values; + byte[] states = _states; + int keyLength = keys.length; + + int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + long old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + // second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (states[keyIdx] == FREE) { + return defaultReturnValue; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + long old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + } + } + return defaultReturnValue; + } + + public int size() { + return _used; + } + + public int capacity() { + return _keys.length; + } + + public void clear() { + Arrays.fill(_states, FREE); + this._used = 0; + } + + public IMapIterator entries() { + return new MapIterator(); + } + + @Override + public String toString() { + int len = size() * 10 + 2; + StringBuilder buf = new StringBuilder(len); + buf.append('{'); + IMapIterator i = entries(); + while (i.next() != -1) { + buf.append(i.getKey()); + buf.append('='); + buf.append(i.getValue()); + if (i.hasNext()) { + buf.append(','); + } + } + buf.append('}'); + return buf.toString(); + } + + protected void ensureCapacity(int newCapacity) { + int prime = Primes.findLeastPrimeNumber(newCapacity); + rehash(prime); + this._threshold = Math.round(prime * _loadFactor); + } + + private void rehash(int newCapacity) { + int oldCapacity = _keys.length; + if (newCapacity <= oldCapacity) { + throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); + } + int[] newkeys = new int[newCapacity]; + long[] newValues = new long[newCapacity]; + byte[] newStates = new byte[newCapacity]; + int used = 0; + for (int i = 0; i < oldCapacity; i++) { + if (_states[i] == FULL) { + used++; + int k = _keys[i]; + long v = _values[i]; + int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + int decr = 1 + (hash % (newCapacity - 2)); + while (newStates[keyIdx] != FREE) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + } + } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + } + } + this._keys = newkeys; + this._values = newValues; + this._states = newStates; + this._used = used; + } + + private static int keyHash(int key) { + return key & 0x7fffffff; + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeInt(_threshold); + out.writeInt(_used); + + final int[] keys = _keys; + final int size = keys.length; + out.writeInt(size); + + final byte[] states = _states; + writeStates(states, out); + + final long[] values = _values; + for (int i = 0; i < size; i++) { + if (states[i] != FULL) { + continue; + } + ZigZagLEB128Codec.writeSignedInt(keys[i], out); + ZigZagLEB128Codec.writeSignedLong(values[i], out); + } + } + + @Nonnull + private static void writeStates(@Nonnull final byte[] status, @Nonnull final DataOutput out) + throws IOException { + // write empty states's indexes differentially + final int size = status.length; + int cardinarity = 0; + for (int i = 0; i < size; i++) { + if (status[i] != FULL) { + cardinarity++; + } + } + out.writeInt(cardinarity); + if (cardinarity == 0) { + return; + } + int prev = 0; + for (int i = 0; i < size; i++) { + if (status[i] != FULL) { + int diff = i - prev; + assert (diff >= 0); + VariableByteCodec.encodeUnsignedInt(diff, out); + prev = i; + } + } + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._threshold = in.readInt(); + this._used = in.readInt(); + + final int size = in.readInt(); + final int[] keys = new int[size]; + final long[] values = new long[size]; + final byte[] states = new byte[size]; + readStates(in, states); + + for (int i = 0; i < size; i++) { + if (states[i] != FULL) { + continue; + } + keys[i] = ZigZagLEB128Codec.readSignedInt(in); + values[i] = ZigZagLEB128Codec.readSignedLong(in); + } + + this._keys = keys; + this._values = values; + this._states = states; + } + + @Nonnull + private static void readStates(@Nonnull final DataInput in, @Nonnull final byte[] status) + throws IOException { + // read non-empty states differentially + final int cardinarity = in.readInt(); + Arrays.fill(status, IntOpenHashTable.FULL); + int prev = 0; + for (int j = 0; j < cardinarity; j++) { + int i = VariableByteCodec.decodeUnsignedInt(in) + prev; + status[i] = IntOpenHashTable.FREE; + prev = i; + } + } + + public interface IMapIterator { + + public boolean hasNext(); + + /** + * @return -1 if not found + */ + public int next(); + + public int getKey(); + + public long getValue(); + + } + + private final class MapIterator implements IMapIterator { + + int nextEntry; + int lastEntry = -1; + + MapIterator() { + this.nextEntry = nextEntry(0); + } + + /** find the index of next full entry */ + int nextEntry(int index) { + while (index < _keys.length && _states[index] != FULL) { + index++; + } + return index; + } + + public boolean hasNext() { + return nextEntry < _keys.length; + } + + public int next() { + if (!hasNext()) { + return -1; + } + int curEntry = nextEntry; + this.lastEntry = curEntry; + this.nextEntry = nextEntry(curEntry + 1); + return curEntry; + } + + public int getKey() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _keys[lastEntry]; + } + + public long getValue() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _values[lastEntry]; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java new file mode 100644 index 0000000..d7ae8d6 --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java @@ -0,0 +1,467 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.math.Primes; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; + +/** + * An open-addressing hash table with double hashing + * + * @see http://en.wikipedia.org/wiki/Double_hashing + */ +public class IntOpenHashMap<V> implements Externalizable { + private static final long serialVersionUID = -8162355845665353513L; + + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; + + private static final float DEFAULT_LOAD_FACTOR = 0.7f; + private static final float DEFAULT_GROW_FACTOR = 2.0f; + + protected final transient float _loadFactor; + protected final transient float _growFactor; + + protected int _used = 0; + protected int _threshold; + + protected int[] _keys; + protected V[] _values; + protected byte[] _states; + + @SuppressWarnings("unchecked") + protected IntOpenHashMap(int size, float loadFactor, float growFactor, boolean forcePrime) { + if (size < 1) { + throw new IllegalArgumentException(); + } + this._loadFactor = loadFactor; + this._growFactor = growFactor; + int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; + this._keys = new int[actualSize]; + this._values = (V[]) new Object[actualSize]; + this._states = new byte[actualSize]; + this._threshold = Math.round(actualSize * _loadFactor); + } + + public IntOpenHashMap(int size, float loadFactor, float growFactor) { + this(size, loadFactor, growFactor, true); + } + + public IntOpenHashMap(int size) { + this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); + } + + public IntOpenHashMap() {// required for serialization + this._loadFactor = DEFAULT_LOAD_FACTOR; + this._growFactor = DEFAULT_GROW_FACTOR; + } + + public boolean containsKey(int key) { + return findKey(key) >= 0; + } + + public final V get(final int key) { + final int i = findKey(key); + if (i < 0) { + return null; + } + recordAccess(i); + return _values[i]; + } + + public V put(final int key, final V value) { + final int hash = keyHash(key); + int keyLength = _keys.length; + int keyIdx = hash % keyLength; + + final boolean expanded = preAddEntry(keyIdx); + if (expanded) { + keyLength = _keys.length; + keyIdx = hash % keyLength; + } + + final int[] keys = _keys; + final V[] values = _values; + final byte[] states = _states; + + if (states[keyIdx] == FULL) {// double hashing + if (keys[keyIdx] == key) { + V old = values[keyIdx]; + values[keyIdx] = value; + recordAccess(keyIdx); + return old; + } + // try second hash + final int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + break; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + V old = values[keyIdx]; + values[keyIdx] = value; + recordAccess(keyIdx); + return old; + } + } + } + keys[keyIdx] = key; + values[keyIdx] = value; + states[keyIdx] = FULL; + ++_used; + postAddEntry(keyIdx); + return null; + } + + public V putIfAbsent(final int key, final V value) { + final int hash = keyHash(key); + int keyLength = _keys.length; + int keyIdx = hash % keyLength; + + final boolean expanded = preAddEntry(keyIdx); + if (expanded) { + keyLength = _keys.length; + keyIdx = hash % keyLength; + } + + final int[] keys = _keys; + final V[] values = _values; + final byte[] states = _states; + + if (states[keyIdx] == FULL) {// second hashing + if (keys[keyIdx] == key) { + return values[keyIdx]; + } + // try second hash + final int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + break; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return values[keyIdx]; + } + } + } + keys[keyIdx] = key; + values[keyIdx] = value; + states[keyIdx] = FULL; + _used++; + postAddEntry(keyIdx); + return null; + } + + /** Return weather the required slot is free for new entry */ + protected boolean isFree(int index, int key) { + byte stat = _states[index]; + if (stat == FREE) { + return true; + } + if (stat == REMOVED && _keys[index] == key) { + return true; + } + return false; + } + + /** @return expanded or not */ + protected boolean preAddEntry(int index) { + if ((_used + 1) >= _threshold) {// too filled + int newCapacity = Math.round(_keys.length * _growFactor); + ensureCapacity(newCapacity); + return true; + } + return false; + } + + protected void postAddEntry(int index) {} + + private int findKey(int key) { + int[] keys = _keys; + byte[] states = _states; + int keyLength = keys.length; + + int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + return -1; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + } + } + return -1; + } + + public V remove(int key) { + int[] keys = _keys; + V[] values = _values; + byte[] states = _states; + int keyLength = keys.length; + + int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + V old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + recordRemoval(keyIdx); + return old; + } + // second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (states[keyIdx] == FREE) { + return null; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + V old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + recordRemoval(keyIdx); + return old; + } + } + } + return null; + } + + public int size() { + return _used; + } + + public void clear() { + Arrays.fill(_states, FREE); + this._used = 0; + } + + @SuppressWarnings("unchecked") + public IMapIterator<V> entries() { + return new MapIterator(); + } + + @Override + public String toString() { + int len = size() * 10 + 2; + StringBuilder buf = new StringBuilder(len); + buf.append('{'); + IMapIterator<V> i = entries(); + while (i.next() != -1) { + buf.append(i.getKey()); + buf.append('='); + buf.append(i.getValue()); + if (i.hasNext()) { + buf.append(','); + } + } + buf.append('}'); + return buf.toString(); + } + + private void ensureCapacity(int newCapacity) { + int prime = Primes.findLeastPrimeNumber(newCapacity); + rehash(prime); + this._threshold = Math.round(prime * _loadFactor); + } + + @SuppressWarnings("unchecked") + protected void rehash(int newCapacity) { + int oldCapacity = _keys.length; + if (newCapacity <= oldCapacity) { + throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); + } + final int[] oldKeys = _keys; + final V[] oldValues = _values; + final byte[] oldStates = _states; + int[] newkeys = new int[newCapacity]; + V[] newValues = (V[]) new Object[newCapacity]; + byte[] newStates = new byte[newCapacity]; + int used = 0; + for (int i = 0; i < oldCapacity; i++) { + if (oldStates[i] == FULL) { + used++; + int k = oldKeys[i]; + V v = oldValues[i]; + int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + int decr = 1 + (hash % (newCapacity - 2)); + while (newStates[keyIdx] != FREE) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + } + } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + } + } + this._keys = newkeys; + this._values = newValues; + this._states = newStates; + this._used = used; + } + + private static int keyHash(int key) { + return key & 0x7fffffff; + } + + protected void recordAccess(int idx) {}; + + protected void recordRemoval(int idx) {}; + + public void writeExternal(ObjectOutput out) throws IOException { + out.writeInt(_threshold); + out.writeInt(_used); + + out.writeInt(_keys.length); + IMapIterator<V> i = entries(); + while (i.next() != -1) { + out.writeInt(i.getKey()); + out.writeObject(i.getValue()); + } + } + + @SuppressWarnings("unchecked") + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._threshold = in.readInt(); + this._used = in.readInt(); + + int keylen = in.readInt(); + int[] keys = new int[keylen]; + V[] values = (V[]) new Object[keylen]; + byte[] states = new byte[keylen]; + for (int i = 0; i < _used; i++) { + int k = in.readInt(); + V v = (V) in.readObject(); + int hash = keyHash(k); + int keyIdx = hash % keylen; + if (states[keyIdx] != FREE) {// second hash + int decr = 1 + (hash % (keylen - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keylen; + } + if (states[keyIdx] == FREE) { + break; + } + } + } + states[keyIdx] = FULL; + keys[keyIdx] = k; + values[keyIdx] = v; + } + this._keys = keys; + this._values = values; + this._states = states; + } + + public interface IMapIterator<V> { + + public boolean hasNext(); + + public int next(); + + public int getKey(); + + public V getValue(); + + } + + @SuppressWarnings("rawtypes") + private final class MapIterator implements IMapIterator { + + int nextEntry; + int lastEntry = -1; + + MapIterator() { + this.nextEntry = nextEntry(0); + } + + /** find the index of next full entry */ + int nextEntry(int index) { + while (index < _keys.length && _states[index] != FULL) { + index++; + } + return index; + } + + public boolean hasNext() { + return nextEntry < _keys.length; + } + + public int next() { + if (!hasNext()) { + return -1; + } + int curEntry = nextEntry; + this.lastEntry = curEntry; + this.nextEntry = nextEntry(curEntry + 1); + return curEntry; + } + + public int getKey() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _keys[lastEntry]; + } + + public V getValue() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _values[lastEntry]; + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java new file mode 100644 index 0000000..dcb64d1 --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java @@ -0,0 +1,404 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.math.Primes; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; +import java.util.HashMap; + +import javax.annotation.Nonnull; + +/** + * An open-addressing hash table with double-hashing that requires less memory to {@link HashMap}. + */ +public final class IntOpenHashTable<V> implements Externalizable { + + public static final float DEFAULT_LOAD_FACTOR = 0.7f; + public static final float DEFAULT_GROW_FACTOR = 2.0f; + + public static final byte FREE = 0; + public static final byte FULL = 1; + public static final byte REMOVED = 2; + + protected/* final */float _loadFactor; + protected/* final */float _growFactor; + + protected int _used = 0; + protected int _threshold; + + protected int[] _keys; + protected V[] _values; + protected byte[] _states; + + public IntOpenHashTable() {} // for Externalizable + + public IntOpenHashTable(int size) { + this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR); + } + + @SuppressWarnings("unchecked") + public IntOpenHashTable(int size, float loadFactor, float growFactor) { + if (size < 1) { + throw new IllegalArgumentException(); + } + this._loadFactor = loadFactor; + this._growFactor = growFactor; + int actualSize = Primes.findLeastPrimeNumber(size); + this._keys = new int[actualSize]; + this._values = (V[]) new Object[actualSize]; + this._states = new byte[actualSize]; + this._threshold = Math.round(actualSize * _loadFactor); + } + + public IntOpenHashTable(@Nonnull int[] keys, @Nonnull V[] values, @Nonnull byte[] states, + int used) { + this._used = used; + this._threshold = keys.length; + this._keys = keys; + this._values = values; + this._states = states; + } + + public int[] getKeys() { + return _keys; + } + + public Object[] getValues() { + return _values; + } + + public byte[] getStates() { + return _states; + } + + public boolean containsKey(final int key) { + return findKey(key) >= 0; + } + + public V get(final int key) { + final int i = findKey(key); + if (i < 0) { + return null; + } + return _values[i]; + } + + public V put(final int key, final V value) { + final int hash = keyHash(key); + int keyLength = _keys.length; + int keyIdx = hash % keyLength; + + boolean expanded = preAddEntry(keyIdx); + if (expanded) { + keyLength = _keys.length; + keyIdx = hash % keyLength; + } + + final int[] keys = _keys; + final V[] values = _values; + final byte[] states = _states; + + if (states[keyIdx] == FULL) { + if (keys[keyIdx] == key) { + V old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + break; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + V old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + } + } + keys[keyIdx] = key; + values[keyIdx] = value; + states[keyIdx] = FULL; + ++_used; + return null; + } + + + /** Return weather the required slot is free for new entry */ + protected boolean isFree(int index, int key) { + byte stat = _states[index]; + if (stat == FREE) { + return true; + } + if (stat == REMOVED && _keys[index] == key) { + return true; + } + return false; + } + + /** @return expanded or not */ + protected boolean preAddEntry(int index) { + if ((_used + 1) >= _threshold) {// filled enough + int newCapacity = Math.round(_keys.length * _growFactor); + ensureCapacity(newCapacity); + return true; + } + return false; + } + + protected int findKey(final int key) { + final int[] keys = _keys; + final byte[] states = _states; + final int keyLength = keys.length; + + final int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + return -1; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + } + } + return -1; + } + + public V remove(final int key) { + final int[] keys = _keys; + final V[] values = _values; + final byte[] states = _states; + final int keyLength = keys.length; + + final int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + V old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + // second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (states[keyIdx] == FREE) { + return null; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + V old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + } + } + return null; + } + + @Nonnull + public IMapIterator<V> entries() { + return new MapIterator(); + } + + public int size() { + return _used; + } + + public int capacity() { + return _keys.length; + } + + public void clear() { + Arrays.fill(_states, FREE); + this._used = 0; + } + + protected void ensureCapacity(int newCapacity) { + int prime = Primes.findLeastPrimeNumber(newCapacity); + rehash(prime); + this._threshold = Math.round(prime * _loadFactor); + } + + @SuppressWarnings("unchecked") + private void rehash(int newCapacity) { + int oldCapacity = _keys.length; + if (newCapacity <= oldCapacity) { + throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); + } + final int[] newkeys = new int[newCapacity]; + final V[] newValues = (V[]) new Object[newCapacity]; + final byte[] newStates = new byte[newCapacity]; + int used = 0; + for (int i = 0; i < oldCapacity; i++) { + if (_states[i] == FULL) { + used++; + int k = _keys[i]; + V v = _values[i]; + int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + int decr = 1 + (hash % (newCapacity - 2)); + while (newStates[keyIdx] != FREE) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + } + } + newStates[keyIdx] = FULL; + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + } + } + this._keys = newkeys; + this._values = newValues; + this._states = newStates; + this._used = used; + } + + private static int keyHash(final int key) { + return key & 0x7fffffff; + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeFloat(_loadFactor); + out.writeFloat(_growFactor); + out.writeInt(_used); + + final int size = _keys.length; + out.writeInt(size); + + for (int i = 0; i < size; i++) { + out.writeInt(_keys[i]); + out.writeObject(_values[i]); + out.writeByte(_states[i]); + } + } + + @SuppressWarnings("unchecked") + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._loadFactor = in.readFloat(); + this._growFactor = in.readFloat(); + this._used = in.readInt(); + + final int size = in.readInt(); + final int[] keys = new int[size]; + final Object[] values = new Object[size]; + final byte[] states = new byte[size]; + for (int i = 0; i < size; i++) { + keys[i] = in.readInt(); + values[i] = in.readObject(); + states[i] = in.readByte(); + } + this._threshold = size; + this._keys = keys; + this._values = (V[]) values; + this._states = states; + } + + public interface IMapIterator<V> { + + public boolean hasNext(); + + /** + * @return -1 if not found + */ + public int next(); + + public int getKey(); + + public V getValue(); + + } + + private final class MapIterator implements IMapIterator<V> { + + int nextEntry; + int lastEntry = -1; + + MapIterator() { + this.nextEntry = nextEntry(0); + } + + /** find the index of next full entry */ + int nextEntry(int index) { + while (index < _keys.length && _states[index] != FULL) { + index++; + } + return index; + } + + public boolean hasNext() { + return nextEntry < _keys.length; + } + + public int next() { + if (!hasNext()) { + return -1; + } + int curEntry = nextEntry; + this.lastEntry = curEntry; + this.nextEntry = nextEntry(curEntry + 1); + return curEntry; + } + + public int getKey() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _keys[lastEntry]; + } + + public V getValue() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _values[lastEntry]; + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java b/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java new file mode 100644 index 0000000..84679c7 --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import java.util.LinkedHashMap; +import java.util.Map; + +public class LRUMap<K, V> extends LinkedHashMap<K, V> { + private static final long serialVersionUID = -7708264099645977733L; + + private final int cacheSize; + + public LRUMap(int cacheSize) { + this(cacheSize, 0.75f, cacheSize); + } + + public LRUMap(int capacity, float loadFactor, int cacheSize) { + super(capacity, loadFactor, true); + this.cacheSize = cacheSize; + } + + protected boolean removeEldestEntry(Map.Entry<K, V> eldest) { + return size() > cacheSize; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java new file mode 100644 index 0000000..c758824 --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java @@ -0,0 +1,445 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.math.Primes; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; + +/** + * An open-addressing hash table with double hashing + * + * @see http://en.wikipedia.org/wiki/Double_hashing + */ +public final class Long2DoubleOpenHashTable implements Externalizable { + + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; + + private static final float DEFAULT_LOAD_FACTOR = 0.7f; + private static final float DEFAULT_GROW_FACTOR = 2.0f; + + protected final transient float _loadFactor; + protected final transient float _growFactor; + + protected int _used = 0; + protected int _threshold; + protected double defaultReturnValue = 0.d; + + protected long[] _keys; + protected double[] _values; + protected byte[] _states; + + protected Long2DoubleOpenHashTable(int size, float loadFactor, float growFactor, + boolean forcePrime) { + if (size < 1) { + throw new IllegalArgumentException(); + } + this._loadFactor = loadFactor; + this._growFactor = growFactor; + int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; + this._keys = new long[actualSize]; + this._values = new double[actualSize]; + this._states = new byte[actualSize]; + this._threshold = (int) (actualSize * _loadFactor); + } + + public Long2DoubleOpenHashTable(int size, int loadFactor, int growFactor) { + this(size, loadFactor, growFactor, true); + } + + public Long2DoubleOpenHashTable(int size) { + this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); + } + + public Long2DoubleOpenHashTable() {// required for serialization + this._loadFactor = DEFAULT_LOAD_FACTOR; + this._growFactor = DEFAULT_GROW_FACTOR; + } + + public void defaultReturnValue(double v) { + this.defaultReturnValue = v; + } + + public boolean containsKey(final long key) { + return _findKey(key) >= 0; + } + + /** + * @return defaultReturnValue if not found + */ + public double get(final long key) { + return get(key, defaultReturnValue); + } + + public double get(final long key, final double defaultValue) { + final int i = _findKey(key); + if (i < 0) { + return defaultValue; + } + return _values[i]; + } + + public double _get(final int index) { + if (index < 0) { + return defaultReturnValue; + } + return _values[index]; + } + + public double _set(final int index, final double value) { + double old = _values[index]; + _values[index] = value; + return old; + } + + public double _remove(final int index) { + _states[index] = REMOVED; + --_used; + return _values[index]; + } + + public double put(final long key, final double value) { + return put(key, value, defaultReturnValue); + } + + public double put(final long key, final double value, final double defaultValue) { + final int hash = keyHash(key); + int keyLength = _keys.length; + int keyIdx = hash % keyLength; + + boolean expanded = preAddEntry(keyIdx); + if (expanded) { + keyLength = _keys.length; + keyIdx = hash % keyLength; + } + + final long[] keys = _keys; + final double[] values = _values; + final byte[] states = _states; + + if (states[keyIdx] == FULL) {// double hashing + if (keys[keyIdx] == key) { + double old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + break; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + double old = values[keyIdx]; + values[keyIdx] = value; + return old; + } + } + } + keys[keyIdx] = key; + values[keyIdx] = value; + states[keyIdx] = FULL; + ++_used; + return defaultValue; + } + + /** Return weather the required slot is free for new entry */ + protected boolean isFree(final int index, final long key) { + final byte stat = _states[index]; + if (stat == FREE) { + return true; + } + if (stat == REMOVED && _keys[index] == key) { + return true; + } + return false; + } + + /** @return expanded or not */ + protected boolean preAddEntry(final int index) { + if ((_used + 1) >= _threshold) {// too filled + int newCapacity = Math.round(_keys.length * _growFactor); + ensureCapacity(newCapacity); + return true; + } + return false; + } + + /** + * @return -1 if not found + */ + public int _findKey(final long key) { + final long[] keys = _keys; + final byte[] states = _states; + final int keyLength = keys.length; + + final int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + // try second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (isFree(keyIdx, key)) { + return -1; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + return keyIdx; + } + } + } + return -1; + } + + public double remove(final long key) { + final long[] keys = _keys; + final double[] values = _values; + final byte[] states = _states; + final int keyLength = keys.length; + + final int hash = keyHash(key); + int keyIdx = hash % keyLength; + if (states[keyIdx] != FREE) { + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + double old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + // second hash + int decr = 1 + (hash % (keyLength - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keyLength; + } + if (states[keyIdx] == FREE) { + return defaultReturnValue; + } + if (states[keyIdx] == FULL && keys[keyIdx] == key) { + double old = values[keyIdx]; + states[keyIdx] = REMOVED; + --_used; + return old; + } + } + } + return defaultReturnValue; + } + + public int size() { + return _used; + } + + public void clear() { + Arrays.fill(_states, FREE); + this._used = 0; + } + + public IMapIterator entries() { + return new MapIterator(); + } + + @Override + public String toString() { + int len = size() * 10 + 2; + StringBuilder buf = new StringBuilder(len); + buf.append('{'); + IMapIterator i = entries(); + while (i.next() != -1) { + buf.append(i.getKey()); + buf.append('='); + buf.append(i.getValue()); + if (i.hasNext()) { + buf.append(','); + } + } + buf.append('}'); + return buf.toString(); + } + + protected void ensureCapacity(final int newCapacity) { + int prime = Primes.findLeastPrimeNumber(newCapacity); + rehash(prime); + this._threshold = Math.round(prime * _loadFactor); + } + + private void rehash(final int newCapacity) { + int oldCapacity = _keys.length; + if (newCapacity <= oldCapacity) { + throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); + } + final long[] newkeys = new long[newCapacity]; + final double[] newValues = new double[newCapacity]; + final byte[] newStates = new byte[newCapacity]; + int used = 0; + for (int i = 0; i < oldCapacity; i++) { + if (_states[i] == FULL) { + used++; + long k = _keys[i]; + double v = _values[i]; + int hash = keyHash(k); + int keyIdx = hash % newCapacity; + if (newStates[keyIdx] == FULL) {// second hashing + int decr = 1 + (hash % (newCapacity - 2)); + while (newStates[keyIdx] != FREE) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += newCapacity; + } + } + } + newkeys[keyIdx] = k; + newValues[keyIdx] = v; + newStates[keyIdx] = FULL; + } + } + this._keys = newkeys; + this._values = newValues; + this._states = newStates; + this._used = used; + } + + private static int keyHash(final long key) { + return (int) (key ^ (key >>> 32)) & 0x7FFFFFFF; + } + + public void writeExternal(ObjectOutput out) throws IOException { + out.writeInt(_threshold); + out.writeInt(_used); + + out.writeInt(_keys.length); + IMapIterator i = entries(); + while (i.next() != -1) { + out.writeLong(i.getKey()); + out.writeDouble(i.getValue()); + } + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + this._threshold = in.readInt(); + this._used = in.readInt(); + + final int keylen = in.readInt(); + final long[] keys = new long[keylen]; + final double[] values = new double[keylen]; + final byte[] states = new byte[keylen]; + for (int i = 0; i < _used; i++) { + long k = in.readLong(); + double v = in.readDouble(); + int hash = keyHash(k); + int keyIdx = hash % keylen; + if (states[keyIdx] != FREE) {// second hash + int decr = 1 + (hash % (keylen - 2)); + for (;;) { + keyIdx -= decr; + if (keyIdx < 0) { + keyIdx += keylen; + } + if (states[keyIdx] == FREE) { + break; + } + } + } + states[keyIdx] = FULL; + keys[keyIdx] = k; + values[keyIdx] = v; + } + this._keys = keys; + this._values = values; + this._states = states; + } + + public interface IMapIterator { + + public boolean hasNext(); + + /** + * @return -1 if not found + */ + public int next(); + + public long getKey(); + + public double getValue(); + + } + + private final class MapIterator implements IMapIterator { + + int nextEntry; + int lastEntry = -1; + + MapIterator() { + this.nextEntry = nextEntry(0); + } + + /** find the index of next full entry */ + int nextEntry(int index) { + while (index < _keys.length && _states[index] != FULL) { + index++; + } + return index; + } + + public boolean hasNext() { + return nextEntry < _keys.length; + } + + public int next() { + if (!hasNext()) { + return -1; + } + int curEntry = nextEntry; + this.lastEntry = curEntry; + this.nextEntry = nextEntry(curEntry + 1); + return curEntry; + } + + public long getKey() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _keys[lastEntry]; + } + + public double getValue() { + if (lastEntry == -1) { + throw new IllegalStateException(); + } + return _values[lastEntry]; + } + } +}