http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/Int2FloatOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/Int2FloatOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/Int2FloatOpenHashTable.java deleted file mode 100644 index a06cdb0..0000000 --- a/core/src/main/java/hivemall/utils/collections/Int2FloatOpenHashTable.java +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -/** - * An open-addressing hash table with double hashing - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public class Int2FloatOpenHashTable implements Externalizable { - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - private static final float DEFAULT_LOAD_FACTOR = 0.7f; - private static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int _used = 0; - protected int _threshold; - protected float defaultReturnValue = -1.f; - - protected int[] _keys; - protected float[] _values; - protected byte[] _states; - - protected Int2FloatOpenHashTable(int size, float loadFactor, float growFactor, - boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = new float[actualSize]; - this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); - } - - public Int2FloatOpenHashTable(int size, float loadFactor, float growFactor) { - this(size, loadFactor, growFactor, true); - } - - public Int2FloatOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - /** - * Only for {@link Externalizable} - */ - public Int2FloatOpenHashTable() {// required for serialization - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public void defaultReturnValue(float v) { - this.defaultReturnValue = v; - } - - public boolean containsKey(int key) { - return findKey(key) >= 0; - } - - /** - * @return -1.f if not found - */ - public float get(int key) { - int i = findKey(key); - if (i < 0) { - return defaultReturnValue; - } - return _values[i]; - } - - public float put(int key, float value) { - int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - int[] keys = _keys; - float[] values = _values; - byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - float old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return defaultReturnValue; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(int index, int key) { - byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(int key) { - int[] keys = _keys; - byte[] states = _states; - int keyLength = keys.length; - - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public float remove(int key) { - int[] keys = _keys; - float[] values = _values; - byte[] states = _states; - int keyLength = keys.length; - - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - float old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return defaultReturnValue; - } - - public int size() { - return _used; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - public IMapIterator entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - StringBuilder buf = new StringBuilder(len); - buf.append('{'); - IMapIterator i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - protected void ensureCapacity(int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - private void rehash(int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - int[] newkeys = new int[newCapacity]; - float[] newValues = new float[newCapacity]; - byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - int k = _keys[i]; - float v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(int key) { - return key & 0x7fffffff; - } - - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeInt(i.getKey()); - out.writeFloat(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - int keylen = in.readInt(); - int[] keys = new int[keylen]; - float[] values = new float[keylen]; - byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - int k = in.readInt(); - float v = in.readFloat(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - - public interface IMapIterator { - - public boolean hasNext(); - - /** - * @return -1 if not found - */ - public int next(); - - public int getKey(); - - public float getValue(); - - } - - private final class MapIterator implements IMapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public float getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } -}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/Int2IntOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/Int2IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/Int2IntOpenHashTable.java deleted file mode 100644 index 211157e..0000000 --- a/core/src/main/java/hivemall/utils/collections/Int2IntOpenHashTable.java +++ /dev/null @@ -1,414 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -/** - * An open-addressing hash table with double hashing - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public final class Int2IntOpenHashTable implements Externalizable { - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - private static final float DEFAULT_LOAD_FACTOR = 0.7f; - private static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int _used = 0; - protected int _threshold; - protected int defaultReturnValue = -1; - - protected int[] _keys; - protected int[] _values; - protected byte[] _states; - - protected Int2IntOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = new int[actualSize]; - this._states = new byte[actualSize]; - this._threshold = (int) (actualSize * _loadFactor); - } - - public Int2IntOpenHashTable(int size, int loadFactor, int growFactor) { - this(size, loadFactor, growFactor, true); - } - - public Int2IntOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - public Int2IntOpenHashTable() {// required for serialization - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public void defaultReturnValue(int v) { - this.defaultReturnValue = v; - } - - public boolean containsKey(final int key) { - return findKey(key) >= 0; - } - - /** - * @return -1.f if not found - */ - public int get(final int key) { - final int i = findKey(key); - if (i < 0) { - return defaultReturnValue; - } - return _values[i]; - } - - public int put(final int key, final int value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final int[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - int old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return defaultReturnValue; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(final int index, final int key) { - final byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(final int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(final int key) { - final int[] keys = _keys; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public int remove(final int key) { - final int[] keys = _keys; - final int[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - int old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return defaultReturnValue; - } - - public int size() { - return _used; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - public IMapIterator entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - StringBuilder buf = new StringBuilder(len); - buf.append('{'); - IMapIterator i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - protected void ensureCapacity(final int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - private void rehash(final int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] newkeys = new int[newCapacity]; - final int[] newValues = new int[newCapacity]; - final byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - int k = _keys[i]; - int v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(final int key) { - return key & 0x7fffffff; - } - - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator i = entries(); - while (i.next() != -1) { - out.writeInt(i.getKey()); - out.writeInt(i.getValue()); - } - } - - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - final int keylen = in.readInt(); - final int[] keys = new int[keylen]; - final int[] values = new int[keylen]; - final byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - int k = in.readInt(); - int v = in.readInt(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - - public interface IMapIterator { - - public boolean hasNext(); - - /** - * @return -1 if not found - */ - public int next(); - - public int getKey(); - - public int getValue(); - - } - - private final class MapIterator implements IMapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public int getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/Int2LongOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/Int2LongOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/Int2LongOpenHashTable.java deleted file mode 100644 index 2c229a4..0000000 --- a/core/src/main/java/hivemall/utils/collections/Int2LongOpenHashTable.java +++ /dev/null @@ -1,500 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import hivemall.utils.codec.VariableByteCodec; -import hivemall.utils.codec.ZigZagLEB128Codec; -import hivemall.utils.math.Primes; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -import javax.annotation.Nonnull; - -/** - * An open-addressing hash table with double hashing - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public class Int2LongOpenHashTable implements Externalizable { - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - public static final int DEFAULT_SIZE = 65536; - public static final float DEFAULT_LOAD_FACTOR = 0.7f; - public static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int[] _keys; - protected long[] _values; - protected byte[] _states; - - protected int _used; - protected int _threshold; - protected long defaultReturnValue = -1L; - - /** - * Constructor for Externalizable. Should not be called otherwise. - */ - public Int2LongOpenHashTable() {// for Externalizable - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public Int2LongOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - public Int2LongOpenHashTable(int size, float loadFactor, float growFactor) { - this(size, loadFactor, growFactor, true); - } - - protected Int2LongOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = new long[actualSize]; - this._states = new byte[actualSize]; - this._used = 0; - this._threshold = (int) (actualSize * _loadFactor); - } - - public Int2LongOpenHashTable(@Nonnull int[] keys, @Nonnull long[] values, - @Nonnull byte[] states, int used) { - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - this._keys = keys; - this._values = values; - this._states = states; - this._used = used; - this._threshold = keys.length; - } - - @Nonnull - public static Int2LongOpenHashTable newInstance() { - return new Int2LongOpenHashTable(DEFAULT_SIZE); - } - - public void defaultReturnValue(long v) { - this.defaultReturnValue = v; - } - - @Nonnull - public int[] getKeys() { - return _keys; - } - - @Nonnull - public long[] getValues() { - return _values; - } - - @Nonnull - public byte[] getStates() { - return _states; - } - - public boolean containsKey(int key) { - return findKey(key) >= 0; - } - - /** - * @return -1.f if not found - */ - public long get(int key) { - int i = findKey(key); - if (i < 0) { - return defaultReturnValue; - } - return _values[i]; - } - - public long put(int key, long value) { - int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - int[] keys = _keys; - long[] values = _values; - byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - long old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - long old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return defaultReturnValue; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(int index, int key) { - byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(int key) { - int[] keys = _keys; - byte[] states = _states; - int keyLength = keys.length; - - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public long remove(int key) { - int[] keys = _keys; - long[] values = _values; - byte[] states = _states; - int keyLength = keys.length; - - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - long old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return defaultReturnValue; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - long old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return defaultReturnValue; - } - - public int size() { - return _used; - } - - public int capacity() { - return _keys.length; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - public IMapIterator entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - StringBuilder buf = new StringBuilder(len); - buf.append('{'); - IMapIterator i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - protected void ensureCapacity(int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - private void rehash(int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - int[] newkeys = new int[newCapacity]; - long[] newValues = new long[newCapacity]; - byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - int k = _keys[i]; - long v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(int key) { - return key & 0x7fffffff; - } - - @Override - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - final int[] keys = _keys; - final int size = keys.length; - out.writeInt(size); - - final byte[] states = _states; - writeStates(states, out); - - final long[] values = _values; - for (int i = 0; i < size; i++) { - if (states[i] != FULL) { - continue; - } - ZigZagLEB128Codec.writeSignedInt(keys[i], out); - ZigZagLEB128Codec.writeSignedLong(values[i], out); - } - } - - @Nonnull - private static void writeStates(@Nonnull final byte[] status, @Nonnull final DataOutput out) - throws IOException { - // write empty states's indexes differentially - final int size = status.length; - int cardinarity = 0; - for (int i = 0; i < size; i++) { - if (status[i] != FULL) { - cardinarity++; - } - } - out.writeInt(cardinarity); - if (cardinarity == 0) { - return; - } - int prev = 0; - for (int i = 0; i < size; i++) { - if (status[i] != FULL) { - int diff = i - prev; - assert (diff >= 0); - VariableByteCodec.encodeUnsignedInt(diff, out); - prev = i; - } - } - } - - @Override - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - final int size = in.readInt(); - final int[] keys = new int[size]; - final long[] values = new long[size]; - final byte[] states = new byte[size]; - readStates(in, states); - - for (int i = 0; i < size; i++) { - if (states[i] != FULL) { - continue; - } - keys[i] = ZigZagLEB128Codec.readSignedInt(in); - values[i] = ZigZagLEB128Codec.readSignedLong(in); - } - - this._keys = keys; - this._values = values; - this._states = states; - } - - @Nonnull - private static void readStates(@Nonnull final DataInput in, @Nonnull final byte[] status) - throws IOException { - // read non-empty states differentially - final int cardinarity = in.readInt(); - Arrays.fill(status, IntOpenHashTable.FULL); - int prev = 0; - for (int j = 0; j < cardinarity; j++) { - int i = VariableByteCodec.decodeUnsignedInt(in) + prev; - status[i] = IntOpenHashTable.FREE; - prev = i; - } - } - - public interface IMapIterator { - - public boolean hasNext(); - - /** - * @return -1 if not found - */ - public int next(); - - public int getKey(); - - public long getValue(); - - } - - private final class MapIterator implements IMapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public long getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/IntArray.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/IntArray.java b/core/src/main/java/hivemall/utils/collections/IntArray.java deleted file mode 100644 index cb6b0b8..0000000 --- a/core/src/main/java/hivemall/utils/collections/IntArray.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import java.io.Serializable; - -import javax.annotation.Nonnull; - -public interface IntArray extends Serializable { - - public int get(int key); - - public int get(int key, int valueIfKeyNotFound); - - public void put(int key, int value); - - public int size(); - - public int keyAt(int index); - - @Nonnull - public int[] toArray(); - - @Nonnull - public int[] toArray(boolean copy); - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/IntArrayList.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/IntArrayList.java b/core/src/main/java/hivemall/utils/collections/IntArrayList.java deleted file mode 100644 index 0716ca8..0000000 --- a/core/src/main/java/hivemall/utils/collections/IntArrayList.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import hivemall.utils.lang.ArrayUtils; - -import java.io.Closeable; -import java.io.Serializable; - -import javax.annotation.Nonnull; - -public final class IntArrayList implements Serializable, Closeable { - private static final long serialVersionUID = -2147675120406747488L; - public static final int DEFAULT_CAPACITY = 12; - - /** array entity */ - private int[] data; - private int used; - - public IntArrayList() { - this(DEFAULT_CAPACITY); - } - - public IntArrayList(int size) { - this.data = new int[size]; - this.used = 0; - } - - public IntArrayList(int[] initValues) { - this.data = initValues; - this.used = initValues.length; - } - - public void add(final int value) { - if (used >= data.length) { - expand(used + 1); - } - data[used++] = value; - } - - public void add(final int[] values) { - final int needs = used + values.length; - if (needs >= data.length) { - expand(needs); - } - System.arraycopy(values, 0, data, used, values.length); - this.used = needs; - } - - /** - * dynamic expansion. - */ - private void expand(final int max) { - while (data.length < max) { - final int len = data.length; - int[] newArray = new int[len * 2]; - System.arraycopy(data, 0, newArray, 0, len); - this.data = newArray; - } - } - - public int remove() { - return data[--used]; - } - - public int remove(final int index) { - final int ret; - if (index > used) { - throw new IndexOutOfBoundsException(); - } else if (index == used) { - ret = data[--used]; - } else { // index < used - // removed value - ret = data[index]; - final int[] newarray = new int[--used]; - // prefix - System.arraycopy(data, 0, newarray, 0, index - 1); - // appendix - System.arraycopy(data, index + 1, newarray, index, used - index); - // set fields. - this.data = newarray; - } - return ret; - } - - public void set(final int index, final int value) { - if (index > used) { - throw new IllegalArgumentException("Index " + index + " MUST be less than size() " - + used); - } else if (index == used) { - ++used; - } - data[index] = value; - } - - public int get(final int index) { - if (index >= used) { - throw new IndexOutOfBoundsException("Index " + index + " out of bounds " + used); - } - return data[index]; - } - - public int fastGet(final int index) { - return data[index]; - } - - /** - * @return -1 if not found. - */ - public int indexOf(final int key) { - return ArrayUtils.indexOf(data, key, 0, used); - } - - public boolean contains(final int key) { - return ArrayUtils.indexOf(data, key, 0, used) != -1; - } - - public int size() { - return used; - } - - public boolean isEmpty() { - return used == 0; - } - - public void clear() { - used = 0; - } - - @Nonnull - public int[] toArray() { - return toArray(false); - } - - @Nonnull - public int[] toArray(boolean close) { - final int[] newArray = new int[used]; - System.arraycopy(data, 0, newArray, 0, used); - if (close) { - close(); - } - return newArray; - } - - public int[] array() { - return data; - } - - @Override - public String toString() { - final StringBuilder buf = new StringBuilder(); - buf.append('['); - for (int i = 0; i < used; i++) { - if (i != 0) { - buf.append(", "); - } - buf.append(data[i]); - } - buf.append(']'); - return buf.toString(); - } - - @Override - public void close() { - this.data = null; - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/IntOpenHashMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/IntOpenHashMap.java b/core/src/main/java/hivemall/utils/collections/IntOpenHashMap.java deleted file mode 100644 index 4621e6d..0000000 --- a/core/src/main/java/hivemall/utils/collections/IntOpenHashMap.java +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; - -/** - * An open-addressing hash table with double hashing - * - * @see http://en.wikipedia.org/wiki/Double_hashing - */ -public class IntOpenHashMap<V> implements Externalizable { - private static final long serialVersionUID = -8162355845665353513L; - - protected static final byte FREE = 0; - protected static final byte FULL = 1; - protected static final byte REMOVED = 2; - - private static final float DEFAULT_LOAD_FACTOR = 0.7f; - private static final float DEFAULT_GROW_FACTOR = 2.0f; - - protected final transient float _loadFactor; - protected final transient float _growFactor; - - protected int _used = 0; - protected int _threshold; - - protected int[] _keys; - protected V[] _values; - protected byte[] _states; - - @SuppressWarnings("unchecked") - protected IntOpenHashMap(int size, float loadFactor, float growFactor, boolean forcePrime) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size; - this._keys = new int[actualSize]; - this._values = (V[]) new Object[actualSize]; - this._states = new byte[actualSize]; - this._threshold = Math.round(actualSize * _loadFactor); - } - - public IntOpenHashMap(int size, float loadFactor, float growFactor) { - this(size, loadFactor, growFactor, true); - } - - public IntOpenHashMap(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true); - } - - public IntOpenHashMap() {// required for serialization - this._loadFactor = DEFAULT_LOAD_FACTOR; - this._growFactor = DEFAULT_GROW_FACTOR; - } - - public boolean containsKey(int key) { - return findKey(key) >= 0; - } - - public final V get(final int key) { - final int i = findKey(key); - if (i < 0) { - return null; - } - recordAccess(i); - return _values[i]; - } - - public V put(final int key, final V value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - final boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final V[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// double hashing - if (keys[keyIdx] == key) { - V old = values[keyIdx]; - values[keyIdx] = value; - recordAccess(keyIdx); - return old; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - values[keyIdx] = value; - recordAccess(keyIdx); - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - postAddEntry(keyIdx); - return null; - } - - public V putIfAbsent(final int key, final V value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - final boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final V[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) {// second hashing - if (keys[keyIdx] == key) { - return values[keyIdx]; - } - // try second hash - final int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return values[keyIdx]; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - _used++; - postAddEntry(keyIdx); - return null; - } - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(int index, int key) { - byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(int index) { - if ((_used + 1) >= _threshold) {// too filled - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected void postAddEntry(int index) {} - - private int findKey(int key) { - int[] keys = _keys; - byte[] states = _states; - int keyLength = keys.length; - - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public V remove(int key) { - int[] keys = _keys; - V[] values = _values; - byte[] states = _states; - int keyLength = keys.length; - - int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - recordRemoval(keyIdx); - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return null; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - recordRemoval(keyIdx); - return old; - } - } - } - return null; - } - - public int size() { - return _used; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - @SuppressWarnings("unchecked") - public IMapIterator<V> entries() { - return new MapIterator(); - } - - @Override - public String toString() { - int len = size() * 10 + 2; - StringBuilder buf = new StringBuilder(len); - buf.append('{'); - IMapIterator<V> i = entries(); - while (i.next() != -1) { - buf.append(i.getKey()); - buf.append('='); - buf.append(i.getValue()); - if (i.hasNext()) { - buf.append(','); - } - } - buf.append('}'); - return buf.toString(); - } - - private void ensureCapacity(int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - @SuppressWarnings("unchecked") - protected void rehash(int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] oldKeys = _keys; - final V[] oldValues = _values; - final byte[] oldStates = _states; - int[] newkeys = new int[newCapacity]; - V[] newValues = (V[]) new Object[newCapacity]; - byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (oldStates[i] == FULL) { - used++; - int k = oldKeys[i]; - V v = oldValues[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - newStates[keyIdx] = FULL; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(int key) { - return key & 0x7fffffff; - } - - protected void recordAccess(int idx) {}; - - protected void recordRemoval(int idx) {}; - - public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(_threshold); - out.writeInt(_used); - - out.writeInt(_keys.length); - IMapIterator<V> i = entries(); - while (i.next() != -1) { - out.writeInt(i.getKey()); - out.writeObject(i.getValue()); - } - } - - @SuppressWarnings("unchecked") - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._threshold = in.readInt(); - this._used = in.readInt(); - - int keylen = in.readInt(); - int[] keys = new int[keylen]; - V[] values = (V[]) new Object[keylen]; - byte[] states = new byte[keylen]; - for (int i = 0; i < _used; i++) { - int k = in.readInt(); - V v = (V) in.readObject(); - int hash = keyHash(k); - int keyIdx = hash % keylen; - if (states[keyIdx] != FREE) {// second hash - int decr = 1 + (hash % (keylen - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keylen; - } - if (states[keyIdx] == FREE) { - break; - } - } - } - states[keyIdx] = FULL; - keys[keyIdx] = k; - values[keyIdx] = v; - } - this._keys = keys; - this._values = values; - this._states = states; - } - - public interface IMapIterator<V> { - - public boolean hasNext(); - - public int next(); - - public int getKey(); - - public V getValue(); - - } - - @SuppressWarnings("rawtypes") - private final class MapIterator implements IMapIterator { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < _keys.length && _states[index] != FULL) { - index++; - } - return index; - } - - public boolean hasNext() { - return nextEntry < _keys.length; - } - - public int next() { - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - public int getKey() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _keys[lastEntry]; - } - - public V getValue() { - if (lastEntry == -1) { - throw new IllegalStateException(); - } - return _values[lastEntry]; - } - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/IntOpenHashTable.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/IntOpenHashTable.java deleted file mode 100644 index 8d0cdf2..0000000 --- a/core/src/main/java/hivemall/utils/collections/IntOpenHashTable.java +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import hivemall.utils.math.Primes; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.Arrays; -import java.util.HashMap; - -import javax.annotation.Nonnull; - -/** - * An open-addressing hash table with double-hashing that requires less memory to {@link HashMap}. - */ -public final class IntOpenHashTable<V> implements Externalizable { - - public static final float DEFAULT_LOAD_FACTOR = 0.7f; - public static final float DEFAULT_GROW_FACTOR = 2.0f; - - public static final byte FREE = 0; - public static final byte FULL = 1; - public static final byte REMOVED = 2; - - protected/* final */float _loadFactor; - protected/* final */float _growFactor; - - protected int _used = 0; - protected int _threshold; - - protected int[] _keys; - protected V[] _values; - protected byte[] _states; - - public IntOpenHashTable() {} // for Externalizable - - public IntOpenHashTable(int size) { - this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR); - } - - @SuppressWarnings("unchecked") - public IntOpenHashTable(int size, float loadFactor, float growFactor) { - if (size < 1) { - throw new IllegalArgumentException(); - } - this._loadFactor = loadFactor; - this._growFactor = growFactor; - int actualSize = Primes.findLeastPrimeNumber(size); - this._keys = new int[actualSize]; - this._values = (V[]) new Object[actualSize]; - this._states = new byte[actualSize]; - this._threshold = Math.round(actualSize * _loadFactor); - } - - public IntOpenHashTable(@Nonnull int[] keys, @Nonnull V[] values, @Nonnull byte[] states, - int used) { - this._used = used; - this._threshold = keys.length; - this._keys = keys; - this._values = values; - this._states = states; - } - - public int[] getKeys() { - return _keys; - } - - public Object[] getValues() { - return _values; - } - - public byte[] getStates() { - return _states; - } - - public boolean containsKey(final int key) { - return findKey(key) >= 0; - } - - public V get(final int key) { - final int i = findKey(key); - if (i < 0) { - return null; - } - return _values[i]; - } - - public V put(final int key, final V value) { - final int hash = keyHash(key); - int keyLength = _keys.length; - int keyIdx = hash % keyLength; - - boolean expanded = preAddEntry(keyIdx); - if (expanded) { - keyLength = _keys.length; - keyIdx = hash % keyLength; - } - - final int[] keys = _keys; - final V[] values = _values; - final byte[] states = _states; - - if (states[keyIdx] == FULL) { - if (keys[keyIdx] == key) { - V old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - break; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - values[keyIdx] = value; - return old; - } - } - } - keys[keyIdx] = key; - values[keyIdx] = value; - states[keyIdx] = FULL; - ++_used; - return null; - } - - - /** Return weather the required slot is free for new entry */ - protected boolean isFree(int index, int key) { - byte stat = _states[index]; - if (stat == FREE) { - return true; - } - if (stat == REMOVED && _keys[index] == key) { - return true; - } - return false; - } - - /** @return expanded or not */ - protected boolean preAddEntry(int index) { - if ((_used + 1) >= _threshold) {// filled enough - int newCapacity = Math.round(_keys.length * _growFactor); - ensureCapacity(newCapacity); - return true; - } - return false; - } - - protected int findKey(final int key) { - final int[] keys = _keys; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - // try second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (isFree(keyIdx, key)) { - return -1; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - return keyIdx; - } - } - } - return -1; - } - - public V remove(final int key) { - final int[] keys = _keys; - final V[] values = _values; - final byte[] states = _states; - final int keyLength = keys.length; - - final int hash = keyHash(key); - int keyIdx = hash % keyLength; - if (states[keyIdx] != FREE) { - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - // second hash - int decr = 1 + (hash % (keyLength - 2)); - for (;;) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += keyLength; - } - if (states[keyIdx] == FREE) { - return null; - } - if (states[keyIdx] == FULL && keys[keyIdx] == key) { - V old = values[keyIdx]; - states[keyIdx] = REMOVED; - --_used; - return old; - } - } - } - return null; - } - - public int size() { - return _used; - } - - public int capacity() { - return _keys.length; - } - - public void clear() { - Arrays.fill(_states, FREE); - this._used = 0; - } - - protected void ensureCapacity(int newCapacity) { - int prime = Primes.findLeastPrimeNumber(newCapacity); - rehash(prime); - this._threshold = Math.round(prime * _loadFactor); - } - - @SuppressWarnings("unchecked") - private void rehash(int newCapacity) { - int oldCapacity = _keys.length; - if (newCapacity <= oldCapacity) { - throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity); - } - final int[] newkeys = new int[newCapacity]; - final V[] newValues = (V[]) new Object[newCapacity]; - final byte[] newStates = new byte[newCapacity]; - int used = 0; - for (int i = 0; i < oldCapacity; i++) { - if (_states[i] == FULL) { - used++; - int k = _keys[i]; - V v = _values[i]; - int hash = keyHash(k); - int keyIdx = hash % newCapacity; - if (newStates[keyIdx] == FULL) {// second hashing - int decr = 1 + (hash % (newCapacity - 2)); - while (newStates[keyIdx] != FREE) { - keyIdx -= decr; - if (keyIdx < 0) { - keyIdx += newCapacity; - } - } - } - newStates[keyIdx] = FULL; - newkeys[keyIdx] = k; - newValues[keyIdx] = v; - } - } - this._keys = newkeys; - this._values = newValues; - this._states = newStates; - this._used = used; - } - - private static int keyHash(final int key) { - return key & 0x7fffffff; - } - - @Override - public void writeExternal(ObjectOutput out) throws IOException { - out.writeFloat(_loadFactor); - out.writeFloat(_growFactor); - out.writeInt(_used); - - final int size = _keys.length; - out.writeInt(size); - - for (int i = 0; i < size; i++) { - out.writeInt(_keys[i]); - out.writeObject(_values[i]); - out.writeByte(_states[i]); - } - } - - @SuppressWarnings("unchecked") - @Override - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - this._loadFactor = in.readFloat(); - this._growFactor = in.readFloat(); - this._used = in.readInt(); - - final int size = in.readInt(); - final int[] keys = new int[size]; - final Object[] values = new Object[size]; - final byte[] states = new byte[size]; - for (int i = 0; i < size; i++) { - keys[i] = in.readInt(); - values[i] = in.readObject(); - states[i] = in.readByte(); - } - this._threshold = size; - this._keys = keys; - this._values = (V[]) values; - this._states = states; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/LRUMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/LRUMap.java b/core/src/main/java/hivemall/utils/collections/LRUMap.java deleted file mode 100644 index bfae4d7..0000000 --- a/core/src/main/java/hivemall/utils/collections/LRUMap.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections; - -import java.util.LinkedHashMap; -import java.util.Map; - -public class LRUMap<K, V> extends LinkedHashMap<K, V> { - private static final long serialVersionUID = -7708264099645977733L; - - private final int cacheSize; - - public LRUMap(int cacheSize) { - this(cacheSize, 0.75f, cacheSize); - } - - public LRUMap(int capacity, float loadFactor, int cacheSize) { - super(capacity, loadFactor, true); - this.cacheSize = cacheSize; - } - - protected boolean removeEldestEntry(Map.Entry<K, V> eldest) { - return size() > cacheSize; - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/OpenHashMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/OpenHashMap.java b/core/src/main/java/hivemall/utils/collections/OpenHashMap.java deleted file mode 100644 index b1f5765..0000000 --- a/core/src/main/java/hivemall/utils/collections/OpenHashMap.java +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -// -// Copyright (C) 2010 catchpole.net -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -package hivemall.utils.collections; - -import hivemall.utils.lang.Copyable; -import hivemall.utils.math.MathUtils; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -/** - * An optimized Hashed Map implementation. - * <p/> - * <p> - * This Hashmap does not allow nulls to be used as keys or values. - * <p/> - * <p> - * It uses single open hashing arrays sized to binary powers (256, 512 etc) rather than those - * divisable by prime numbers. This allows the hash offset calculation to be a simple binary masking - * operation. - */ -public final class OpenHashMap<K, V> implements Map<K, V>, Externalizable { - private K[] keys; - private V[] values; - - // total number of entries in this table - private int size; - // number of bits for the value table (eg. 8 bits = 256 entries) - private int bits; - // the number of bits in each sweep zone. - private int sweepbits; - // the size of a sweep (2 to the power of sweepbits) - private int sweep; - // the sweepmask used to create sweep zone offsets - private int sweepmask; - - public OpenHashMap() {}// for Externalizable - - public OpenHashMap(int size) { - resize(MathUtils.bitsRequired(size < 256 ? 256 : size)); - } - - public V put(K key, V value) { - if (key == null) { - throw new NullPointerException(this.getClass().getName() + " key"); - } - - for (;;) { - int off = getBucketOffset(key); - int end = off + sweep; - for (; off < end; off++) { - K searchKey = keys[off]; - if (searchKey == null) { - // insert - keys[off] = key; - size++; - - V previous = values[off]; - values[off] = value; - return previous; - } else if (compare(searchKey, key)) { - // replace - V previous = values[off]; - values[off] = value; - return previous; - } - } - resize(this.bits + 1); - } - } - - public V get(Object key) { - int off = getBucketOffset(key); - int end = sweep + off; - for (; off < end; off++) { - if (keys[off] != null && compare(keys[off], key)) { - return values[off]; - } - } - return null; - } - - public V remove(Object key) { - int off = getBucketOffset(key); - int end = sweep + off; - for (; off < end; off++) { - if (keys[off] != null && compare(keys[off], key)) { - keys[off] = null; - V previous = values[off]; - values[off] = null; - size--; - return previous; - } - } - return null; - } - - public int size() { - return size; - } - - public void putAll(Map<? extends K, ? extends V> m) { - for (K key : m.keySet()) { - put(key, m.get(key)); - } - } - - public boolean isEmpty() { - return size == 0; - } - - public boolean containsKey(Object key) { - return get(key) != null; - } - - public boolean containsValue(Object value) { - for (V v : values) { - if (v != null && compare(v, value)) { - return true; - } - } - return false; - } - - public void clear() { - Arrays.fill(keys, null); - Arrays.fill(values, null); - size = 0; - } - - public Set<K> keySet() { - Set<K> set = new HashSet<K>(); - for (K key : keys) { - if (key != null) { - set.add(key); - } - } - return set; - } - - public Collection<V> values() { - Collection<V> list = new ArrayList<V>(); - for (V value : values) { - if (value != null) { - list.add(value); - } - } - return list; - } - - public Set<Entry<K, V>> entrySet() { - Set<Entry<K, V>> set = new HashSet<Entry<K, V>>(); - for (K key : keys) { - if (key != null) { - set.add(new MapEntry<K, V>(this, key)); - } - } - return set; - } - - private static final class MapEntry<K, V> implements Map.Entry<K, V> { - private final Map<K, V> map; - private final K key; - - public MapEntry(Map<K, V> map, K key) { - this.map = map; - this.key = key; - } - - public K getKey() { - return key; - } - - public V getValue() { - return map.get(key); - } - - public V setValue(V value) { - return map.put(key, value); - } - } - - public void writeExternal(ObjectOutput out) throws IOException { - // remember the number of bits - out.writeInt(this.bits); - // remember the total number of entries - out.writeInt(this.size); - // write all entries - for (int x = 0; x < this.keys.length; x++) { - if (keys[x] != null) { - out.writeObject(keys[x]); - out.writeObject(values[x]); - } - } - } - - @SuppressWarnings("unchecked") - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { - // resize to old bit size - int bitSize = in.readInt(); - if (bitSize != bits) { - resize(bitSize); - } - // read all entries - int size = in.readInt(); - for (int x = 0; x < size; x++) { - this.put((K) in.readObject(), (V) in.readObject()); - } - } - - @Override - public String toString() { - return this.getClass().getSimpleName() + ' ' + this.size; - } - - @SuppressWarnings("unchecked") - private void resize(int bits) { - this.bits = bits; - this.sweepbits = bits / 4; - this.sweep = MathUtils.powerOf(2, sweepbits) * 4; - this.sweepmask = MathUtils.bitMask(bits - this.sweepbits) << sweepbits; - - // remember old values so we can recreate the entries - K[] existingKeys = this.keys; - V[] existingValues = this.values; - - // create the arrays - this.values = (V[]) new Object[MathUtils.powerOf(2, bits) + sweep]; - this.keys = (K[]) new Object[values.length]; - this.size = 0; - - // re-add the previous entries if resizing - if (existingKeys != null) { - for (int x = 0; x < existingKeys.length; x++) { - if (existingKeys[x] != null) { - put(existingKeys[x], existingValues[x]); - } - } - } - } - - private int getBucketOffset(Object key) { - return (key.hashCode() << this.sweepbits) & this.sweepmask; - } - - private static boolean compare(final Object v1, final Object v2) { - return v1 == v2 || v1.equals(v2); - } - - public IMapIterator<K, V> entries() { - return new MapIterator(); - } - - private final class MapIterator implements IMapIterator<K, V> { - - int nextEntry; - int lastEntry = -1; - - MapIterator() { - this.nextEntry = nextEntry(0); - } - - /** find the index of next full entry */ - int nextEntry(int index) { - while (index < keys.length && keys[index] == null) { - index++; - } - return index; - } - - @Override - public boolean hasNext() { - return nextEntry < keys.length; - } - - @Override - public int next() { - free(lastEntry); - if (!hasNext()) { - return -1; - } - int curEntry = nextEntry; - this.lastEntry = curEntry; - this.nextEntry = nextEntry(curEntry + 1); - return curEntry; - } - - @Override - public K getKey() { - return keys[lastEntry]; - } - - @Override - public V getValue() { - return values[lastEntry]; - } - - @Override - public <T extends Copyable<V>> void getValue(T probe) { - probe.copyFrom(getValue()); - } - - private void free(int index) { - if (index >= 0) { - keys[index] = null; - values[index] = null; - } - } - - } -}
