http://git-wip-us.apache.org/repos/asf/mahout/blob/49ad8cb4/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMap.java.t ---------------------------------------------------------------------- diff --git a/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMap.java.t b/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMap.java.t new file mode 100644 index 0000000..18ff8da --- /dev/null +++ b/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeObjectHashMap.java.t @@ -0,0 +1,548 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.map; + +import java.util.Arrays; +import java.util.List; + +import org.apache.mahout.math.function.${keyTypeCap}ObjectProcedure; +import org.apache.mahout.math.function.${keyTypeCap}Procedure; +import org.apache.mahout.math.list.${keyTypeCap}ArrayList; + +public class Open${keyTypeCap}ObjectHashMap<T> extends Abstract${keyTypeCap}ObjectMap<T> { + + private static final byte FREE = 0; + private static final byte FULL = 1; + private static final byte REMOVED = 2; + + /** The hash table keys. */ + private ${keyType}[] table; + + /** The hash table values. */ + private T[] values; + + /** The state of each hash table entry (FREE, FULL, REMOVED). */ + private byte[] state; + + /** The number of table entries in state==FREE. */ + private int freeEntries; + + /** Constructs an empty map with default capacity and default load factors. */ + public Open${keyTypeCap}ObjectHashMap() { + this(DEFAULT_CAPACITY); + } + + /** + * Constructs an empty map with the specified initial capacity and default load factors. + * + * @param initialCapacity the initial capacity of the map. + * @throws IllegalArgumentException if the initial capacity is less than zero. + */ + public Open${keyTypeCap}ObjectHashMap(int initialCapacity) { + this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR); + } + + /** + * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor. + * + * @param initialCapacity the initial capacity. + * @param minLoadFactor the minimum load factor. + * @param maxLoadFactor the maximum load factor. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + public Open${keyTypeCap}ObjectHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + setUp(initialCapacity, minLoadFactor, maxLoadFactor); + } + + /** Removes all (key,value) associations from the receiver. Implicitly calls <tt>trimToSize()</tt>. */ + @Override + public void clear() { + Arrays.fill(state, FREE); + Arrays.fill(values, null); // delta + + this.distinct = 0; + this.freeEntries = table.length; // delta + trimToSize(); + } + + /** + * Returns a deep copy of the receiver. + * + * @return a deep copy of the receiver. + */ + @SuppressWarnings("unchecked") + @Override + public Open${keyTypeCap}ObjectHashMap<T> clone() { + Open${keyTypeCap}ObjectHashMap<T> copy = (Open${keyTypeCap}ObjectHashMap<T>) super.clone(); + copy.table = copy.table.clone(); + copy.values = copy.values.clone(); + copy.state = copy.state.clone(); + return copy; + } + + /** + * Returns <tt>true</tt> if the receiver contains the specified key. + * + * @return <tt>true</tt> if the receiver contains the specified key. + */ + @Override + public boolean containsKey(${keyType} key) { + return indexOfKey(key) >= 0; + } + + /** + * Returns <tt>true</tt> if the receiver contains the specified value. + * + * @return <tt>true</tt> if the receiver contains the specified value. + */ + @Override + public boolean containsValue(T value) { + return indexOfValue(value) >= 0; + } + + /** + * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new + * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This + * method never need be called; it is for performance tuning only. Calling this method before <tt>put()</tt>ing a + * large number of associations boosts performance, because the receiver will grow only once instead of potentially + * many times and hash collisions get less probable. + * + * @param minCapacity the desired minimum capacity. + */ + @Override + public void ensureCapacity(int minCapacity) { + if (table.length < minCapacity) { + int newCapacity = nextPrime(minCapacity); + rehash(newCapacity); + } + } + + /** + * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order. + * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed + * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this + * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and + * <tt>values</tt> will yield association pairs, not two uncorrelated lists. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + @Override + public boolean forEachKey(${keyTypeCap}Procedure procedure) { + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && !procedure.apply(table[i])) { + return false; + } + } + return true; + } + + /** + * Applies a procedure to each (key,value) pair of the receiver, if any. Iteration order is guaranteed to be + * <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + @Override + public boolean forEachPair(${keyTypeCap}ObjectProcedure<T> procedure) { + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && !procedure.apply(table[i], values[i])) { + return false; + } + } + return true; + } + + /** + * Returns the value associated with the specified key. It is often a good idea to first check with + * containsKey(${keyType}) whether the given key has a value associated or not, i.e. whether there exists an association + * for the given key or not. + * + * @param key the key to be searched for. + * @return the value associated with the specified key; <tt>null</tt> if no such key is present. + */ + @Override + public T get(${keyType} key) { + final int i = indexOfKey(key); + if (i < 0) { + return null; + } //not contained + return values[i]; + } + + /** + * @param key the key to be added to the receiver. + * @return the index where the key would need to be inserted, if it is not already contained. Returns -index-1 if the + * key is already contained at slot index. Therefore, if the returned index < 0, then it is already contained + * at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at + * slot index. + */ + protected int indexOfInsertion(${keyType} key) { + final int length = table.length; + + final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a removed or free slot, or if we find the key itself + // do NOT skip over removed slots (yes, open addressing is like that...) + while (state[i] == FULL && table[i] != key) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == REMOVED) { + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + // assertion: there is at least one FREE slot. + final int j = i; + while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + if (state[i] == FREE) { + i = j; + } + } + + + if (state[i] == FULL) { + // key already contained at slot i. + // return a negative number identifying the slot. + return -i - 1; + } + // not already contained, should be inserted at slot i. + // return a number >= 0 identifying the slot. + return i; + } + + /** + * @param key the key to be searched in the receiver. + * @return the index where the key is contained in the receiver, returns -1 if the key was not found. + */ + protected int indexOfKey(${keyType} key) { + final int length = table.length; + + final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == FREE) { + return -1; + } // not found + return i; //found, return index where key is contained + } + + /** + * @param value the value to be searched in the receiver. + * @return the index where the value is contained in the receiver, returns -1 if the value was not found. + */ + protected int indexOfValue(T value) { + T[] val = values; + byte[] stat = state; + + for (int i = stat.length; --i >= 0;) { + if (stat[i] == FULL && val[i] == value) { + return i; + } + } + + return -1; // not found + } + + /** + * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. Iteration order is guaranteed to + * be <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. <p> This method can be used to + * iterate over the keys of the receiver. + * + * @param list the list to be filled, can have any size. + */ + @Override + public void keys(${keyTypeCap}ArrayList list) { + list.setSize(distinct); + ${keyType}[] elements = list.elements(); + + int j = 0; + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL) { + elements[j++] = table[i]; + } + } + } + + /** + * Fills all pairs satisfying a given condition into the specified lists. Fills into the lists, starting at index 0. + * After this call returns the specified lists both have a new size, the number of pairs satisfying the condition. + * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. + * <p> <b>Example:</b> <br> + * <pre> + * ${keyTypeCap}ObjectProcedure condition = new ${keyTypeCap}ObjectProcedure() { // match even keys only + * public boolean apply(${keyType} key, Object value) { return key%2==0; } + * } + * keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt> + * </pre> + * + * @param condition the condition to be matched. Takes the current key as first and the current value as second + * argument. + * @param keyList the list to be filled with keys, can have any size. + * @param valueList the list to be filled with values, can have any size. + */ + @Override + public void pairsMatching(${keyTypeCap}ObjectProcedure<T> condition, + ${keyTypeCap}ArrayList keyList, + List<T> valueList) { + keyList.clear(); + valueList.clear(); + + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && condition.apply(table[i], values[i])) { + keyList.add(table[i]); + valueList.add(values[i]); + } + } + } + + /** + * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if + * existing. + * + * @param key the key the value shall be associated with. + * @param value the value to be associated. + * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did + * already contain such a key - the new value has now replaced the formerly associated value. + */ + @Override + public boolean put(${keyType} key, T value) { + int i = indexOfInsertion(key); + if (i < 0) { //already contained + i = -i - 1; + this.values[i] = value; + return false; + } + + if (this.distinct > this.highWaterMark) { + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + return put(key, value); + } + + this.table[i] = key; + this.values[i] = value; + if (this.state[i] == FREE) { + this.freeEntries--; + } + this.state[i] = FULL; + this.distinct++; + + if (this.freeEntries < 1) { //delta + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + /** + * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called + * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water + * mark. + */ + @SuppressWarnings("unchecked") + protected void rehash(int newCapacity) { + int oldCapacity = table.length; + //if (oldCapacity == newCapacity) return; + + ${keyType}[] oldTable = table; + T[] oldValues = values; + byte[] oldState = state; + + this.table = new ${keyType}[newCapacity]; + this.values = (T[]) new Object[newCapacity]; + this.state = new byte[newCapacity]; + + this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor); + this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor); + + this.freeEntries = newCapacity - this.distinct; // delta + + for (int i = oldCapacity; i-- > 0;) { + if (oldState[i] == FULL) { + ${keyType} element = oldTable[i]; + int index = indexOfInsertion(element); + this.table[index] = element; + this.values[index] = oldValues[i]; + this.state[index] = FULL; + } + } + } + + /** + * Removes the given key with its associated element from the receiver, if present. + * + * @param key the key to be removed from the receiver. + * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise. + */ + @Override + public boolean removeKey(${keyType} key) { + int i = indexOfKey(key); + if (i < 0) { + return false; + } // key not contained + + this.state[i] = REMOVED; + this.values[i] = null; // delta + this.distinct--; + + if (this.distinct < this.lowWaterMark) { + int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + /** + * Initializes the receiver. + * + * @param initialCapacity the initial capacity of the receiver. + * @param minLoadFactor the minLoadFactor of the receiver. + * @param maxLoadFactor the maxLoadFactor of the receiver. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + @SuppressWarnings("unchecked") + @Override + final protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + int capacity = initialCapacity; + super.setUp(capacity, minLoadFactor, maxLoadFactor); + capacity = nextPrime(capacity); + if (capacity == 0) { + capacity = 1; + } // open addressing needs at least one FREE slot at any time. + + this.table = new ${keyType}[capacity]; + this.values = (T[]) new Object[capacity]; + this.state = new byte[capacity]; + + // memory will be exhausted long before this pathological case happens, anyway. + this.minLoadFactor = minLoadFactor; + if (capacity == PrimeFinder.LARGEST_PRIME) { + this.maxLoadFactor = 1.0; + } else { + this.maxLoadFactor = maxLoadFactor; + } + + this.distinct = 0; + this.freeEntries = capacity; // delta + + // lowWaterMark will be established upon first expansion. + // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...). + // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young. + // See ensureCapacity(...) + this.lowWaterMark = 0; + this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor); + } + + /** + * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An + * application can use this operation to minimize the storage of the receiver. + */ + @Override + public void trimToSize() { + // * 1.2 because open addressing's performance exponentially degrades beyond that point + // so that even rehashing the table can take very long + int newCapacity = nextPrime((int) (1 + 1.2 * size())); + if (table.length > newCapacity) { + rehash(newCapacity); + } + } + + /** + * Fills all values contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. + * <p> This method can be used to + * iterate over the values of the receiver. + * + * @param list the list to be filled, can have any size. + */ + @Override + public void values(List<T> list) { + list.clear(); + + for (int i = state.length; i-- > 0;) { + if (state[i] == FULL) { + list.add(values[i]); + } + } + } + + /** + * Access for unit tests. + * @param capacity + * @param minLoadFactor + * @param maxLoadFactor + */ + protected void getInternalFactors(int[] capacity, + double[] minLoadFactor, + double[] maxLoadFactor) { + capacity[0] = table.length; + minLoadFactor[0] = this.minLoadFactor; + maxLoadFactor[0] = this.maxLoadFactor; + } +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/49ad8cb4/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMap.java.t ---------------------------------------------------------------------- diff --git a/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMap.java.t b/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMap.java.t new file mode 100644 index 0000000..57d442e --- /dev/null +++ b/core/src/main/java-templates/org/apache/mahout/math/map/OpenKeyTypeValueTypeHashMap.java.t @@ -0,0 +1,632 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.map; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.mahout.math.function.${keyTypeCap}${valueTypeCap}Procedure; +import org.apache.mahout.math.function.${keyTypeCap}Procedure; +import org.apache.mahout.math.list.${keyTypeCap}ArrayList; + +#if (${keyType} != ${valueType}) +import org.apache.mahout.math.list.${valueTypeCap}ArrayList; +#end + + +/** + * Open hash map from ${keyType} keys to ${valueType} values. + **/ +public class Open${keyTypeCap}${valueTypeCap}HashMap extends Abstract${keyTypeCap}${valueTypeCap}Map { + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; +#if (${keyTypeFloating} == 'true') +#set ($noKeyComment = "${keyTypeCap}.NaN") + protected static final ${keyType} NO_KEY_VALUE = ${keyTypeCap}.NaN; +#else +#set ($noKeyComment = "0") + protected static final ${keyType} NO_KEY_VALUE = 0; +#end + + /** The hash table keys. */ + protected ${keyType}[] table; + + /** The hash table values. */ + protected ${valueType}[] values; + + /** The state of each hash table entry (FREE, FULL, REMOVED). */ + protected byte[] state; + + /** The number of table entries in state==FREE. */ + protected int freeEntries; + + + /** Constructs an empty map with default capacity and default load factors. */ + public Open${keyTypeCap}${valueTypeCap}HashMap() { + this(DEFAULT_CAPACITY); + } + + /** + * Constructs an empty map with the specified initial capacity and default load factors. + * + * @param initialCapacity the initial capacity of the map. + * @throws IllegalArgumentException if the initial capacity is less than zero. + */ + public Open${keyTypeCap}${valueTypeCap}HashMap(int initialCapacity) { + this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR); + } + + /** + * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor. + * + * @param initialCapacity the initial capacity. + * @param minLoadFactor the minimum load factor. + * @param maxLoadFactor the maximum load factor. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + public Open${keyTypeCap}${valueTypeCap}HashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + setUp(initialCapacity, minLoadFactor, maxLoadFactor); + } + + /** Removes all (key,value) associations from the receiver. Implicitly calls <tt>trimToSize()</tt>. */ + @Override + public void clear() { + Arrays.fill(this.state, FREE); + distinct = 0; + freeEntries = table.length; // delta + trimToSize(); + } + + /** + * Returns a deep copy of the receiver. + * + * @return a deep copy of the receiver. + */ + @Override + public Object clone() { + Open${keyTypeCap}${valueTypeCap}HashMap copy = (Open${keyTypeCap}${valueTypeCap}HashMap) super.clone(); + copy.table = copy.table.clone(); + copy.values = copy.values.clone(); + copy.state = copy.state.clone(); + return copy; + } + + /** + * Returns <tt>true</tt> if the receiver contains the specified key. + * + * @return <tt>true</tt> if the receiver contains the specified key. + */ + @Override + public boolean containsKey(${keyType} key) { + return indexOfKey(key) >= 0; + } + + /** + * Returns <tt>true</tt> if the receiver contains the specified value. + * + * @return <tt>true</tt> if the receiver contains the specified value. + */ + @Override + public boolean containsValue(${valueType} value) { + return indexOfValue(value) >= 0; + } + + /** + * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new + * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This + * method never need be called; it is for performance tuning only. Calling this method before <tt>put()</tt>ing a + * large number of associations boosts performance, because the receiver will grow only once instead of potentially + * many times and hash collisions get less probable. + * + * @param minCapacity the desired minimum capacity. + */ + @Override + public void ensureCapacity(int minCapacity) { + if (table.length < minCapacity) { + int newCapacity = nextPrime(minCapacity); + rehash(newCapacity); + } + } + + /** + * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order. + * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed + * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this + * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and + * <tt>values</tt> will yield association pairs, not two uncorrelated lists. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + @Override + public boolean forEachKey(${keyTypeCap}Procedure procedure) { + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && !procedure.apply(table[i])) { + return false; + } + } + return true; + } + + /** + * Applies a procedure to each (key,value) pair of the receiver, if any. Iteration order is guaranteed to be + * <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + @Override + public boolean forEachPair(${keyTypeCap}${valueTypeCap}Procedure procedure) { + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && !procedure.apply(table[i], values[i])) { + return false; + } + } + return true; + } + + /** + * Returns the value associated with the specified key. It is often a good idea to first check with + * containsKey(${keyType}) whether the given key has a value associated or not, i.e. whether there exists an association + * for the given key or not. + * + * @param key the key to be searched for. + * @return the value associated with the specified key; <tt>0</tt> if no such key is present. + */ + @Override + public ${valueType} get(${keyType} key) { + final int i = indexOfKey(key); + if (i < 0) { + return 0; + } //not contained + return values[i]; + } + + /** + * @param key the key to be added to the receiver. + * @return the index where the key would need to be inserted, if it is not already contained. Returns -index-1 if the + * key is already contained at slot index. Therefore, if the returned index < 0, then it is already contained + * at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at + * slot index. + */ + protected int indexOfInsertion(${keyType} key) { + final int length = table.length; + + final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a removed or free slot, or if we find the key itself + // do NOT skip over removed slots (yes, open addressing is like that...) + while (state[i] == FULL && table[i] != key) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == REMOVED) { + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + // assertion: there is at least one FREE slot. + final int j = i; + while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + if (state[i] == FREE) { + i = j; + } + } + + + if (state[i] == FULL) { + // key already contained at slot i. + // return a negative number identifying the slot. + return -i - 1; + } + // not already contained, should be inserted at slot i. + // return a number >= 0 identifying the slot. + return i; + } + + /** + * @param key the key to be searched in the receiver. + * @return the index where the key is contained in the receiver, returns -1 if the key was not found. + */ + protected int indexOfKey(${keyType} key) { + final int length = table.length; + + final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == FREE) { + return -1; + } // not found + return i; //found, return index where key is contained + } + + /** + * @param value the value to be searched in the receiver. + * @return the index where the value is contained in the receiver, returns -1 if the value was not found. + */ + protected int indexOfValue(${valueType} value) { + ${valueType}[] val = values; + byte[] stat = state; + + for (int i = stat.length; --i >= 0;) { + if (stat[i] == FULL && val[i] == value) { + return i; + } + } + + return -1; // not found + } + + /** + * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. Iteration order is guaranteed to + * be <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. + * <p> This method can be used + * to iterate over the keys of the receiver. + * + * @param list the list to be filled, can have any size. + */ + @Override + public void keys(${keyTypeCap}ArrayList list) { + list.setSize(distinct); + ${keyType} [] elements = list.elements(); + + int j = 0; + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL) { + elements[j++] = table[i]; + } + } + } + + public Iterator<MapElement> iterator() { + return new MapIterator(); + } + + public final class MapElement { + private int offset = -1; + int seen = 0; + + boolean advanceOffset() { + offset++; + while (offset < state.length && state[offset] != FULL) { + offset++; + } + if (offset < state.length) { + seen++; + } + return offset < state.length; + } + + public ${valueType} get() { + return values[offset]; + } + + public ${keyType} index() { + return table[offset]; + } + + public void set(${valueType} value) { + values[offset] = value; + } + } + + public final class MapIterator implements Iterator<MapElement> { + private final MapElement element = new MapElement(); + + private MapIterator() { } + + @Override + public boolean hasNext() { + return element.seen < distinct; + } + + @Override + public MapElement next() { + if (element.advanceOffset()) { + return element; + } + throw new NoSuchElementException(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + /** + * Fills all pairs satisfying a given condition into the specified lists. Fills into the lists, starting at index 0. + * After this call returns the specified lists both have a new size, the number of pairs satisfying the condition. + * Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link + * #forEachKey(${keyTypeCap}Procedure)}. <p> <b>Example:</b> <br> + * <pre> + * ${keyTypeCap}${valueTypeCap}Procedure condition = new ${keyTypeCap}${valueTypeCap}Procedure() { // match even values only + * public boolean apply(${keyType} key, ${valueType} value) { return value%2==0; } + * } + * keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt> + * </pre> + * + * @param condition the condition to be matched. Takes the current key as first and the current value as second + * argument. + * @param keyList the list to be filled with keys, can have any size. + * @param valueList the list to be filled with values, can have any size. + */ + @Override + public void pairsMatching(${keyTypeCap}${valueTypeCap}Procedure condition, + ${keyTypeCap}ArrayList keyList, + ${valueTypeCap}ArrayList valueList) { + keyList.clear(); + valueList.clear(); + + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && condition.apply(table[i], values[i])) { + keyList.add(table[i]); + valueList.add(values[i]); + } + } + } + + /** + * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if + * existing. + * + * @param key the key the value shall be associated with. + * @param value the value to be associated. + * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did + * already contain such a key - the new value has now replaced the formerly associated value. + */ + @Override + public boolean put(${keyType} key, ${valueType} value) { + int i = indexOfInsertion(key); + if (i < 0) { // already contained + i = -i - 1; + this.values[i] = value; + return false; + } + + if (this.distinct > this.highWaterMark) { + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + return put(key, value); + } + + this.table[i] = key; + this.values[i] = value; + if (this.state[i] == FREE) { + this.freeEntries--; + } + this.state[i] = FULL; + this.distinct++; + + if (this.freeEntries < 1) { //delta + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + @Override + public ${valueType} adjustOrPutValue(${keyType} key, ${valueType} newValue, ${valueType} incrValue) { + int i = indexOfInsertion(key); + if (i < 0) { //already contained + i = -i - 1; + this.values[i] += incrValue; + return this.values[i]; + } else { + put(key, newValue); + return newValue; + } + } + + /** + * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called + * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water + * mark. + */ + protected void rehash(int newCapacity) { + int oldCapacity = table.length; + //if (oldCapacity == newCapacity) return; + + ${keyType}[] oldTable = table; + ${valueType}[] oldValues = values; + byte[] oldState = state; + + this.table = new ${keyType}[newCapacity]; + this.values = new ${valueType}[newCapacity]; + this.state = new byte[newCapacity]; + + this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor); + this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor); + + this.freeEntries = newCapacity - this.distinct; // delta + + for (int i = oldCapacity; i-- > 0;) { + if (oldState[i] == FULL) { + ${keyType} element = oldTable[i]; + int index = indexOfInsertion(element); + this.table[index] = element; + this.values[index] = oldValues[i]; + this.state[index] = FULL; + } + } + } + + /** + * Removes the given key with its associated element from the receiver, if present. + * + * @param key the key to be removed from the receiver. + * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise. + */ + @Override + public boolean removeKey(${keyType} key) { + int i = indexOfKey(key); + if (i < 0) { + return false; + } // key not contained + + this.state[i] = REMOVED; + //this.values[i]=0; // delta + this.distinct--; + + if (this.distinct < this.lowWaterMark) { + int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + /** + * Initializes the receiver. + * + * @param initialCapacity the initial capacity of the receiver. + * @param minLoadFactor the minLoadFactor of the receiver. + * @param maxLoadFactor the maxLoadFactor of the receiver. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + @Override + final protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + int capacity = initialCapacity; + super.setUp(capacity, minLoadFactor, maxLoadFactor); + capacity = nextPrime(capacity); + if (capacity == 0) { + capacity = 1; + } // open addressing needs at least one FREE slot at any time. + + this.table = new ${keyType}[capacity]; + this.values = new ${valueType}[capacity]; + this.state = new byte[capacity]; + + // memory will be exhausted long before this pathological case happens, anyway. + this.minLoadFactor = minLoadFactor; + if (capacity == PrimeFinder.LARGEST_PRIME) { + this.maxLoadFactor = 1.0; + } else { + this.maxLoadFactor = maxLoadFactor; + } + + this.distinct = 0; + this.freeEntries = capacity; // delta + + // lowWaterMark will be established upon first expansion. + // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...). + // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young. + // See ensureCapacity(...) + this.lowWaterMark = 0; + this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor); + } + + /** + * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An + * application can use this operation to minimize the storage of the receiver. + */ + @Override + public void trimToSize() { + // * 1.2 because open addressing's performance exponentially degrades beyond that point + // so that even rehashing the table can take very long + int newCapacity = nextPrime((int) (1 + 1.2 * size())); + if (table.length > newCapacity) { + rehash(newCapacity); + } + } + + /** + * Fills all values contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. Iteration order is guaranteed to + * be <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. + * <p> This method can be used + * to iterate over the values of the receiver. + * + * @param list the list to be filled, can have any size. + */ + @Override + public void values(${valueTypeCap}ArrayList list) { + list.setSize(distinct); + ${valueType}[] elements = list.elements(); + + int j = 0; + for (int i = state.length; i-- > 0;) { + if (state[i] == FULL) { + elements[j++] = values[i]; + } + } + } + + /** + * Access for unit tests. + * @param capacity + * @param minLoadFactor + * @param maxLoadFactor + */ + protected void getInternalFactors(int[] capacity, + double[] minLoadFactor, + double[] maxLoadFactor) { + capacity[0] = table.length; + minLoadFactor[0] = this.minLoadFactor; + maxLoadFactor[0] = this.maxLoadFactor; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/49ad8cb4/core/src/main/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMap.java.t ---------------------------------------------------------------------- diff --git a/core/src/main/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMap.java.t b/core/src/main/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMap.java.t new file mode 100644 index 0000000..924c7e2 --- /dev/null +++ b/core/src/main/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMap.java.t @@ -0,0 +1,567 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.map; + +import java.util.Arrays; +import java.util.List; + +import org.apache.mahout.math.function.Object${valueTypeCap}Procedure; +import org.apache.mahout.math.function.ObjectProcedure; +import org.apache.mahout.math.list.${valueTypeCap}ArrayList; + +/** + * Open hash map from Object keys to ${valueType} values. + **/ +public class OpenObject${valueTypeCap}HashMap<T> extends AbstractObject${valueTypeCap}Map<T> { + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; + protected static final Object NO_KEY_VALUE = null; + + /** The hash table keys. */ + private Object[] table; + + /** The hash table values. */ + private ${valueType}[] values; + + /** The state of each hash table entry (FREE, FULL, REMOVED). */ + private byte[] state; + + /** The number of table entries in state==FREE. */ + private int freeEntries; + + + /** Constructs an empty map with default capacity and default load factors. */ + public OpenObject${valueTypeCap}HashMap() { + this(DEFAULT_CAPACITY); + } + + /** + * Constructs an empty map with the specified initial capacity and default load factors. + * + * @param initialCapacity the initial capacity of the map. + * @throws IllegalArgumentException if the initial capacity is less than zero. + */ + public OpenObject${valueTypeCap}HashMap(int initialCapacity) { + this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR); + } + + /** + * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor. + * + * @param initialCapacity the initial capacity. + * @param minLoadFactor the minimum load factor. + * @param maxLoadFactor the maximum load factor. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + public OpenObject${valueTypeCap}HashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + setUp(initialCapacity, minLoadFactor, maxLoadFactor); + } + + /** Removes all (key,value) associations from the receiver. Implicitly calls <tt>trimToSize()</tt>. */ + @Override + public void clear() { + Arrays.fill(this.state, FREE); + Arrays.fill(this.table, null); + + distinct = 0; + freeEntries = table.length; // delta + trimToSize(); + } + + /** + * Returns a deep copy of the receiver. + * + * @return a deep copy of the receiver. + */ + @Override + @SuppressWarnings("unchecked") + public Object clone() { + OpenObject${valueTypeCap}HashMap copy = (OpenObject${valueTypeCap}HashMap) super.clone(); + copy.table = copy.table.clone(); + copy.values = copy.values.clone(); + copy.state = copy.state.clone(); + return copy; + } + + /** + * Returns <tt>true</tt> if the receiver contains the specified key. + * + * @return <tt>true</tt> if the receiver contains the specified key. + */ + @Override + public boolean containsKey(T key) { + return indexOfKey(key) >= 0; + } + + /** + * Returns <tt>true</tt> if the receiver contains the specified value. + * + * @return <tt>true</tt> if the receiver contains the specified value. + */ + @Override + public boolean containsValue(${valueType} value) { + return indexOfValue(value) >= 0; + } + + /** + * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new + * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This + * method never need be called; it is for performance tuning only. Calling this method before <tt>put()</tt>ing a + * large number of associations boosts performance, because the receiver will grow only once instead of potentially + * many times and hash collisions get less probable. + * + * @param minCapacity the desired minimum capacity. + */ + @Override + public void ensureCapacity(int minCapacity) { + if (table.length < minCapacity) { + int newCapacity = nextPrime(minCapacity); + rehash(newCapacity); + } + } + + /** + * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order. + * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed + * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this + * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and + * <tt>values</tt> will yield association pairs, not two uncorrelated lists. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + @Override + @SuppressWarnings("unchecked") + public boolean forEachKey(ObjectProcedure<T> procedure) { + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && !procedure.apply((T)table[i])) { + return false; + } + } + return true; + } + + /** + * Applies a procedure to each (key,value) pair of the receiver, if any. Iteration order is guaranteed to be + * <i>identical</i> to the order used by method {@link #forEachKey(ObjectProcedure)}. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + @Override + @SuppressWarnings("unchecked") + public boolean forEachPair(Object${valueTypeCap}Procedure<T> procedure) { + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && !procedure.apply((T)table[i], values[i])) { + return false; + } + } + return true; + } + + /** + * Returns the value associated with the specified key. It is often a good idea to first check with + * {@link #containsKey(Object)} whether the given key has a value associated or not, + * i.e. whether there exists an association for the given key or not. + * + * @param key the key to be searched for. + * @return the value associated with the specified key; <tt>0</tt> if no such key is present. + */ + @Override + public ${valueType} get(T key) { + final int i = indexOfKey(key); + if (i < 0) { + return 0; + } //not contained + return values[i]; + } + + /** + * @param key the key to be added to the receiver. + * @return the index where the key would need to be inserted, if it is not already contained. Returns -index-1 if the + * key is already contained at slot index. Therefore, if the returned index < 0, then it is already contained + * at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at + * slot index. + */ + protected int indexOfInsertion(T key) { + final int length = table.length; + + final int hash = key.hashCode() & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a removed or free slot, or if we find the key itself + // do NOT skip over removed slots (yes, open addressing is like that...) + while (state[i] == FULL && !equalsMindTheNull(table[i], key)) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == REMOVED) { + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + // assertion: there is at least one FREE slot. + final int j = i; + while (state[i] != FREE && (state[i] == REMOVED || !equalsMindTheNull(table[i], key))) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + if (state[i] == FREE) { + i = j; + } + } + + + if (state[i] == FULL) { + // key already contained at slot i. + // return a negative number identifying the slot. + return -i - 1; + } + // not already contained, should be inserted at slot i. + // return a number >= 0 identifying the slot. + return i; + } + + /** + * @param key the key to be searched in the receiver. + * @return the index where the key is contained in the receiver, returns -1 if the key was not found. + */ + protected int indexOfKey(T key) { + final int length = table.length; + + final int hash = key.hashCode() & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + while (state[i] != FREE && (state[i] == REMOVED || !equalsMindTheNull(table[i], key))) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == FREE) { + return -1; + } // not found + return i; //found, return index where key is contained + } + + /** + * @param value the value to be searched in the receiver. + * @return the index where the value is contained in the receiver, returns -1 if the value was not found. + */ + protected int indexOfValue(${valueType} value) { + ${valueType}[] val = values; + byte[] stat = state; + + for (int i = stat.length; --i >= 0;) { + if (stat[i] == FULL && val[i] == value) { + return i; + } + } + + return -1; // not found + } + + /** + * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. + * This method can be used + * to iterate over the keys of the receiver. + * + * @param list the list to be filled, can have any size. + */ + @Override + @SuppressWarnings("unchecked") + public void keys(List<T> list) { + list.clear(); + + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL) { + list.add((T)table[i]); + } + } + } + + /** + * Fills all pairs satisfying a given condition into the specified lists. Fills into the lists, starting at index 0. + * After this call returns the specified lists both have a new size, the number of pairs satisfying the condition. + * <p> <b>Example:</b> <br> + * <pre> + * Object${valueTypeCap}Procedure<T> condition = new Object${valueTypeCap}Procedure<T>() { // match even values only + * public boolean apply(T key, ${valueType} value) { return value%2==0; } + * } + * keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt> + * </pre> + * + * @param condition the condition to be matched. Takes the current key as first and the current value as second + * argument. + * @param keyList the list to be filled with keys, can have any size. + * @param valueList the list to be filled with values, can have any size. + */ + @Override + @SuppressWarnings("unchecked") + public void pairsMatching(Object${valueTypeCap}Procedure<T> condition, + List<T> keyList, + ${valueTypeCap}ArrayList valueList) { + keyList.clear(); + valueList.clear(); + + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL && condition.apply((T)table[i], values[i])) { + keyList.add((T)table[i]); + valueList.add(values[i]); + } + } + } + + /** + * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if + * existing. + * + * @param key the key the value shall be associated with. + * @param value the value to be associated. + * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did + * already contain such a key - the new value has now replaced the formerly associated value. + */ + @Override + public boolean put(T key, ${valueType} value) { + int i = indexOfInsertion(key); + if (i < 0) { //already contained + i = -i - 1; + this.values[i] = value; + return false; + } + + if (this.distinct > this.highWaterMark) { + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + return put(key, value); + } + + this.table[i] = key; + this.values[i] = value; + if (this.state[i] == FREE) { + this.freeEntries--; + } + this.state[i] = FULL; + this.distinct++; + + if (this.freeEntries < 1) { //delta + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + @Override + public ${valueType} adjustOrPutValue(T key, ${valueType} newValue, ${valueType} incrValue) { + int i = indexOfInsertion(key); + if (i < 0) { //already contained + i = -i - 1; + this.values[i] += incrValue; + return this.values[i]; + } else { + put(key, newValue); + return newValue; + } + } + + /** + * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called + * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water + * mark. + */ + @SuppressWarnings("unchecked") + protected void rehash(int newCapacity) { + int oldCapacity = table.length; + //if (oldCapacity == newCapacity) return; + + Object[] oldTable = table; + ${valueType}[] oldValues = values; + byte[] oldState = state; + + this.table = new Object[newCapacity]; + this.values = new ${valueType}[newCapacity]; + this.state = new byte[newCapacity]; + + this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor); + this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor); + + this.freeEntries = newCapacity - this.distinct; // delta + + for (int i = oldCapacity; i-- > 0;) { + if (oldState[i] == FULL) { + Object element = oldTable[i]; + int index = indexOfInsertion((T)element); + this.table[index] = element; + this.values[index] = oldValues[i]; + this.state[index] = FULL; + } + } + } + + /** + * Removes the given key with its associated element from the receiver, if present. + * + * @param key the key to be removed from the receiver. + * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise. + */ + @Override + public boolean removeKey(T key) { + int i = indexOfKey(key); + if (i < 0) { + return false; + } // key not contained + + this.state[i] = REMOVED; + //this.values[i]=0; // delta + this.distinct--; + + if (this.distinct < this.lowWaterMark) { + int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + /** + * Initializes the receiver. + * + * @param initialCapacity the initial capacity of the receiver. + * @param minLoadFactor the minLoadFactor of the receiver. + * @param maxLoadFactor the maxLoadFactor of the receiver. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + @Override + final protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + int capacity = initialCapacity; + super.setUp(capacity, minLoadFactor, maxLoadFactor); + capacity = nextPrime(capacity); + if (capacity == 0) { + capacity = 1; + } // open addressing needs at least one FREE slot at any time. + + this.table = new Object[capacity]; + this.values = new ${valueType}[capacity]; + this.state = new byte[capacity]; + + // memory will be exhausted long before this pathological case happens, anyway. + this.minLoadFactor = minLoadFactor; + if (capacity == PrimeFinder.LARGEST_PRIME) { + this.maxLoadFactor = 1.0; + } else { + this.maxLoadFactor = maxLoadFactor; + } + + this.distinct = 0; + this.freeEntries = capacity; // delta + + // lowWaterMark will be established upon first expansion. + // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...). + // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young. + // See ensureCapacity(...) + this.lowWaterMark = 0; + this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor); + } + + /** + * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An + * application can use this operation to minimize the storage of the receiver. + */ + @Override + public void trimToSize() { + // * 1.2 because open addressing's performance exponentially degrades beyond that point + // so that even rehashing the table can take very long + int newCapacity = nextPrime((int) (1 + 1.2 * size())); + if (table.length > newCapacity) { + rehash(newCapacity); + } + } + + /** + * Fills all values contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. + * <p> This method can be used + * to iterate over the values of the receiver. + * + * @param list the list to be filled, can have any size. + */ + @Override + public void values(${valueTypeCap}ArrayList list) { + list.setSize(distinct); + ${valueType}[] elements = list.elements(); + + int j = 0; + for (int i = state.length; i-- > 0;) { + if (state[i] == FULL) { + elements[j++] = values[i]; + } + } + } + + /** + * Access for unit tests. + * @param capacity + * @param minLoadFactor + * @param maxLoadFactor + */ + protected void getInternalFactors(int[] capacity, + double[] minLoadFactor, + double[] maxLoadFactor) { + capacity[0] = table.length; + minLoadFactor[0] = this.minLoadFactor; + maxLoadFactor[0] = this.maxLoadFactor; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/49ad8cb4/core/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t ---------------------------------------------------------------------- diff --git a/core/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t b/core/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t new file mode 100644 index 0000000..2b451b7 --- /dev/null +++ b/core/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.mahout.math.set; + +import org.apache.mahout.math.function.${keyTypeCap}Procedure; +import org.apache.mahout.math.list.${keyTypeCap}ArrayList; +import java.util.Arrays; +import java.nio.IntBuffer; + +public abstract class Abstract${keyTypeCap}Set extends AbstractSet { + + /** + * Returns <tt>true</tt> if the receiver contains the specified key. + * + * @return <tt>true</tt> if the receiver contains the specified key. + */ + public boolean contains(final ${keyType} key) { + return !forEachKey( + new ${keyTypeCap}Procedure() { + @Override + public boolean apply(${keyType} iterKey) { + return (key != iterKey); + } + } + ); + } + + /** + * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result. + * + * @return a deep copy of the receiver. + */ + public Abstract${keyTypeCap}Set copy() { + return (Abstract${keyTypeCap}Set) clone(); + } + + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + + if (!(obj instanceof Abstract${keyTypeCap}Set)) { + return false; + } + final Abstract${keyTypeCap}Set other = (Abstract${keyTypeCap}Set) obj; + if (other.size() != size()) { + return false; + } + + return + forEachKey( + new ${keyTypeCap}Procedure() { + @Override + public boolean apply(${keyType} key) { + return other.contains(key); + } + } + ); + } + + public int hashCode() { + final int[] buf = new int[size()]; + forEachKey( + new ${keyTypeCap}Procedure() { + int i = 0; + + @Override + public boolean apply(${keyType} iterKey) { + buf[i++] = HashUtils.hash(iterKey); + return true; + } + } + ); + Arrays.sort(buf); + return IntBuffer.wrap(buf).hashCode(); + } + + /** + * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order. + * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed + * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this + * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and + * <tt>values</tt> will yield association pairs, not two uncorrelated lists. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + public abstract boolean forEachKey(${keyTypeCap}Procedure procedure); + + /** + * Returns a list filled with all keys contained in the receiver. The returned list has a size that equals + * <tt>this.size()</tt>. Iteration order is guaranteed to be <i>identical</i> to the order used by method {@link + * #forEachKey(${keyTypeCap}Procedure)}. <p> This method can be used to iterate over the keys of the receiver. + * + * @return the keys. + */ + public ${keyTypeCap}ArrayList keys() { + ${keyTypeCap}ArrayList list = new ${keyTypeCap}ArrayList(size()); + keys(list); + return list; + } + + /** + * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. Iteration order is guaranteed to + * be <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. + * <p> This method can be used to + * iterate over the keys of the receiver. + * + * @param list the list to be filled, can have any size. + */ + public void keys(final ${keyTypeCap}ArrayList list) { + list.clear(); + forEachKey( + new ${keyTypeCap}Procedure() { + @Override + public boolean apply(${keyType} key) { + list.add(key); + return true; + } + } + ); + } + /** + * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if + * existing. + * + * @param key the key the value shall be associated with. + * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did + * already contain such a key - the new value has now replaced the formerly associated value. + */ + public abstract boolean add(${keyType} key); + + /** + * Removes the given key with its associated element from the receiver, if present. + * + * @param key the key to be removed from the receiver. + * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise. + */ + public abstract boolean remove(${keyType} key); + + /** + * Returns a string representation of the receiver, containing the String representation of each key-value pair, + * sorted ascending by key. + */ + public String toString() { + ${keyTypeCap}ArrayList theKeys = keys(); + //theKeys.sort(); + + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = theKeys.size() - 1; + for (int i = 0; i <= maxIndex; i++) { + ${keyType} key = theKeys.get(i); + buf.append(String.valueOf(key)); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/49ad8cb4/core/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t ---------------------------------------------------------------------- diff --git a/core/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t b/core/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t new file mode 100644 index 0000000..8c4c0f0 --- /dev/null +++ b/core/src/main/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSet.java.t @@ -0,0 +1,423 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.mahout.math.set; + +import java.util.Arrays; + +import org.apache.mahout.math.function.${keyTypeCap}Procedure; +import org.apache.mahout.math.list.${keyTypeCap}ArrayList; +import org.apache.mahout.math.map.HashFunctions; +import org.apache.mahout.math.map.PrimeFinder; + +/** + * Open hash set of ${keyType} items; + **/ +public class Open${keyTypeCap}HashSet extends Abstract${keyTypeCap}Set { + protected static final byte FREE = 0; + protected static final byte FULL = 1; + protected static final byte REMOVED = 2; +#if (${keyTypeFloating} == 'true') +#set ($noKeyComment = "${keyTypeCap}.NaN") + protected static final ${keyType} NO_KEY_VALUE = ${keyTypeCap}.NaN; +#else +#set ($noKeyComment = "0") + protected static final ${keyType} NO_KEY_VALUE = 0; +#end + + /** The hash table keys. */ + private ${keyType}[] table; + + /** The state of each hash table entry (FREE, FULL, REMOVED). */ + private byte[] state; + + /** The number of table entries in state==FREE. */ + private int freeEntries; + + + /** Constructs an empty map with default capacity and default load factors. */ + public Open${keyTypeCap}HashSet() { + this(DEFAULT_CAPACITY); + } + + /** + * Constructs an empty map with the specified initial capacity and default load factors. + * + * @param initialCapacity the initial capacity of the map. + * @throws IllegalArgumentException if the initial capacity is less than zero. + */ + public Open${keyTypeCap}HashSet(int initialCapacity) { + this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR); + } + + /** + * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor. + * + * @param initialCapacity the initial capacity. + * @param minLoadFactor the minimum load factor. + * @param maxLoadFactor the maximum load factor. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + public Open${keyTypeCap}HashSet(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + setUp(initialCapacity, minLoadFactor, maxLoadFactor); + } + + /** Removes all values associations from the receiver. Implicitly calls <tt>trimToSize()</tt>. */ + @Override + public void clear() { + Arrays.fill(this.state, FREE); + distinct = 0; + freeEntries = table.length; // delta + trimToSize(); + } + + /** + * Returns a deep copy of the receiver. + * + * @return a deep copy of the receiver. + */ + @Override + public Object clone() { + Open${keyTypeCap}HashSet copy = (Open${keyTypeCap}HashSet) super.clone(); + copy.table = copy.table.clone(); + copy.state = copy.state.clone(); + return copy; + } + + /** + * Returns <tt>true</tt> if the receiver contains the specified key. + * + * @return <tt>true</tt> if the receiver contains the specified key. + */ + @Override + public boolean contains(${keyType} key) { + return indexOfKey(key) >= 0; + } + + /** + * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new + * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This + * method never need be called; it is for performance tuning only. Calling this method before <tt>add()</tt>ing a + * large number of associations boosts performance, because the receiver will grow only once instead of potentially + * many times and hash collisions get less probable. + * + * @param minCapacity the desired minimum capacity. + */ + @Override + public void ensureCapacity(int minCapacity) { + if (table.length < minCapacity) { + int newCapacity = nextPrime(minCapacity); + rehash(newCapacity); + } + } + + /** + * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order. + * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed + * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this + * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and + * <tt>values</tt> will yield association pairs, not two uncorrelated lists. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise. + */ + @Override + public boolean forEachKey(${keyTypeCap}Procedure procedure) { + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL) { + if (!procedure.apply(table[i])) { + return false; + } + } + } + return true; + } + + /** + * @param key the key to be added to the receiver. + * @return the index where the key would need to be inserted, if it is not already contained. Returns -index-1 if the + * key is already contained at slot index. Therefore, if the returned index < 0, then it is already contained + * at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at + * slot index. + */ + protected int indexOfInsertion(${keyType} key) { + final int length = table.length; + + final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a removed or free slot, or if we find the key itself + // do NOT skip over removed slots (yes, open addressing is like that...) + while (state[i] == FULL && table[i] != key) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == REMOVED) { + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + // assertion: there is at least one FREE slot. + final int j = i; + while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + if (state[i] == FREE) { + i = j; + } + } + + + if (state[i] == FULL) { + // key already contained at slot i. + // return a negative number identifying the slot. + return -i - 1; + } + // not already contained, should be inserted at slot i. + // return a number >= 0 identifying the slot. + return i; + } + + /** + * @param key the key to be searched in the receiver. + * @return the index where the key is contained in the receiver, returns -1 if the key was not found. + */ + protected int indexOfKey(${keyType} key) { + final int length = table.length; + + final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; + int i = hash % length; + int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html + //int decrement = (hash / length) % length; + if (decrement == 0) { + decrement = 1; + } + + // stop if we find a free slot, or if we find the key itself. + // do skip over removed slots (yes, open addressing is like that...) + while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { + i -= decrement; + //hashCollisions++; + if (i < 0) { + i += length; + } + } + + if (state[i] == FREE) { + return -1; + } // not found + return i; //found, return index where key is contained + } + + /** + * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this + * call returns the specified list has a new size that equals <tt>this.size()</tt>. Iteration order is guaranteed to + * be <i>identical</i> to the order used by method {@link #forEachKey(${keyTypeCap}Procedure)}. + * <p> This method can be used + * to iterate over the keys of the receiver. + * + * @param list the list to be filled, can have any size. + */ + @Override + public void keys(${keyTypeCap}ArrayList list) { + list.setSize(distinct); + ${keyType} [] elements = list.elements(); + + int j = 0; + for (int i = table.length; i-- > 0;) { + if (state[i] == FULL) { + elements[j++] = table[i]; + } + } + } + + /** + * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if + * existing. + * + * @param key the key the value shall be associated with. + * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did + * already contain such a key - the new value has now replaced the formerly associated value. + */ + @Override + public boolean add(${keyType} key) { + int i = indexOfInsertion(key); + if (i < 0) { //already contained + //i = -i - 1; + return false; + } + + if (this.distinct > this.highWaterMark) { + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + return add(key); + } + + this.table[i] = key; + if (this.state[i] == FREE) { + this.freeEntries--; + } + this.state[i] = FULL; + this.distinct++; + + if (this.freeEntries < 1) { //delta + int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + /** + * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called + * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water + * mark. + */ + protected void rehash(int newCapacity) { + int oldCapacity = table.length; + //if (oldCapacity == newCapacity) return; + + ${keyType}[] oldTable = table; + byte[] oldState = state; + + this.table = new ${keyType}[newCapacity]; + this.state = new byte[newCapacity]; + + this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor); + this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor); + + this.freeEntries = newCapacity - this.distinct; // delta + + for (int i = oldCapacity; i-- > 0;) { + if (oldState[i] == FULL) { + ${keyType} element = oldTable[i]; + int index = indexOfInsertion(element); + this.table[index] = element; + this.state[index] = FULL; + } + } + } + + /** + * Removes the given key with its associated element from the receiver, if present. + * + * @param key the key to be removed from the receiver. + * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise. + */ + @Override + public boolean remove(${keyType} key) { + int i = indexOfKey(key); + if (i < 0) { + return false; + } // key not contained + + this.state[i] = REMOVED; + this.distinct--; + + if (this.distinct < this.lowWaterMark) { + int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor); + rehash(newCapacity); + } + + return true; + } + + /** + * Initializes the receiver. + * + * @param initialCapacity the initial capacity of the receiver. + * @param minLoadFactor the minLoadFactor of the receiver. + * @param maxLoadFactor the maxLoadFactor of the receiver. + * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || + * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= + * maxLoadFactor)</tt>. + */ + @Override + final protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) { + int capacity = initialCapacity; + super.setUp(capacity, minLoadFactor, maxLoadFactor); + capacity = nextPrime(capacity); + if (capacity == 0) { + capacity = 1; + } // open addressing needs at least one FREE slot at any time. + + this.table = new ${keyType}[capacity]; + this.state = new byte[capacity]; + + // memory will be exhausted long before this pathological case happens, anyway. + this.minLoadFactor = minLoadFactor; + if (capacity == PrimeFinder.LARGEST_PRIME) { + this.maxLoadFactor = 1.0; + } else { + this.maxLoadFactor = maxLoadFactor; + } + + this.distinct = 0; + this.freeEntries = capacity; // delta + + // lowWaterMark will be established upon first expansion. + // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...). + // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young. + // See ensureCapacity(...) + this.lowWaterMark = 0; + this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor); + } + + /** + * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An + * application can use this operation to minimize the storage of the receiver. + */ + @Override + public void trimToSize() { + // * 1.2 because open addressing's performance exponentially degrades beyond that point + // so that even rehashing the table can take very long + int newCapacity = nextPrime((int) (1 + 1.2 * size())); + if (table.length > newCapacity) { + rehash(newCapacity); + } + } + + /** + * Access for unit tests. + * @param capacity + * @param minLoadFactor + * @param maxLoadFactor + */ + protected void getInternalFactors(int[] capacity, + double[] minLoadFactor, + double[] maxLoadFactor) { + capacity[0] = table.length; + minLoadFactor[0] = this.minLoadFactor; + maxLoadFactor[0] = this.maxLoadFactor; + } +}
