Repository: opennlp Updated Branches: refs/heads/trunk 7d1123a9c -> 4da7f4c64
OPENNLP-887: Replace the Cache class with a LinkedHashMap This closes #9 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4da7f4c6 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4da7f4c6 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4da7f4c6 Branch: refs/heads/trunk Commit: 4da7f4c643d45b57a6a94e34797481b476194336 Parents: 7d1123a Author: smarthi <[email protected]> Authored: Mon Dec 19 12:51:54 2016 -0500 Committer: Jörn Kottmann <[email protected]> Committed: Mon Dec 19 22:16:53 2016 +0100 ---------------------------------------------------------------------- .../main/java/opennlp/tools/ml/BeamSearch.java | 10 +- .../opennlp/tools/ngram/NGramGenerator.java | 4 +- .../tools/parser/ChunkContextGenerator.java | 23 +- .../postag/DefaultPOSContextGenerator.java | 15 +- .../java/opennlp/tools/util/BeamSearch.java | 22 +- .../src/main/java/opennlp/tools/util/Cache.java | 329 +------------------ .../util/featuregen/CachedFeatureGenerator.java | 8 +- 7 files changed, 42 insertions(+), 369 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java index 0ed5fe6..209d4af 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java @@ -47,7 +47,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> { protected MaxentModel model; private double[] probs; - private Cache contextsCache; + private Cache<String[], double[]> contextsCache; private static final int zeroLog = -100000; /** @@ -66,7 +66,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> { this.model = model; if (cacheSize > 0) { - contextsCache = new Cache(cacheSize); + contextsCache = new Cache<>(cacheSize); } this.probs = new double[model.getNumOutcomes()]; @@ -102,7 +102,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> { String[] contexts = cg.getContext(i, sequence, outcomes, additionalContext); double[] scores; if (contextsCache != null) { - scores = (double[]) contextsCache.get(contexts); + scores = contextsCache.get(contexts); if (scores == null) { scores = model.eval(contexts, probs); contextsCache.put(contexts,scores); @@ -113,9 +113,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> { } double[] temp_scores = new double[scores.length]; - for (int c = 0; c < scores.length; c++) { - temp_scores[c] = scores[c]; - } + System.arraycopy(scores, 0, temp_scores, 0, scores.length); Arrays.sort(temp_scores); http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java index 7e05a93..f001ba2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java @@ -37,7 +37,7 @@ public class NGramGenerator { */ public static List<String> generate(List<String> input, int n, String separator) { - List<String> outGrams = new ArrayList<String>(); + List<String> outGrams = new ArrayList<>(); for (int i = 0; i < input.size() - (n - 2); i++) { String gram = ""; if ((i + n) <= input.size()) { @@ -59,7 +59,7 @@ public class NGramGenerator { */ public static List<String> generate(char[] input, int n, String separator) { - List<String> outGrams = new ArrayList<String>(); + List<String> outGrams = new ArrayList<>(); for (int i = 0; i < input.length - (n - 2); i++) { String gram = ""; if ((i + n) <= input.length) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java index 3619c57..7471b3c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java @@ -29,7 +29,7 @@ import opennlp.tools.util.Cache; public class ChunkContextGenerator implements ChunkerContextGenerator { private static final String EOS = "eos"; - private Cache contextsCache; + private Cache<String, String[]> contextsCache; private Object wordsKey; @@ -40,13 +40,13 @@ public class ChunkContextGenerator implements ChunkerContextGenerator { public ChunkContextGenerator(int cacheSize) { super(); if (cacheSize > 0) { - contextsCache = new Cache(cacheSize); + contextsCache = new Cache<>(cacheSize); } } public String[] getContext(Object o) { Object[] data = (Object[]) o; - return getContext(((Integer) data[0]).intValue(), (String[]) data[1], (String[]) data[2], (String[]) data[3]); + return getContext((Integer) data[0], (String[]) data[1], (String[]) data[2], (String[]) data[3]); } public String[] getContext(int i, String[] words, String[] prevDecisions, Object[] ac) { @@ -54,12 +54,11 @@ public class ChunkContextGenerator implements ChunkerContextGenerator { } public String[] getContext(int i, String[] words, String[] tags, String[] preds) { - List<String> features = new ArrayList<String>(19); - int x0 = i; - int x_2 = x0 - 2; - int x_1 = x0 - 1; - int x2 = x0 + 2; - int x1 = x0 + 1; + List<String> features = new ArrayList<>(19); + int x_2 = i - 2; + int x_1 = i - 1; + int x2 = i + 2; + int x1 = i + 1; String w_2,w_1,w0,w1,w2; String t_2,t_1,t0,t1,t2; @@ -90,8 +89,8 @@ public class ChunkContextGenerator implements ChunkerContextGenerator { } // chunkandpostag(0) - t0=tags[x0]; - w0=words[x0]; + t0=tags[i]; + w0=words[i]; // chunkandpostag(1) if (x1 < tags.length) { @@ -113,7 +112,7 @@ public class ChunkContextGenerator implements ChunkerContextGenerator { w2=EOS; } - String cacheKey = x0+t_2+t1+t0+t1+t2+p_2+p_1; + String cacheKey = i +t_2+t1+t0+t1+t2+p_2+p_1; if (contextsCache!= null) { if (wordsKey == words) { String[] contexts = (String[]) contextsCache.get(cacheKey); http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java index e570c89..581fed5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java @@ -39,7 +39,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator { private static Pattern hasCap = Pattern.compile("[A-Z]"); private static Pattern hasNum = Pattern.compile("[0-9]"); - private Cache contextsCache; + private Cache<String, String[]> contextsCache; private Object wordsKey; private Dictionary dict; @@ -64,12 +64,12 @@ public class DefaultPOSContextGenerator implements POSContextGenerator { this.dict = dict; dictGram = new String[1]; if (cacheSize > 0) { - contextsCache = new Cache(cacheSize); + contextsCache = new Cache<>(cacheSize); } } protected static String[] getPrefixes(String lex) { String[] prefs = new String[PREFIX_LENGTH]; - for (int li = 0, ll = PREFIX_LENGTH; li < ll; li++) { + for (int li = 0; li < PREFIX_LENGTH; li++) { prefs[li] = lex.substring(0, Math.min(li + 1, lex.length())); } return prefs; @@ -77,7 +77,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator { protected static String[] getSuffixes(String lex) { String[] suffs = new String[SUFFIX_LENGTH]; - for (int li = 0, ll = SUFFIX_LENGTH; li < ll; li++) { + for (int li = 0; li < SUFFIX_LENGTH; li++) { suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0)); } return suffs; @@ -95,10 +95,9 @@ public class DefaultPOSContextGenerator implements POSContextGenerator { * @return The context for making a pos tag decision at the specified token index given the specified tokens and previous tags. */ public String[] getContext(int index, Object[] tokens, String[] tags) { - String next, nextnext, lex, prev, prevprev; + String next, nextnext = null, lex, prev, prevprev = null; String tagprev, tagprevprev; tagprev = tagprevprev = null; - next = nextnext = lex = prev = prevprev = null; lex = tokens[index].toString(); if (tokens.length > index + 1) { @@ -131,7 +130,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator { String cacheKey = index+tagprev+tagprevprev; if (contextsCache != null) { if (wordsKey == tokens){ - String[] cachedContexts = (String[]) contextsCache.get(cacheKey); + String[] cachedContexts = contextsCache.get(cacheKey); if (cachedContexts != null) { return cachedContexts; } @@ -141,7 +140,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator { wordsKey = tokens; } } - List<String> e = new ArrayList<String>(); + List<String> e = new ArrayList<>(); e.add("default"); // add the word itself e.add("w=" + lex); http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java index 8c460ea..95cbea9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java @@ -44,7 +44,7 @@ public class BeamSearch<T> { private SequenceValidator<T> validator; private double[] probs; - private Cache contextsCache; + private Cache<String[], double[]> contextsCache; private static final int zeroLog = -100000; /** @@ -72,7 +72,7 @@ public class BeamSearch<T> { this.validator = validator; if (cacheSize > 0) { - contextsCache = new Cache(cacheSize); + contextsCache = new Cache<>(cacheSize); } this.probs = new double[model.getNumOutcomes()]; @@ -86,13 +86,7 @@ public class BeamSearch<T> { * @see SequenceValidator */ private boolean validSequence(int i, T[] inputSequence, String[] outcomesSequence, String outcome) { - - if (validator != null) { - return validator.validSequence(i, inputSequence, outcomesSequence, outcome); - } - else { - return true; - } + return validator == null || validator.validSequence(i, inputSequence, outcomesSequence, outcome); } public Sequence[] bestSequences(int numSequences, T[] sequence, Object[] additionalContext) { @@ -110,8 +104,8 @@ public class BeamSearch<T> { */ public Sequence[] bestSequences(int numSequences, T[] sequence, Object[] additionalContext, double minSequenceScore) { - Heap<Sequence> prev = new ListHeap<Sequence>(size); - Heap<Sequence> next = new ListHeap<Sequence>(size); + Heap<Sequence> prev = new ListHeap<>(size); + Heap<Sequence> next = new ListHeap<>(size); Heap<Sequence> tmp; prev.add(new Sequence()); @@ -129,7 +123,7 @@ public class BeamSearch<T> { String[] contexts = cg.getContext(i, sequence, outcomes, additionalContext); double[] scores; if (contextsCache != null) { - scores = (double[]) contextsCache.get(contexts); + scores = contextsCache.get(contexts); if (scores == null) { scores = model.eval(contexts, probs); contextsCache.put(contexts,scores); @@ -140,9 +134,7 @@ public class BeamSearch<T> { } double[] temp_scores = new double[scores.length]; - for (int c = 0; c < scores.length; c++) { - temp_scores[c] = scores[c]; - } + System.arraycopy(scores, 0, temp_scores, 0, scores.length); Arrays.sort(temp_scores); http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java b/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java index 5ae82fd..fb4d6cb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java @@ -17,337 +17,22 @@ package opennlp.tools.util; -import java.util.Collection; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; -import java.util.Set; /** * Provides fixed size, pre-allocated, least recently used replacement cache. */ -@SuppressWarnings("unchecked") -public class Cache implements Map { +public class Cache<K,V> extends LinkedHashMap<K,V> { - /** The element in the linked list which was most recently used. **/ - private DoubleLinkedListElement first; - /** The element in the linked list which was least recently used. **/ - private DoubleLinkedListElement last; - /** Temporary holder of the key of the least-recently-used element. */ - private Object lastKey; - /** Temporary value used in swap. */ - private ObjectWrapper temp; - /** Holds the object wrappers which the keys are mapped to. */ - private ObjectWrapper[] wrappers; - /** Map which stores the keys and values of the cache. */ - private Map map; - /** The size of the cache. */ - private int size; + private int capacity; - /** - * Creates a new cache of the specified size. - * @param size The size of the cache. - */ - public Cache(int size) { - map = new HashMap(size); - wrappers = new ObjectWrapper[size]; - this.size=size; - Object o = new Object(); - first = new DoubleLinkedListElement(null, null, o); - map.put(o, new ObjectWrapper(null, first)); - wrappers[0] = new ObjectWrapper(null, first); - - DoubleLinkedListElement e = first; - for(int i=1; i<size; i++) { - o = new Object(); - e = new DoubleLinkedListElement(e, null, o); - wrappers[i] = new ObjectWrapper(null, e); - map.put(o, wrappers[i]); - e.prev.next = e; - } - last = e; - } - - public void clear() { - map.clear(); - DoubleLinkedListElement e = first; - for (int oi=0;oi<size;oi++) { - wrappers[oi].object=null; - Object o = new Object(); - map.put(o,wrappers[oi]); - e.object = o; - e = e.next; - } - } - - public Object put(Object key, Object value) { - ObjectWrapper o = (ObjectWrapper) map.get(key); - if (o != null) { - /* - * this should never be the case, we only do a put on a cache miss which - * means the current value wasn't in the cache. However if the user screws - * up or wants to use this as a fixed size hash and puts the same thing in - * the list twice then we update the value and more the key to the front of the - * most recently used list. - */ - - // Move o's partner in the list to front - DoubleLinkedListElement e = o.listItem; - - //move to front - if (e != first) { - //remove list item - e.prev.next = e.next; - if (e.next != null) { - e.next.prev = e.prev; - } - else { //were moving last - last = e.prev; - } - - //put list item in front - e.next = first; - first.prev = e; - e.prev = null; - - //update first - first = e; - } - return o.object; - } - // Put o in the front and remove the last one - lastKey = last.object; // store key to remove from hash later - last.object = key; //update list element with new key - - // connect list item to front of list - last.next = first; - first.prev = last; - - // update first and last value - first = last; - last = last.prev; - first.prev = null; - last.next = null; - - // remove old value from cache - temp = (ObjectWrapper) map.remove(lastKey); - //update wrapper - temp.object = value; - temp.listItem = first; - - map.put(key, temp); - return null; - } - - public Object get(Object key) { - ObjectWrapper o = (ObjectWrapper) map.get(key); - if (o != null) { - // Move it to the front - DoubleLinkedListElement e = o.listItem; - - //move to front - if (e != first) { - //remove list item - e.prev.next = e.next; - if (e.next != null) { - e.next.prev = e.prev; - } - else { //were moving last - last = e.prev; - } - //put list item in front - e.next = first; - first.prev = e; - e.prev = null; - - //update first - first = e; - } - return o.object; - } - else { - return null; - } - } - - - public boolean containsKey(Object key) { - return map.containsKey(key); - } - - public boolean containsValue(Object value) { - return map.containsValue(value); - } - - public Set entrySet() { - return map.entrySet(); - } - - public boolean isEmpty() { - return map.isEmpty(); - } - - public Set keySet() { - return map.keySet(); - } - - public void putAll(Map t) { - map.putAll(t); - } - - public Object remove(Object key) { - return map.remove(key); - } - - public int size() { - return map.size(); - } - - public Collection values() { - return map.values(); - } -} - -class ObjectWrapper { - - public Object object; - public DoubleLinkedListElement listItem; - - public ObjectWrapper(Object o,DoubleLinkedListElement li) { - object = o; - listItem = li; - } - - public Object getObject() { - return object; - } - - public DoubleLinkedListElement getListItem() { - return listItem; - } - - public void setObject(Object o) { - object = o; - } - - public void setListItem(DoubleLinkedListElement li) { - listItem = li; - } - - public boolean eqauls(Object o) { - return object.equals(o); - } -} - -class DoubleLinkedListElement { - - public DoubleLinkedListElement prev; - public DoubleLinkedListElement next; - public Object object; - - public DoubleLinkedListElement(DoubleLinkedListElement p, - DoubleLinkedListElement n, - Object o) { - prev = p; - next = n; - object = o; - - if (p != null) { - p.next = this; - } - - if (n != null) { - n.prev = this; - } - } -} - -class DoubleLinkedList { - - DoubleLinkedListElement first; - DoubleLinkedListElement last; - DoubleLinkedListElement current; - - public DoubleLinkedList() { - first = null; - last = null; - current = null; - } - - public void addFirst(Object o) { - first = new DoubleLinkedListElement(null, first, o); - - if (current.next == null) { - last = current; - } - } - - public void addLast(Object o) { - last = new DoubleLinkedListElement(last, null, o); - - if (current.prev == null) { - first = current; - } - } - - public void insert(Object o) { - if (current == null) { - current = new DoubleLinkedListElement(null, null, o); - } - else { - current = new DoubleLinkedListElement(current.prev, current, o); - } - - if (current.prev == null) { - first = current; - } - - if (current.next == null) { - last = current; - } - } - - public DoubleLinkedListElement getFirst() { - current = first; - return first; - } - - public DoubleLinkedListElement getLast() { - current = last; - return last; - } - - public DoubleLinkedListElement getCurrent() { - return current; - } - - public DoubleLinkedListElement next() { - if (current.next != null) { - current = current.next; - } - return current; - } - - public DoubleLinkedListElement prev() { - if (current.prev != null) { - current = current.prev; - } - return current; + public Cache(final int capacity) { + this.capacity = capacity; } @Override - public String toString() { - DoubleLinkedListElement e = first; - String s = "[" + e.object.toString(); - - e = e.next; - - while (e != null) { - s = s + ", " + e.object.toString(); - e = e.next; - } - - s = s + "]"; - - return s; + protected boolean removeEldestEntry(Map.Entry<K,V> eldest) { + return this.size() > this.capacity; } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java index 2bdec5b..afb0a2c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java @@ -32,14 +32,14 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator { private String[] prevTokens; - private Cache contextsCache; + private Cache<Integer, List<String>> contextsCache; private long numberOfCacheHits; private long numberOfCacheMisses; public CachedFeatureGenerator(AdaptiveFeatureGenerator... generators) { this.generator = new AggregatedFeatureGenerator(generators); - contextsCache = new Cache(100); + contextsCache = new Cache<>(100); } @SuppressWarnings("unchecked") @@ -49,7 +49,7 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator { List<String> cacheFeatures; if (tokens == prevTokens) { - cacheFeatures = (List<String>) contextsCache.get(index); + cacheFeatures = contextsCache.get(index); if (cacheFeatures != null) { numberOfCacheHits++; @@ -62,7 +62,7 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator { prevTokens = tokens; } - cacheFeatures = new ArrayList<String>(); + cacheFeatures = new ArrayList<>(); numberOfCacheMisses++;
