Author: yonik
Date: Wed Jan 23 20:41:38 2008
New Revision: 614793
URL: http://svn.apache.org/viewvc?rev=614793&view=rev
Log:
SOLR-466: add CharArrayMap, update SynonymMap to use char[] rather than String
Tokens
Added:
lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java (with
props)
lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java
(with props)
Modified:
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymMap.java
lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java?rev=614793&r1=614792&r2=614793&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java Wed
Jan 23 20:41:38 2008
@@ -39,13 +39,11 @@
public class SynonymFilter extends TokenFilter {
private final SynonymMap map; // Map<String, SynonymMap>
- private final boolean ignoreCase;
- private Iterator replacement; // iterator over generated tokens
+ private Iterator<Token> replacement; // iterator over generated tokens
- public SynonymFilter(TokenStream in, SynonymMap map, boolean ignoreCase) {
+ public SynonymFilter(TokenStream in, SynonymMap map) {
super(in);
this.map = map;
- this.ignoreCase = ignoreCase;
}
@@ -66,26 +64,26 @@
* merging token streams to preserve token positions.
* - preserve original positionIncrement of first matched token
*/
- public Token next() throws IOException {
+ @Override
+ public Token next(Token target) throws IOException {
while (true) {
// if there are any generated tokens, return them... don't try any
// matches against them, as we specifically don't want recursion.
if (replacement!=null && replacement.hasNext()) {
- return (Token)replacement.next();
+ return replacement.next();
}
// common case fast-path of first token not matching anything
- Token firstTok = nextTok();
- if (firstTok ==null) return null;
- String str = ignoreCase ? firstTok.termText().toLowerCase() :
firstTok.termText();
- Object o = map.submap!=null ? map.submap.get(str) : null;
- if (o == null) return firstTok;
+ Token firstTok = nextTok(target);
+ if (firstTok == null) return null;
+ SynonymMap result = map.submap!=null ?
map.submap.get(firstTok.termBuffer(), 0, firstTok.termLength()) : null;
+ if (result == null) return firstTok;
// OK, we matched a token, so find the longest match.
- matched = new LinkedList();
+ matched = new LinkedList<Token>();
- SynonymMap result = match((SynonymMap)o);
+ result = match(result);
if (result==null) {
// no match, simply return the first token read.
@@ -93,13 +91,13 @@
}
// reuse, or create new one each time?
- ArrayList generated = new ArrayList(result.synonyms.length +
matched.size() + 1);
+ ArrayList<Token> generated = new ArrayList<Token>(result.synonyms.length
+ matched.size() + 1);
//
// there was a match... let's generate the new tokens, merging
// in the matched tokens (position increments need adjusting)
//
- Token lastTok = matched.isEmpty() ? firstTok : (Token)matched.getLast();
+ Token lastTok = matched.isEmpty() ? firstTok : matched.getLast();
boolean includeOrig = result.includeOrig();
Token origTok = includeOrig ? firstTok : null;
@@ -109,7 +107,8 @@
for (int i=0; i<result.synonyms.length; i++) {
Token repTok = result.synonyms[i];
- Token newTok = new Token(repTok.termText(), firstTok.startOffset(),
lastTok.endOffset(), firstTok.type());
+ Token newTok = new Token(firstTok.startOffset(), lastTok.endOffset(),
firstTok.type());
+ newTok.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
repPos += repTok.getPositionIncrement();
if (i==0) repPos=origPos; // make position of first token equal to
original
@@ -118,7 +117,7 @@
origTok.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origTok.getPositionIncrement();
- origTok = matched.isEmpty() ? null : (Token)matched.removeFirst();
+ origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) origPos += origTok.getPositionIncrement();
}
@@ -132,7 +131,7 @@
origTok.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origTok.getPositionIncrement();
- origTok = matched.isEmpty() ? null : (Token)matched.removeFirst();
+ origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) origPos += origTok.getPositionIncrement();
}
@@ -152,21 +151,27 @@
// Defer creation of the buffer until the first time it is used to
// optimize short fields with no matches.
//
- private LinkedList buffer;
- private LinkedList matched;
-
- // TODO: use ArrayList for better performance?
+ private LinkedList<Token> buffer;
+ private LinkedList<Token> matched;
private Token nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
- return (Token)buffer.removeFirst();
+ return buffer.removeFirst();
} else {
return input.next();
}
}
+ private Token nextTok(Token target) throws IOException {
+ if (buffer!=null && !buffer.isEmpty()) {
+ return buffer.removeFirst();
+ } else {
+ return input.next(target);
+ }
+ }
+
private void pushTok(Token t) {
- if (buffer==null) buffer=new LinkedList();
+ if (buffer==null) buffer=new LinkedList<Token>();
buffer.addFirst(t);
}
@@ -177,9 +182,7 @@
Token tok = nextTok();
if (tok != null) {
// check for positionIncrement!=1? if>1, should not match, if==0,
check multiple at this level?
- String str = ignoreCase ? tok.termText().toLowerCase() :
tok.termText();
-
- SynonymMap subMap = (SynonymMap)map.submap.get(str);
+ SynonymMap subMap = map.submap.get(tok.termBuffer(), 0,
tok.termLength());
if (subMap != null) {
// recurse
Modified:
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilterFactory.java?rev=614793&r1=614792&r2=614793&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
Wed Jan 23 20:41:38 2008
@@ -35,8 +35,8 @@
public void inform(ResourceLoader loader) {
String synonyms = args.get("synonyms");
- ignoreCase = getBoolean("ignoreCase",false);
- expand = getBoolean("expand",true);
+ boolean ignoreCase = getBoolean("ignoreCase", false);
+ boolean expand = getBoolean("expand", true);
if (synonyms != null) {
List<String> wlist=null;
@@ -45,8 +45,8 @@
} catch (IOException e) {
throw new RuntimeException(e);
}
- synMap = new SynonymMap();
- parseRules(wlist, synMap, "=>", ",", ignoreCase,expand);
+ synMap = new SynonymMap(ignoreCase);
+ parseRules(wlist, synMap, "=>", ",", expand);
if (wlist.size()<=20) {
SolrCore.log.fine("SynonymMap "+synonyms +":"+synMap);
}
@@ -54,10 +54,8 @@
}
private SynonymMap synMap;
- private boolean ignoreCase;
- private boolean expand;
- private static void parseRules(List<String> rules, SynonymMap map, String
mappingSep, String synSep, boolean ignoreCase, boolean expansion) {
+ private static void parseRules(List<String> rules, SynonymMap map, String
mappingSep, String synSep, boolean expansion) {
int count=0;
for (String rule : rules) {
// To use regexes, we need an expression that specifies an odd number of
chars.
@@ -91,10 +89,11 @@
for (List<String> fromToks : source) {
count++;
for (List<String> toToks : target) {
- map.add(ignoreCase ? StrUtils.toLower(fromToks) : fromToks,
+ map.add(fromToks,
SynonymMap.makeTokens(toToks),
includeOrig,
- true);
+ true
+ );
}
}
}
@@ -114,7 +113,7 @@
public SynonymFilter create(TokenStream input) {
- return new SynonymFilter(input,synMap,ignoreCase);
+ return new SynonymFilter(input,synMap);
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymMap.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymMap.java?rev=614793&r1=614792&r2=614793&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymMap.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymMap.java Wed Jan
23 20:41:38 2008
@@ -18,6 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
+import org.apache.solr.util.CharArrayMap;
import java.util.*;
@@ -26,13 +27,20 @@
* @version $Id$
*/
public class SynonymMap {
- Map submap; // recursive: Map<String, SynonymMap>
+ CharArrayMap<SynonymMap> submap; // recursive: Map<String, SynonymMap>
Token[] synonyms;
int flags;
static final int INCLUDE_ORIG=0x01;
+ static final int IGNORE_CASE=0x02;
+
+ public SynonymMap() {}
+ public SynonymMap(boolean ignoreCase) {
+ if (ignoreCase) flags |= IGNORE_CASE;
+ }
public boolean includeOrig() { return (flags & INCLUDE_ORIG) != 0; }
+ public boolean ignoreCase() { return (flags & IGNORE_CASE) != 0; }
/**
* @param singleMatch List<String>, the sequence of strings to match
@@ -40,17 +48,17 @@
* @param includeOrig sets a flag on this mapping signaling the generation
of matched tokens in addition to the replacement tokens
* @param mergeExisting merge the replacement tokens with any other mappings
that exist
*/
- public void add(List singleMatch, List replacement, boolean includeOrig,
boolean mergeExisting) {
+ public void add(List<String> singleMatch, List<Token> replacement, boolean
includeOrig, boolean mergeExisting) {
SynonymMap currMap = this;
- for (Iterator iter = singleMatch.iterator(); iter.hasNext();) {
- String str = (String)iter.next();
+ for (String str : singleMatch) {
if (currMap.submap==null) {
- currMap.submap = new HashMap(1);
+ currMap.submap = new CharArrayMap<SynonymMap>(1, ignoreCase());
}
- SynonymMap map = (SynonymMap)currMap.submap.get(str);
+ SynonymMap map = currMap.submap.get(str);
if (map==null) {
map = new SynonymMap();
+ map.flags |= flags & IGNORE_CASE;
currMap.submap.put(str, map);
}
@@ -68,7 +76,7 @@
public String toString() {
- StringBuffer sb = new StringBuffer("<");
+ StringBuilder sb = new StringBuilder("<");
if (synonyms!=null) {
sb.append("[");
for (int i=0; i<synonyms.length; i++) {
@@ -88,10 +96,12 @@
/** Produces a List<Token> from a List<String> */
- public static List makeTokens(List strings) {
- List ret = new ArrayList(strings.size());
- for (Iterator iter = strings.iterator(); iter.hasNext();) {
- Token newTok = new Token((String)iter.next(),0,0,"SYNONYM");
+ public static List<Token> makeTokens(List<String> strings) {
+ List<Token> ret = new ArrayList<Token>(strings.size());
+ for (String str : strings) {
+ //Token newTok = new Token(str,0,0,"SYNONYM");
+ Token newTok = new Token(0,0,"SYNONYM");
+ newTok.setTermBuffer(str.toCharArray(), 0, str.length());
ret.add(newTok);
}
return ret;
@@ -106,8 +116,8 @@
* Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2]
(a,n means a has posInc=n)
*
*/
- public static List mergeTokens(List lst1, List lst2) {
- ArrayList result = new ArrayList();
+ public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
+ ArrayList<Token> result = new ArrayList<Token>();
if (lst1 ==null || lst2 ==null) {
if (lst2 != null) result.addAll(lst2);
if (lst1 != null) result.addAll(lst1);
@@ -115,27 +125,29 @@
}
int pos=0;
- Iterator iter1=lst1.iterator();
- Iterator iter2=lst2.iterator();
- Token tok1 = iter1.hasNext() ? (Token)iter1.next() : null;
- Token tok2 = iter2.hasNext() ? (Token)iter2.next() : null;
+ Iterator<Token> iter1=lst1.iterator();
+ Iterator<Token> iter2=lst2.iterator();
+ Token tok1 = iter1.hasNext() ? iter1.next() : null;
+ Token tok2 = iter2.hasNext() ? iter2.next() : null;
int pos1 = tok1!=null ? tok1.getPositionIncrement() : 0;
int pos2 = tok2!=null ? tok2.getPositionIncrement() : 0;
while(tok1!=null || tok2!=null) {
while (tok1 != null && (pos1 <= pos2 || tok2==null)) {
- Token tok = new Token(tok1.termText(), tok1.startOffset(),
tok1.endOffset(), tok1.type());
+ Token tok = new Token(tok1.startOffset(), tok1.endOffset(),
tok1.type());
+ tok.setTermBuffer(tok1.termBuffer(), 0, tok1.termLength());
tok.setPositionIncrement(pos1-pos);
result.add(tok);
pos=pos1;
- tok1 = iter1.hasNext() ? (Token)iter1.next() : null;
+ tok1 = iter1.hasNext() ? iter1.next() : null;
pos1 += tok1!=null ? tok1.getPositionIncrement() : 0;
}
while (tok2 != null && (pos2 <= pos1 || tok1==null)) {
- Token tok = new Token(tok2.termText(), tok2.startOffset(),
tok2.endOffset(), tok2.type());
+ Token tok = new Token(tok2.startOffset(), tok2.endOffset(),
tok2.type());
+ tok.setTermBuffer(tok2.termBuffer(), 0, tok2.termLength());
tok.setPositionIncrement(pos2-pos);
result.add(tok);
pos=pos2;
- tok2 = iter2.hasNext() ? (Token)iter2.next() : null;
+ tok2 = iter2.hasNext() ? iter2.next() : null;
pos2 += tok2!=null ? tok2.getPositionIncrement() : 0;
}
}
Added: lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java?rev=614793&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java Wed Jan
23 20:41:38 2008
@@ -0,0 +1,411 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import java.util.*;
+import java.io.Serializable;
+
+/**
+ * A simple class that stores key Strings as char[]'s in a
+ * hash table. Note that this is not a general purpose
+ * class. For example, it cannot remove items from the
+ * map, nor does it resize its hash table to be smaller,
+ * etc. It is designed to be quick to retrieve items
+ * by char[] keys without the necessity of converting
+ * to a String first.
+ */
+
+public class CharArrayMap<V> extends AbstractMap<String, V>
+ implements Map<String, V>, Cloneable,
Serializable
+{
+ private final static int INIT_SIZE = 2;
+ private char[][] keys;
+ private Object[] values;
+ private int count;
+ private final boolean ignoreCase;
+
+ /** Create map with enough capacity to hold startSize
+ * terms */
+ public CharArrayMap(int initialCapacity, boolean ignoreCase) {
+ this.ignoreCase = ignoreCase;
+ int size = INIT_SIZE;
+ // load factor of .75, inverse is 1.25, or x+x/4
+ initialCapacity = initialCapacity + (initialCapacity >>2);
+ while(size <= initialCapacity)
+ size <<= 1;
+ keys = new char[size][];
+ values = new Object[size];
+ }
+
+ /** Tells whether this map lower-cases keys on insertion and on lookup. */
+ public boolean ignoreCase() {
+   return this.ignoreCase;
+ }
+
+ /** Looks up the value stored for the whole of the given char[] key, or null if none. */
+ public V get(char[] key) {
+   final int length = key.length;
+   return get(key, 0, length);
+ }
+
+ /** Looks up the value stored for the len chars of key starting at off, or null if none. */
+ public V get(char[] key, int off, int len) {
+   final int slot = getSlot(key, off, len);
+   return (V)values[slot];
+ }
+
+ /** Looks up the value stored for the given character sequence, or null if none. */
+ public V get(CharSequence key) {
+   final int slot = getSlot(key);
+   return (V)values[slot];
+ }
+
+ /** Map-interface lookup; the key may be a char[] or a CharSequence. */
+ @Override
+ public V get(Object key) {
+   final int slot = getSlot(key);
+   return (V)values[slot];
+ }
+
+ /** Returns true when a key equal to s (a char[] or a CharSequence) is stored. */
+ @Override
+ public boolean containsKey(Object s) {
+   final char[] stored = keys[getSlot(s)];
+   return stored != null;
+ }
+
+ /**
+  * Scans the whole table for the given value. A null value only counts as
+  * present when it sits in a slot that actually holds a key.
+  */
+ @Override
+ public boolean containsValue(Object value) {
+   final boolean wantNull = (value == null);
+   for (int i=0; i<values.length; i++) {
+     final Object candidate = values[i];
+     if (wantNull) {
+       // an empty slot also has a null value, so require a key
+       if (candidate == null && keys[i] != null) return true;
+     } else if (candidate != null && value.equals(candidate)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+
+ private int getSlot(Object key) {
+ if (key instanceof char[]) {
+ char[] keyc = (char[])key;
+ return getSlot(keyc, 0, keyc.length);
+ }
+ return getSlot((CharSequence)key);
+ }
+
+ /**
+  * Finds the table slot for the len chars of key starting at off: either the
+  * slot that already holds an equal key, or the first empty slot on its
+  * probe sequence.
+  */
+ private int getSlot(char[] key, int off, int len) {
+   // Hash exactly the chars that equals() compares, i.e. key[off..off+len).
+   // Hashing key[0..len) instead made lookups with off != 0 miss.
+   int code = getHashCode(key, off, len);
+   int pos = code & (keys.length-1);
+   char[] key2 = keys[pos];
+   if (key2 != null && !equals(key, off, len, key2)) {
+     // odd increment, so every slot of the power-of-two table is reachable
+     final int inc = ((code>>8)+code)|1;
+     do {
+       code += inc;
+       pos = code & (keys.length-1);
+       key2 = keys[pos];
+     } while (key2 != null && !equals(key, off, len, key2));
+   }
+   return pos;
+ }
+
+ /**
+  * Offset-aware 31-based polynomial hash of text[off..off+len), lower-casing
+  * each char first when this map ignores case. Produces the same value as
+  * the CharSequence hash for the same characters.
+  */
+ private int getHashCode(char[] text, int off, int len) {
+   int code = 0;
+   final int stop = off + len;
+   if (ignoreCase) {
+     for (int i=off; i<stop; i++) {
+       code = code*31 + Character.toLowerCase(text[i]);
+     }
+   } else {
+     for (int i=off; i<stop; i++) {
+       code = code*31 + text[i];
+     }
+   }
+   return code;
+ }
+
+ /**
+  * Finds the table slot for the given character sequence: either the slot
+  * that already holds an equal key, or the first empty slot on its probe
+  * sequence.
+  */
+ private int getSlot(CharSequence key) {
+   int code = getHashCode(key);
+   final int mask = keys.length-1;
+   int pos = code & mask;
+   char[] occupant = keys[pos];
+   if (occupant == null || equals(key, occupant)) {
+     return pos;
+   }
+   final int step = ((code>>8)+code)|1;
+   while (true) {
+     code += step;
+     pos = code & mask;
+     occupant = keys[pos];
+     if (occupant == null || equals(key, occupant)) {
+       return pos;
+     }
+   }
+ }
+
+ /** Stores val under the characters of key by first converting the key to a String. */
+ public V put(CharSequence key, V val) {
+   final String asString = key.toString(); // could be more efficient
+   return put(asString, val);
+ }
+
+ /** Stores val under a fresh char[] copy of the given String key. */
+ @Override
+ public V put(String key, V val) {
+   final char[] chars = key.toCharArray();
+   return put(chars, val);
+ }
+
+ /**
+  * Stores val under key and returns the value previously stored for that
+  * key (null if there was none).
+  * The char[] is kept as the stored key without copying, and when this map
+  * ignores case it is lower-cased in place first, so callers must never
+  * modify the array after this call.
+  */
+ public V put(char[] key, Object val) {
+   if (ignoreCase) {
+     for (int i = 0; i < key.length; i++) {
+       key[i] = Character.toLowerCase(key[i]);
+     }
+   }
+   final int slot = getSlot(key, 0, key.length);
+   final boolean vacant = (keys[slot] == null);
+   final Object previous = values[slot];
+   keys[slot] = key;
+   values[slot] = val;
+   if (vacant) count++;
+
+   // grow once count plus a quarter of count reaches the table size
+   if (count + (count>>2) >= keys.length) rehash();
+
+   return (V)previous;
+ }
+
+
+ /**
+  * Compares text1[off..off+len) with the stored key text2. When this map
+  * ignores case only text1 is lower-cased; text2 is compared as stored.
+  */
+ private boolean equals(char[] text1, int off, int len, char[] text2) {
+   if (len != text2.length) {
+     return false;
+   }
+   for (int i = 0; i < len; i++) {
+     char c = text1[off+i];
+     if (ignoreCase) {
+       c = Character.toLowerCase(c);
+     }
+     if (c != text2[i]) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ private boolean equals(CharSequence text1, char[] text2) {
+ int len = text1.length();
+ if (len != text2.length)
+ return false;
+ if (ignoreCase) {
+ for(int i=0;i<len;i++) {
+ if (Character.toLowerCase(text1.charAt(i)) != text2[i])
+ return false;
+ }
+ } else {
+ for(int i=0;i<len;i++) {
+ if (text1.charAt(i) != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /** Doubles the table and re-inserts every stored key/value pair into the new table. */
+ private void rehash() {
+   final char[][] oldKeys = keys;
+   final Object[] oldValues = values;
+   final int doubled = 2 * oldKeys.length;
+   keys = new char[doubled][];
+   values = new Object[doubled];
+
+   for (int i = 0; i < oldKeys.length; i++) {
+     final char[] key = oldKeys[i];
+     if (key == null) continue;
+     // todo: could be faster... no need to compare keys on collision
+     // since they are unique
+     final int slot = getSlot(key, 0, key.length);
+     keys[slot] = key;
+     values[slot] = oldValues[i];
+   }
+ }
+
+ private int getHashCode(char[] text, int len) {
+ int code = 0;
+ if (ignoreCase) {
+ for (int i=0; i<len; i++) {
+ code = code*31 + Character.toLowerCase(text[i]);
+ }
+ } else {
+ for (int i=0; i<len; i++) {
+ code = code*31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ private int getHashCode(CharSequence text) {
+ int code;
+ if (ignoreCase) {
+ code = 0;
+ int len = text.length();
+ for (int i=0; i<len; i++) {
+ code = code*31 + Character.toLowerCase(text.charAt(i));
+ }
+ } else {
+ if (false && text instanceof String) {
+ code = text.hashCode();
+ } else {
+ code = 0;
+ int len = text.length();
+ for (int i=0; i<len; i++) {
+ code = code*31 + text.charAt(i);
+ }
+ }
+ }
+ return code;
+ }
+
+ /** Returns the number of keys currently stored. */
+ @Override
+ public int size() {
+   return this.count;
+ }
+
+ /** Returns true when no key is stored. */
+ @Override
+ public boolean isEmpty() {
+   return 0 == this.count;
+ }
+
+ /** Empties the map; the table keeps its current size. */
+ @Override
+ public void clear() {
+   Arrays.fill(values, null);
+   Arrays.fill(keys, null);
+   count = 0;
+ }
+
+ /** Returns a newly created entry-set view of this map. */
+ @Override
+ public Set<Entry<String, V>> entrySet() {
+   final EntrySet view = new EntrySet();
+   return view;
+ }
+
+ /** Creates a new EntryIterator over the keys and values of this map. */
+ public EntryIterator iterator() {
+   final EntryIterator it = new EntryIterator();
+   return it;
+ }
+
+ /**
+  * Public iterator class so that the efficient char[]-based accessors
+  * (nextKey, currentValue, setValue) are exposed to users.
+  * Removal is not supported.
+  */
+ public class EntryIterator implements Iterator<Map.Entry<String,V>> {
+   int pos=-1;   // slot of the next key to return; keys.length once exhausted
+   int lastPos;  // slot of the key most recently returned
+
+   EntryIterator() {
+     goNext();
+   }
+
+   /**
+    * Remembers the current slot in lastPos and advances pos to the next
+    * occupied slot.
+    * @throws NoSuchElementException if the iteration is already exhausted
+    */
+   private void goNext() {
+     // Stepping past the end used to hand out an entry for a slot outside
+     // the table (or fail with ArrayIndexOutOfBoundsException); the Iterator
+     // contract asks for NoSuchElementException instead.
+     if (pos >= keys.length) throw new NoSuchElementException();
+     lastPos = pos;
+     pos++;
+     while (pos < keys.length && keys[pos] == null) pos++;
+   }
+
+   public boolean hasNext() {
+     return pos < keys.length;
+   }
+
+   /** gets the next key... do not modify the returned char[] */
+   public char[] nextKey() {
+     goNext();
+     return keys[lastPos];
+   }
+
+   /** gets the next key as a newly created String object */
+   public String nextKeyString() {
+     return new String(nextKey());
+   }
+
+   /** returns the value associated with the last key returned */
+   public V currentValue() {
+     return (V)values[lastPos];
+   }
+
+   /** sets the value associated with the last key returned; returns the old value */
+   public V setValue(V value) {
+     V old = (V)values[lastPos];
+     values[lastPos] = value;
+     return old;
+   }
+
+   /** Returns an Entry<String,V> object created on the fly...
+    * use nextKey() + currentValue() for better efficiency. */
+   public Map.Entry<String,V> next() {
+     goNext();
+     return new MapEntry(lastPos);
+   }
+
+   /** Always throws: this map cannot remove items. */
+   public void remove() {
+     throw new UnsupportedOperationException();
+   }
+ }
+
+
+ /**
+  * Live view of one table slot as a Map.Entry: reads and writes go straight
+  * to the enclosing map's key and value arrays.
+  */
+ private class MapEntry implements Map.Entry<String,V> {
+   final int pos;  // table slot this entry refers to
+
+   MapEntry(int pos) {
+     this.pos = pos;
+   }
+
+   /** Returns the stored key array itself; do not modify it. */
+   public char[] getCharArr() {
+     return keys[pos];
+   }
+
+   /** Returns the key as a newly created String. */
+   public String getKey() {
+     return new String(getCharArr());
+   }
+
+   public V getValue() {
+     return (V)values[pos];
+   }
+
+   /** Replaces the value in the backing map and returns the old one. */
+   public V setValue(V value) {
+     V old = (V)values[pos];
+     values[pos] = value;
+     return old;
+   }
+
+   /**
+    * Entry equality as specified by Map.Entry: equal keys and equal values.
+    * Without this, entries compared by identity only.
+    */
+   @Override
+   public boolean equals(Object o) {
+     if (!(o instanceof Map.Entry)) return false;
+     Map.Entry<?,?> other = (Map.Entry<?,?>)o;
+     Object value = getValue();
+     Object otherValue = other.getValue();
+     return getKey().equals(other.getKey())
+         && (value == null ? otherValue == null : value.equals(otherValue));
+   }
+
+   /**
+    * Hash as specified by Map.Entry (key hash XOR value hash). The map's
+    * inherited hashCode() sums these, so it now agrees with an equal HashMap.
+    */
+   @Override
+   public int hashCode() {
+     Object value = getValue();
+     return getKey().hashCode() ^ (value == null ? 0 : value.hashCode());
+   }
+
+   public String toString() {
+     return getKey() + '=' + getValue();
+   }
+ }
+
+
+
+ /** Entry-set view of the enclosing map; entries cannot be removed individually. */
+ private class EntrySet extends AbstractSet<Map.Entry<String, V>> {
+   public EntryIterator iterator() {
+     return new EntryIterator();
+   }
+
+   /**
+    * Returns true only when the map holds the entry's key and the stored
+    * value equals the entry's value.
+    */
+   public boolean contains(Object o) {
+     if (!(o instanceof Map.Entry))
+       return false;
+     Map.Entry<?,?> e = (Map.Entry<?,?>)o;
+     Object key = e.getKey();
+     if (key==null) return false; // we don't support null keys
+     // keys of any other type can never be stored; answer false rather
+     // than letting the lookup fail with a ClassCastException
+     if (!(key instanceof CharSequence) && !(key instanceof char[])) return false;
+     // an absent key also yields a null value, so check presence first;
+     // otherwise (missingKey, null) would be reported as contained
+     if (!CharArrayMap.this.containsKey(key)) return false;
+     Object val = e.getValue();
+     Object v = get(key);
+     return v==null ? val==null : v.equals(val);
+   }
+
+   /** Always throws: this map cannot remove items. */
+   public boolean remove(Object o) {
+     throw new UnsupportedOperationException();
+   }
+
+   public int size() {
+     return count;
+   }
+
+   public void clear() {
+     CharArrayMap.this.clear();
+   }
+ }
+
+ /**
+  * Returns a shallow copy of this map: the key table and the value table
+  * are copied, while the char[] keys and the value objects are shared
+  * with the original.
+  */
+ @Override
+ public Object clone() {
+   CharArrayMap<V> map = null;
+   try {
+     map = (CharArrayMap<V>)super.clone();
+     map.keys = keys.clone();
+     // copy the value table; this used to be keys.clone(), which filled
+     // the copy's values with the key arrays
+     map.values = values.clone();
+   } catch (CloneNotSupportedException e) {
+     // impossible: this class implements Cloneable
+   }
+   return map;
+ }
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/java/org/apache/solr/util/CharArrayMap.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified:
lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java?rev=614793&r1=614792&r2=614793&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
(original)
+++ lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
Wed Jan 23 20:41:38 2008
@@ -48,7 +48,7 @@
}
};
- SynonymFilter sf = new SynonymFilter(ts, dict, true);
+ SynonymFilter sf = new SynonymFilter(ts, dict);
while(true) {
Token t = sf.next();
Added: lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java?rev=614793&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java
(added)
+++ lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java Wed
Jan 23 20:41:38 2008
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import junit.framework.TestCase;
+
+import java.util.*;
+
+import org.apache.lucene.analysis.StopAnalyzer;
+
+public class TestCharArrayMap extends TestCase {
+ Random r = new Random(0);
+
+ /**
+  * Inserts iter random entries into a CharArrayMap and a reference HashMap
+  * and checks after every insertion that both agree.
+  */
+ public void doRandom(int iter, boolean ignoreCase) {
+   CharArrayMap map = new CharArrayMap(1,ignoreCase);
+   HashMap hmap = new HashMap();
+
+   int round = 0;
+   while (round < iter) {
+     // random key of 0..4 chars with char values 0..126
+     char[] key = new char[r.nextInt(5)];
+     for (int j=0; j<key.length; j++) {
+       key[j] = (char)r.nextInt(127);
+     }
+     String keyStr = new String(key);
+     // HashMap does no case folding, so fold the reference key by hand
+     String hmapKey = keyStr;
+     if (ignoreCase) hmapKey = keyStr.toLowerCase();
+
+     int val = r.nextInt();
+
+     // both maps must report the same previous value
+     Object o1 = map.put(key, val);
+     Object o2 = hmap.put(hmapKey,val);
+     assertEquals(o1,o2);
+
+     // add it again with the string method
+     assertEquals(val, map.put(keyStr,val));
+
+     // every lookup flavour must find the value
+     assertEquals(val, map.get(key,0,key.length));
+     assertEquals(val, map.get(key));
+     assertEquals(val, map.get(keyStr));
+
+     assertEquals(hmap.size(), map.size());
+     round++;
+   }
+
+   assertEquals(map,hmap);
+   assertEquals(hmap, map);
+ }
+
+ /** Runs the randomized comparison in both case-sensitive and case-insensitive mode. */
+ public void testCharArrayMap() {
+   final int rounds = 5; // pump this up for more random testing
+   for (int round = 0; round < rounds; round++) {
+     doRandom(1000,false);
+     doRandom(1000,true);
+   }
+ }
+
+ /** Exercises putAll, equals, both iteration styles, setValue and clear against a HashMap. */
+ public void testMethods() {
+   CharArrayMap<Integer> cm = new CharArrayMap<Integer>(2,false);
+   HashMap<String,Integer> hm = new HashMap<String,Integer>();
+   hm.put("foo",1);
+   hm.put("bar",2);
+   cm.putAll(hm);
+   assertEquals(hm, cm);
+   assertEquals(cm, hm);
+
+   // a key present in only one map must break equality in both directions
+   hm.put("baz", 3);
+   assertFalse(hm.equals(cm));
+   assertFalse(cm.equals(hm));
+   assertTrue(cm.equals(cm));
+   cm.putAll(hm);
+   assertEquals(hm, cm);
+
+   // generic Map.Entry iteration; setValue must write through to the map
+   int seen=0;
+   for (Map.Entry<String,Integer> entry : cm.entrySet()) {
+     String key = entry.getKey();
+     Integer val = entry.getValue();
+     assertEquals(hm.get(key), val);
+     entry.setValue(val*100);
+     assertEquals(val*100, (int)cm.get(key));
+     seen++;
+   }
+   assertEquals(hm.size(), seen);
+
+   // start over with the original values for the char[]-based iteration
+   cm.clear();
+   cm.putAll(hm);
+
+   seen=0;
+   for (CharArrayMap<Integer>.EntryIterator it = cm.iterator(); it.hasNext(); seen++) {
+     char[] keyc = it.nextKey();
+     Integer val = it.currentValue();
+     assertEquals(hm.get(new String(keyc)), val);
+     it.setValue(val*100);
+     assertEquals(val*100, (int)cm.get(keyc));
+   }
+   assertEquals(hm.size(), seen);
+
+   cm.clear();
+   assertEquals(0, cm.size());
+   assertTrue(cm.isEmpty());
+ }
+
+
+ // performance test vs HashMap<String,Object>; args: <impl> <put iterations> <get iterations>
+ // HashMap will have an edge because we are testing with
+ // non-dynamically created keys and String caches hashCode
+ public static void main(String[] args) {
+ int a=0; // index of the next command-line argument to consume
+ String impl = args[a++].intern(); // hash OR chars OR char; interned so the == comparisons below work
+ int iter1 = Integer.parseInt(args[a++]); // iterations of put()
+ int iter2 = Integer.parseInt(args[a++]); // iterations of get()
+
+ int ret=0; // accumulates map sizes and looked-up values; printed as "result=" at the end
+ long start = System.currentTimeMillis(); // timing covers setup, the put() phase and the get() phase
+ String[] stopwords = StopAnalyzer.ENGLISH_STOP_WORDS;
+ // words = "this is a different test to see what is really going on
here... I hope it works well but I'm not sure it will".split(" ");
+ char[][] stopwordschars = new char[stopwords.length][];
+ for (int i=0; i<stopwords.length; i++) {
+ stopwordschars[i] = stopwords[i].toCharArray();
+ }
+
+ String[] testwords = "now is the time for all good men to come to the aid
of their country".split(" ");
+ // testwords = "this is a different test to see what is really going on
here... I hope it works well but I'm not sure it will".split(" ");
+ char[][] testwordchars = new char[testwords.length][];
+
+ for (int i=0; i<testwordchars.length; i++) {
+ testwordchars[i] = testwords[i].toCharArray();
+ }
+
+ HashMap<String,Integer> hm=null;
+ CharArrayMap<Integer> cm=null;
+
+ if (impl=="hash") { // put() phase: rebuild the map from the stop words iter1 times
+ for (int i=0; i<iter1; i++) {
+
+ hm = new HashMap<String,Integer>();
+ int v=0;
+ for (String word : stopwords) {
+ hm.put(word, ++v);
+ }
+ ret += hm.size();
+ }
+ } else if (impl=="chars") { // CharArrayMap filled through put(String)
+ for (int i=0; i<iter1; i++) {
+ cm = new CharArrayMap<Integer>(2,false);
+ int v=0;
+ for (String s : stopwords) {
+ cm.put(s,++v);
+ }
+ ret += cm.size();
+ }
+ } else if (impl=="char") { // CharArrayMap filled through put(char[])
+ for (int i=0; i<iter1; i++) {
+ cm = new CharArrayMap<Integer>(2,false);
+ int v=0;
+ for (char[] s : stopwordschars) {
+ cm.put(s,++v);
+ }
+ ret += cm.size();
+ }
+ }
+
+
+ if (impl=="hash") { // get() phase: look up every test word iter2 times
+ for (int i=0; i<iter2; i++) {
+ for (String word : testwords) {
+ Integer v = hm.get(word);
+ ret += v==null ? 0 : v;
+ }
+ }
+ } else if (impl=="chars") { // CharArrayMap queried with String keys
+ for (int i=0; i<iter2; i++) {
+ for (String word : testwords) {
+ Integer v = cm.get(word);
+ ret += v==null ? 0 : v;
+ }
+ }
+ } else if (impl=="char") { // CharArrayMap queried with char[] keys
+ for (int i=0; i<iter2; i++) {
+ for (char[] word : testwordchars) {
+ Integer v = cm.get(word);
+ ret += v==null ? 0 : v;
+ }
+ }
+ }
+
+ long end = System.currentTimeMillis();
+
+ System.out.println("result=" + ret);
+ System.out.println("time=" +(end-start));
+ }
+
+}
+
Propchange:
lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java
------------------------------------------------------------------------------
svn:executable = *
Propchange:
lucene/solr/trunk/src/test/org/apache/solr/util/TestCharArrayMap.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL