My apologies for the errant commit of WordlistLoader... it was a local change (formatting clean-up) and I did not intend to commit it.

But here is the change to StopFilter that has been discussed. I went ahead and converted it to a Set. Do we really need makeStopSet to be public? If you have a String[], my recommendation would be to just call the constructor that takes it. I'll happily make it protected.

Erik

Begin forwarded message:

From: [EMAIL PROTECTED]
Date: March 9, 2004 7:18:02 PM EST
To: [EMAIL PROTECTED]
Subject: cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de WordlistLoader.java
Reply-To: "Lucene Developers List" <[EMAIL PROTECTED]>


ehatcher 2004/03/09 16:18:02

Modified: src/java/org/apache/lucene/analysis StopFilter.java
src/java/org/apache/lucene/analysis/de WordlistLoader.java
Log:
convert Hashtable to Set, to avoid unnecessary synchronization issues


Revision Changes Path
1.7 +51 -12 jakarta-lucene/src/java/org/apache/lucene/analysis/StopFilter.java


Index: StopFilter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/ StopFilter.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- StopFilter.java 5 Dec 2003 14:30:12 -0000 1.6
+++ StopFilter.java 10 Mar 2004 00:18:02 -0000 1.7
@@ -55,31 +55,55 @@
*/


   import java.io.IOException;
  +import java.util.HashSet;
   import java.util.Hashtable;
  +import java.util.Set;

  -/** Removes stop words from a token stream. */
  +/**
  + * Removes stop words from a token stream.
  + */

public final class StopFilter extends TokenFilter {

  -  private Hashtable table;
  +  private Set table;

  -  /** Constructs a filter which removes words from the input
  -   TokenStream that are named in the array of words. */
  +  /**
  +   * Constructs a filter which removes words from the input
  +   * TokenStream that are named in the array of words.
  +   */
     public StopFilter(TokenStream in, String[] stopWords) {
       super(in);
  -    table = makeStopTable(stopWords);
  +    table = makeStopSet(stopWords);
     }

- /** Constructs a filter which removes words from the input
- TokenStream that are named in the Hashtable. */
+ /**
+ * Constructs a filter which removes words from the input
+ * TokenStream that are named in the Hashtable.
+ *
+ * @deprecated Use {@link #StopFilter(TokenStream, Set)} StopFilter(TokenStream,Map)} instead
+ */
public StopFilter(TokenStream in, Hashtable stopTable) {
super(in);
+ table = stopTable.keySet();
+ }
+
+ /**
+ * Constructs a filter which removes words from the input
+ * TokenStream that are named in the Set.
+ */
+ public StopFilter(TokenStream in, Set stopTable) {
+ super(in);
table = stopTable;
}


- /** Builds a Hashtable from an array of stop words, appropriate for passing
- into the StopFilter constructor. This permits this table construction to
- be cached once when an Analyzer is constructed. */
+ /**
+ * Builds a Hashtable from an array of stop words,
+ * appropriate for passing into the StopFilter constructor.
+ * This permits this table construction to be cached once when
+ * an Analyzer is constructed.
+ *
+ * @deprecated Use {@link #makeStopSet(String[] makeStopSet) instead.
+ */
public static final Hashtable makeStopTable(String[] stopWords) {
Hashtable stopTable = new Hashtable(stopWords.length);
for (int i = 0; i < stopWords.length; i++)
@@ -87,11 +111,26 @@
return stopTable;
}


- /** Returns the next input Token whose termText() is not a stop word. */
+ /**
+ * Builds a Set from an array of stop words,
+ * appropriate for passing into the StopFilter constructor.
+ * This permits this table construction to be cached once when
+ * an Analyzer is constructed.
+ */
+ public static final Set makeStopSet(String[] stopWords) {
+ Set stopTable = new HashSet(stopWords.length);
+ for (int i = 0; i < stopWords.length; i++)
+ stopTable.add(stopWords[i]);
+ return stopTable;
+ }
+
+ /**
+ * Returns the next input Token whose termText() is not a stop word.
+ */
public final Token next() throws IOException {
// return the first non-stop word found
for (Token token = input.next(); token != null; token = input.next())
- if (table.get(token.termText) == null)
+ if (!table.contains(token.termText))
return token;
// reached EOS -- return null
return null;




1.5 +60 -61 jakarta-lucene/src/java/org/apache/lucene/analysis/de/ WordlistLoader.java

Index: WordlistLoader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/ WordlistLoader.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- WordlistLoader.java 18 Aug 2002 17:33:16 -0000 1.4
+++ WordlistLoader.java 10 Mar 2004 00:18:02 -0000 1.5
@@ -68,71 +68,70 @@
* @author Gerhard Schwarz
* @version $Id$
*/
-public class WordlistLoader
-{
- /**
- * @param path Path to the wordlist
- * @param wordfile Name of the wordlist
- */
- public static Hashtable getWordtable( String path, String wordfile ) {
- if ( path == null || wordfile == null ) {
- return new Hashtable();
- }
- return getWordtable( new File( path, wordfile ) );
+public class WordlistLoader {
+ /**
+ * @param path Path to the wordlist
+ * @param wordfile Name of the wordlist
+ */
+ public static Hashtable getWordtable(String path, String wordfile) {
+ if (path == null || wordfile == null) {
+ return new Hashtable();
}
+ return getWordtable(new File(path, wordfile));
+ }


  -    /**
  -     * @param wordfile  Complete path to the wordlist
  -     */
  -    public static Hashtable getWordtable( String wordfile ) {
  -     if ( wordfile == null ) {
  -         return new Hashtable();
  -     }
  -     return getWordtable( new File( wordfile ) );
  +  /**
  +   * @param wordfile  Complete path to the wordlist
  +   */
  +  public static Hashtable getWordtable(String wordfile) {
  +    if (wordfile == null) {
  +      return new Hashtable();
       }
  +    return getWordtable(new File(wordfile));
  +  }

- /**
- * @param wordfile File containing the wordlist
- */
- public static Hashtable getWordtable( File wordfile ) {
- if ( wordfile == null ) {
- return new Hashtable();
- }
- Hashtable result = null;
- try {
- LineNumberReader lnr = new LineNumberReader( new FileReader( wordfile ) );
- String word = null;
- String[] stopwords = new String[100];
- int wordcount = 0;
- while ( ( word = lnr.readLine() ) != null ) {
- wordcount++;
- if ( wordcount == stopwords.length ) {
- String[] tmp = new String[stopwords.length + 50];
- System.arraycopy( stopwords, 0, tmp, 0, wordcount );
- stopwords = tmp;
- }
- stopwords[wordcount-1] = word;
- }
- result = makeWordTable( stopwords, wordcount );
- }
- // On error, use an empty table
- catch ( IOException e ) {
- result = new Hashtable();
- }
- return result;
+ /**
+ * @param wordfile File containing the wordlist
+ */
+ public static Hashtable getWordtable(File wordfile) {
+ if (wordfile == null) {
+ return new Hashtable();
}
+ Hashtable result = null;
+ try {
+ LineNumberReader lnr = new LineNumberReader(new FileReader(wordfile));
+ String word = null;
+ String[] stopwords = new String[100];
+ int wordcount = 0;
+ while ((word = lnr.readLine()) != null) {
+ wordcount++;
+ if (wordcount == stopwords.length) {
+ String[] tmp = new String[stopwords.length + 50];
+ System.arraycopy(stopwords, 0, tmp, 0, wordcount);
+ stopwords = tmp;
+ }
+ stopwords[wordcount - 1] = word;
+ }
+ result = makeWordTable(stopwords, wordcount);
+ }
+// On error, use an empty table
+ catch (IOException e) {
+ result = new Hashtable();
+ }
+ return result;
+ }


- /**
- * Builds the wordlist table.
- *
- * @param words Word that where read
- * @param length Amount of words that where read into <tt>words</tt>
- */
- private static Hashtable makeWordTable( String[] words, int length ) {
- Hashtable table = new Hashtable( length );
- for ( int i = 0; i < length; i++ ) {
- table.put( words[i], words[i] );
- }
- return table;
+ /**
+ * Builds the wordlist table.
+ *
+ * @param words Word that where read
+ * @param length Amount of words that where read into <tt>words</tt>
+ */
+ private static Hashtable makeWordTable(String[] words, int length) {
+ Hashtable table = new Hashtable(length);
+ for (int i = 0; i < length; i++) {
+ table.put(words[i], words[i]);
}
+ return table;
+ }
}





--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Reply via email to