nl DutchAnalyzer.java DutchStemFilter.java DutchStemmer.java

ehatcher Fri, 12 Mar 2004 07:52:58 -0800

ehatcher    2004/03/12 07:52:59

  Modified:    contributions/analyzers/src/java/org/apache/lucene/analysis/br
                        BrazilianAnalyzer.java BrazilianStemFilter.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/cjk
                        CJKAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/cz
                        CzechAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/fr
                        FrenchAnalyzer.java FrenchStemFilter.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/nl
                        DutchAnalyzer.java DutchStemFilter.java
                        DutchStemmer.java
  Log:
  clean-up based on core changes to StopFilter
  
  Revision  Changes    Path
  1.5       +3 -2      
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
  
  Index: BrazilianAnalyzer.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- BrazilianAnalyzer.java    11 Mar 2004 03:05:36 -0000      1.4
  +++ BrazilianAnalyzer.java    12 Mar 2004 15:52:58 -0000      1.5
  @@ -65,6 +65,7 @@
   import java.io.Reader;
   import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Set;
   
   /**
    * Analyzer for brazilian language. Supports an external list of stopwords (words that
  @@ -103,11 +104,11 @@
        /**
         * Contains the stopwords used with the StopFilter.
         */
  -     private HashSet stoptable = new HashSet();
  +     private Set stoptable = new HashSet();
        /**
         * Contains words that should be indexed but not stemmed.
         */
  -     private HashSet excltable = new HashSet();
  +     private Set excltable = new HashSet();
   
        /**
         * Builds an analyzer.
  
  
  
  1.6       +52 -53    
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
  
  Index: BrazilianStemFilter.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- BrazilianStemFilter.java  11 Mar 2004 03:05:36 -0000      1.5
  +++ BrazilianStemFilter.java  12 Mar 2004 15:52:58 -0000      1.6
  @@ -57,72 +57,71 @@
   import org.apache.lucene.analysis.Token;
   import org.apache.lucene.analysis.TokenFilter;
   import org.apache.lucene.analysis.TokenStream;
  +
   import java.io.IOException;
  -import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Hashtable;
  +import java.util.Set;
   
   /**
    * Based on (copied) the GermanStemFilter
    *
  - *
  - * @author    João Kramer
  - *
  - *
  - * A filter that stemms german words. It supports a table of words that should
  - * not be stemmed at all.
  - *
  - * @author    Gerhard Schwarz
  + * @author João Kramer
  + *         <p/>
  + *         <p/>
  + *         A filter that stemms german words. It supports a table of words that should
  + *         not be stemmed at all.
  + * @author Gerhard Schwarz
    */
   public final class BrazilianStemFilter extends TokenFilter {
   
  -     /**
  -      * The actual token in the input stream.
  -      */
  -     private Token token = null;
  -     private BrazilianStemmer stemmer = null;
  -     private HashSet exclusions = null;
  +  /**
  +   * The actual token in the input stream.
  +   */
  +  private Token token = null;
  +  private BrazilianStemmer stemmer = null;
  +  private Set exclusions = null;
   
  -     public BrazilianStemFilter( TokenStream in ) {
  +  public BrazilianStemFilter(TokenStream in) {
       super(in);
  -             stemmer = new BrazilianStemmer();
  -     }
  +    stemmer = new BrazilianStemmer();
  +  }
   
  -     /**
  -      * Builds a BrazilianStemFilter that uses an exclusiontable.
  -   * 
  +  /**
  +   * Builds a BrazilianStemFilter that uses an exclusiontable.
  +   *
      * @deprecated
  -      */
  -     public BrazilianStemFilter( TokenStream in, Hashtable exclusiontable ) {
  -             this( in );
  -             this.exclusions = new HashSet(exclusiontable.keySet());
  -     }
  -
  -     public BrazilianStemFilter( TokenStream in, HashSet exclusiontable ) {
  -             this( in );
  -             this.exclusions = exclusiontable;
  -     }
  -
  -     /**
  -      * @return  Returns the next token in the stream, or null at EOS.
  -      */
  -     public final Token next()
  -             throws IOException {
  -             if ( ( token = input.next() ) == null ) {
  -                     return null;
  -             }
  -             // Check the exclusiontable.
  -             else if ( exclusions != null && exclusions.contains( token.termText() ) ) {
  -                     return token;
  -             }
  -             else {
  -                     String s = stemmer.stem( token.termText() );
  -                     // If not stemmed, dont waste the time creating a new token.
  -                     if ( (s != null) && !s.equals( token.termText() ) ) {
  -                             return new Token( s, 0, s.length(), token.type() );
  -                     }
  -                     return token;
  -             }
  -     }
  +   */
  +  public BrazilianStemFilter(TokenStream in, Hashtable exclusiontable) {
  +    this(in);
  +    this.exclusions = new HashSet(exclusiontable.keySet());
  +  }
  +
  +  public BrazilianStemFilter(TokenStream in, Set exclusiontable) {
  +    this(in);
  +    this.exclusions = exclusiontable;
  +  }
  +
  +  /**
  +   * @return Returns the next token in the stream, or null at EOS.
  +   */
  +  public final Token next()
  +      throws IOException {
  +    if ((token = input.next()) == null) {
  +      return null;
  +    }
  +    // Check the exclusiontable.
  +    else if (exclusions != null && exclusions.contains(token.termText())) {
  +      return token;
  +    } else {
  +      String s = stemmer.stem(token.termText());
  +      // If not stemmed, dont waste the time creating a new token.
  +      if ((s != null) && !s.equals(token.termText())) {
  +        return new Token(s, 0, s.length(), token.type());
  +      }
  +      return token;
  +    }
  +  }
   }
   
   
  
  
  
  1.4       +55 -56    
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
  
  Index: CJKAnalyzer.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- CJKAnalyzer.java  11 Mar 2004 03:05:36 -0000      1.3
  +++ CJKAnalyzer.java  12 Mar 2004 15:52:58 -0000      1.4
  @@ -61,9 +61,7 @@
   import org.apache.lucene.analysis.TokenStream;
   
   import java.io.Reader;
  -
  -import java.util.Hashtable;
  -import java.util.HashSet;
  +import java.util.Set;
   
   
   /**
  @@ -72,57 +70,58 @@
    * @author Che, Dong
    */
   public class CJKAnalyzer extends Analyzer {
  -    //~ Static fields/initializers ---------------------------------------------
  +  //~ Static fields/initializers ---------------------------------------------
   
  -    /**
  -     * An array containing some common English words that are not usually
  -     * useful for searching. and some double-byte interpunctions.....
  -     */
  -    private static String[] stopWords = {
  -                                            "a", "and", "are", "as", "at", "be",
  -                                            "but", "by", "for", "if", "in",
  -                                            "into", "is", "it", "no", "not",
  -                                            "of", "on", "or", "s", "such", "t",
  -                                            "that", "the", "their", "then",
  -                                            "there", "these", "they", "this",
  -                                            "to", "was", "will", "with", "",
  -                                            "www"
  -                                        };
  -
  -    //~ Instance fields --------------------------------------------------------
  -
  -    /** stop word list */
  -    private HashSet stopTable;
  -
  -    //~ Constructors -----------------------------------------------------------
  -
  -    /**
  -     * Builds an analyzer which removes words in STOP_WORDS.
  -     */
  -    public CJKAnalyzer() {
  -        stopTable = StopFilter.makeStopSet(stopWords);
  -    }
  -
  -    /**
  -     * Builds an analyzer which removes words in the provided array.
  -     *
  -     * @param stopWords stop word array
  -     */
  -    public CJKAnalyzer(String[] stopWords) {
  -        stopTable = StopFilter.makeStopSet(stopWords);
  -    }
  -
  -    //~ Methods ----------------------------------------------------------------
  -
  -    /**
  -     * get token stream from input
  -     *
  -     * @param fieldName lucene field name
  -     * @param reader input reader
  -     *
  -     * @return TokenStream
  -     */
  -    public final TokenStream tokenStream(String fieldName, Reader reader) {
  -        return new StopFilter(new CJKTokenizer(reader), stopTable);
  -    }
  +  /**
  +   * An array containing some common English words that are not usually
  +   * useful for searching. and some double-byte interpunctions.....
  +   */
  +  private static String[] stopWords = {
  +    "a", "and", "are", "as", "at", "be",
  +    "but", "by", "for", "if", "in",
  +    "into", "is", "it", "no", "not",
  +    "of", "on", "or", "s", "such", "t",
  +    "that", "the", "their", "then",
  +    "there", "these", "they", "this",
  +    "to", "was", "will", "with", "",
  +    "www"
  +  };
  +
  +  //~ Instance fields --------------------------------------------------------
  +
  +  /**
  +   * stop word list
  +   */
  +  private Set stopTable;
  +
  +  //~ Constructors -----------------------------------------------------------
  +
  +  /**
  +   * Builds an analyzer which removes words in STOP_WORDS.
  +   */
  +  public CJKAnalyzer() {
  +    stopTable = StopFilter.makeStopSet(stopWords);
  +  }
  +
  +  /**
  +   * Builds an analyzer which removes words in the provided array.
  +   *
  +   * @param stopWords stop word array
  +   */
  +  public CJKAnalyzer(String[] stopWords) {
  +    stopTable = StopFilter.makeStopSet(stopWords);
  +  }
  +
  +  //~ Methods ----------------------------------------------------------------
  +
  +  /**
  +   * get token stream from input
  +   *
  +   * @param fieldName lucene field name
  +   * @param reader    input reader
  +   * @return TokenStream
  +   */
  +  public final TokenStream tokenStream(String fieldName, Reader reader) {
  +    return new StopFilter(new CJKTokenizer(reader), stopTable);
  +  }
   }
  
  
  
  1.4       +2 -1      
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
  
  Index: CzechAnalyzer.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- CzechAnalyzer.java        11 Mar 2004 03:05:36 -0000      1.3
  +++ CzechAnalyzer.java        12 Mar 2004 15:52:58 -0000      1.4
  @@ -65,6 +65,7 @@
   import java.io.*;
   import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Set;
   
   /**
    * Analyzer for Czech language. Supports an external list of stopwords (words that
  @@ -103,7 +104,7 @@
        /**
         * Contains the stopwords used with the StopFilter.
         */
  -     private HashSet stoptable;
  +     private Set stoptable;
   
        /**
         * Builds an analyzer.
  
  
  
  1.5       +110 -107  
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
  
  Index: FrenchAnalyzer.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- FrenchAnalyzer.java       11 Mar 2004 03:05:36 -0000      1.4
  +++ FrenchAnalyzer.java       12 Mar 2004 15:52:58 -0000      1.5
  @@ -58,14 +58,15 @@
   import org.apache.lucene.analysis.LowerCaseFilter;
   import org.apache.lucene.analysis.StopFilter;
   import org.apache.lucene.analysis.TokenStream;
  +import org.apache.lucene.analysis.de.WordlistLoader;
   import org.apache.lucene.analysis.standard.StandardFilter;
   import org.apache.lucene.analysis.standard.StandardTokenizer;
  +
   import java.io.File;
   import java.io.Reader;
  -import java.util.Hashtable;
   import java.util.HashSet;
  -
  -import org.apache.lucene.analysis.de.WordlistLoader;
  +import java.util.Hashtable;
  +import java.util.Set;
   
   /**
    * Analyzer for french language. Supports an external list of stopwords (words that

  @@ -74,115 +75,117 @@
    * A default set of stopwords is used unless an other list is specified, the

    * exclusionlist is empty by default.

    *

  - * @author    Patrick Talbot (based on Gerhard Schwarz work for German)

  - * @version   $Id$

  + * @author Patrick Talbot (based on Gerhard Schwarz work for German)

  + * @version $Id$

    */

   public final class FrenchAnalyzer extends Analyzer {

   

  -     /**

  -      * Extended list of typical french stopwords.

  -      */

  -     private String[] FRENCH_STOP_WORDS = {

  -             "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", 
"auquel", "aussi",

  -             "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", 
"avec", "avoir",

  -             "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", 
"cependant", "certain",

  -             "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", 
"chez", "ci",

  -             "combien", "comme", "comment", "concernant", "contre", "d", "dans", 
"de", "debout",

  -             "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", 
"desquelles",

  -             "desquels", "dessous", "dessus", "devant", "devers", "devra", 
"divers", "diverse",

  -             "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", 
"elle", "elles",

  -             "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", 
"excepté", "hormis",

  -             "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", 
"l", "la", "laquelle",

  -             "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", 
"lorsque", "lui", "là",

  -             "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", 
"miennes", "miens", "moi",

  -             "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", 
"nos", "notre",

  -             "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", 
"où", "par", "parmi",

  -             "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", 
"pour", "pourquoi",

  -             "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", 
"quelles", "quels",

  -             "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", 
"sauf", "se", "selon",

  -             "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", 
"soi", "soit",

  -             "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", 
"tienne", "tiennes",

  -             "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", 
"une", "va", "vers",

  -             "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", 
"y", "à", "ça", "ès",

  -             "été", "être", "ô"

  -     };

  -

  -     /**

  -      * Contains the stopwords used with the StopFilter.

  -      */

  -     private HashSet stoptable = new HashSet();

  -     /**

  -      * Contains words that should be indexed but not stemmed.

  -      */

  -     private HashSet excltable = new HashSet();

  -

  -     /**

  -      * Builds an analyzer.

  -      */

  -     public FrenchAnalyzer() {

  -             stoptable = StopFilter.makeStopSet( FRENCH_STOP_WORDS );

  -     }

  -

  -     /**

  -      * Builds an analyzer with the given stop words.

  -      */

  -     public FrenchAnalyzer( String[] stopwords ) {

  -             stoptable = StopFilter.makeStopSet( stopwords );

  -     }

  +  /**

  +   * Extended list of typical french stopwords.

  +   */

  +  private String[] FRENCH_STOP_WORDS = {

  +    "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", 
"aussi",

  +    "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", 
"avoir",

  +    "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", 
"certain",

  +    "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", 
"ci",

  +    "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", 
"debout",

  +    "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", 
"desquelles",

  +    "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", 
"diverse",

  +    "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", 
"elles",

  +    "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", 
"hormis",

  +    "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", 
"laquelle",

  +    "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", 
"lui", "là",

  +    "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", 
"miens", "moi",

  +    "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", 
"notre",

  +    "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", 
"par", "parmi",

  +    "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", 
"pourquoi",

  +    "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", 
"quels",

  +    "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", 
"se", "selon",

  +    "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", 
"soit",

  +    "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", 
"tiennes",

  +    "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", 
"va", "vers",

  +    "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", 
"ça", "ès",

  +    "été", "être", "ô"

  +  };

  +

  +  /**

  +   * Contains the stopwords used with the StopFilter.

  +   */

  +  private Set stoptable = new HashSet();

  +  /**

  +   * Contains words that should be indexed but not stemmed.

  +   */

  +  private Set excltable = new HashSet();

  +

  +  /**

  +   * Builds an analyzer.

  +   */

  +  public FrenchAnalyzer() {

  +    stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);

  +  }

  +

  +  /**

  +   * Builds an analyzer with the given stop words.

  +   */

  +  public FrenchAnalyzer(String[] stopwords) {

  +    stoptable = StopFilter.makeStopSet(stopwords);

  +  }

   

  -     /**

  -      * Builds an analyzer with the given stop words.

  +  /**

  +   * Builds an analyzer with the given stop words.

      *

      * @deprecated

  -      */

  -     public FrenchAnalyzer( Hashtable stopwords ) {

  -             stoptable = new HashSet(stopwords.keySet());

  -     }

  -

  -     /**

  -      * Builds an analyzer with the given stop words.

  -      */

  -     public FrenchAnalyzer( File stopwords ) {

  -             stoptable = new HashSet(WordlistLoader.getWordtable( stopwords 
).keySet());

  -     }

  -

  -     /**

  -      * Builds an exclusionlist from an array of Strings.

  -      */

  -     public void setStemExclusionTable( String[] exclusionlist ) {

  -             excltable = StopFilter.makeStopSet( exclusionlist );

  -     }

  -     /**

  -      * Builds an exclusionlist from a Hashtable.

  -      */

  -     public void setStemExclusionTable( Hashtable exclusionlist ) {

  -             excltable = new HashSet(exclusionlist.keySet());

  -     }

  -     /**

  -      * Builds an exclusionlist from the words contained in the given file.

  -      */

  -     public void setStemExclusionTable( File exclusionlist ) {

  -             excltable = new HashSet(WordlistLoader.getWordtable( exclusionlist 
).keySet());

  -     }

  -

  -     /**

  -      * Creates a TokenStream which tokenizes all the text in the provided Reader.

  -      *

  -      * @return  A TokenStream build from a StandardTokenizer filtered with

  -      *                      StandardFilter, StopFilter, FrenchStemFilter and 
LowerCaseFilter

  -      */

  -     public final TokenStream tokenStream( String fieldName, Reader reader ) {

  -             

  -             if (fieldName==null) throw new IllegalArgumentException("fieldName 
must not be null");

  -             if (reader==null) throw new IllegalArgumentException("readermust not 
be null");

  -                             

  -             TokenStream result = new StandardTokenizer( reader );

  -             result = new StandardFilter( result );

  -             result = new StopFilter( result, stoptable );

  -             result = new FrenchStemFilter( result, excltable );

  -             // Convert to lowercase after stemming!

  -             result = new LowerCaseFilter( result );

  -             return result;

  -     }

  +   */

  +  public FrenchAnalyzer(Hashtable stopwords) {

  +    stoptable = new HashSet(stopwords.keySet());

  +  }

  +

  +  /**

  +   * Builds an analyzer with the given stop words.

  +   */

  +  public FrenchAnalyzer(File stopwords) {

  +    stoptable = new HashSet(WordlistLoader.getWordtable(stopwords).keySet());

  +  }

  +

  +  /**

  +   * Builds an exclusionlist from an array of Strings.

  +   */

  +  public void setStemExclusionTable(String[] exclusionlist) {

  +    excltable = StopFilter.makeStopSet(exclusionlist);

  +  }

  +

  +  /**

  +   * Builds an exclusionlist from a Hashtable.

  +   */

  +  public void setStemExclusionTable(Hashtable exclusionlist) {

  +    excltable = new HashSet(exclusionlist.keySet());

  +  }

  +

  +  /**

  +   * Builds an exclusionlist from the words contained in the given file.

  +   */

  +  public void setStemExclusionTable(File exclusionlist) {

  +    excltable = new HashSet(WordlistLoader.getWordtable(exclusionlist).keySet());

  +  }

  +

  +  /**

  +   * Creates a TokenStream which tokenizes all the text in the provided Reader.

  +   *

  +   * @return A TokenStream build from a StandardTokenizer filtered with

  +   *         StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter

  +   */

  +  public final TokenStream tokenStream(String fieldName, Reader reader) {

  +

  +    if (fieldName == null) throw new IllegalArgumentException("fieldName must not 
be null");

  +    if (reader == null) throw new IllegalArgumentException("readermust not be 
null");

  +

  +    TokenStream result = new StandardTokenizer(reader);

  +    result = new StandardFilter(result);

  +    result = new StopFilter(result, stoptable);

  +    result = new FrenchStemFilter(result, excltable);

  +    // Convert to lowercase after stemming!

  +    result = new LowerCaseFilter(result);

  +    return result;

  +  }

   }

   

  
  
  
  1.4       +3 -2      
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
  
  Index: FrenchStemFilter.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- FrenchStemFilter.java     11 Mar 2004 03:05:36 -0000      1.3
  +++ FrenchStemFilter.java     12 Mar 2004 15:52:59 -0000      1.4
  @@ -60,6 +60,7 @@
   import java.io.IOException;
   import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Set;
   
   /**
    * A filter that stemms french words. It supports a table of words that should
  @@ -75,7 +76,7 @@
         */
        private Token token = null;
        private FrenchStemmer stemmer = null;
  -     private HashSet exclusions = null;
  +     private Set exclusions = null;
   
        public FrenchStemFilter( TokenStream in ) {
       super(in);
  @@ -92,7 +93,7 @@
                exclusions = new HashSet(exclusiontable.keySet());
        }
   
  -     public FrenchStemFilter( TokenStream in, HashSet exclusiontable ) {
  +     public FrenchStemFilter( TokenStream in, Set exclusiontable ) {
                this( in );
                exclusions = exclusiontable;
        }

  
  
  
  1.3       +5 -3      
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
  
  Index: DutchAnalyzer.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DutchAnalyzer.java        11 Mar 2004 03:05:36 -0000      1.2
  +++ DutchAnalyzer.java        12 Mar 2004 15:52:59 -0000      1.3
  @@ -26,6 +26,8 @@
   import java.io.Reader;
   import java.util.HashMap;
   import java.util.HashSet;
  +import java.util.Set;
  +import java.util.Map;
   
   /**
    * @author Edwin de Jonge
  @@ -61,14 +63,14 @@
     /**
      * Contains the stopwords used with the StopFilter.
      */
  -  private HashSet stoptable = new HashSet();
  +  private Set stoptable = new HashSet();
   
     /**
      * Contains words that should be indexed but not stemmed.
      */
  -  private HashSet excltable = new HashSet();
  +  private Set excltable = new HashSet();
   
  -  private HashMap _stemdict = new HashMap();
  +  private Map _stemdict = new HashMap();
   
   
     /**
  
  
  
  1.3       +5 -3      
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
  
  Index: DutchStemFilter.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DutchStemFilter.java      11 Mar 2004 03:05:36 -0000      1.2
  +++ DutchStemFilter.java      12 Mar 2004 15:52:59 -0000      1.3
  @@ -23,6 +23,8 @@
   import java.io.IOException;
   import java.util.HashMap;
   import java.util.HashSet;
  +import java.util.Set;
  +import java.util.Map;
   
   /**
    * @author Edwin de Jonge
  @@ -37,7 +39,7 @@
      */
     private Token token = null;
     private DutchStemmer stemmer = null;
  -  private HashSet exclusions = null;
  +  private Set exclusions = null;
   
     public DutchStemFilter(TokenStream _in) {
       super(_in);
  @@ -47,7 +49,7 @@
     /**
      * Builds a DutchStemFilter that uses an exclusiontable.
      */
  -  public DutchStemFilter(TokenStream _in, HashSet exclusiontable) {
  +  public DutchStemFilter(TokenStream _in, Set exclusiontable) {
       this(_in);
       exclusions = exclusiontable;
     }
  @@ -55,7 +57,7 @@
     /**
      * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
      */
  -  public DutchStemFilter(TokenStream _in, HashSet exclusiontable, HashMap stemdictionary) {
  +  public DutchStemFilter(TokenStream _in, Set exclusiontable, Map stemdictionary) {
       this(_in, exclusiontable);
       stemmer.setStemDictionary(stemdictionary);
     }
  
  
  
  1.3       +3 -3      
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
  
  Index: DutchStemmer.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DutchStemmer.java 11 Mar 2004 03:05:36 -0000      1.2
  +++ DutchStemmer.java 12 Mar 2004 15:52:59 -0000      1.3
  @@ -16,7 +16,7 @@
    * limitations under the License.
    */
   
  -import java.util.HashMap;
  +import java.util.Map;
   
   /*
    * @author Edwin de Jonge ([EMAIL PROTECTED])
  @@ -32,7 +32,7 @@
      */
     private StringBuffer sb = new StringBuffer();
     private boolean _removedE;
  -  private HashMap _stemDict;
  +  private Map _stemDict;
   
     private int _R1;
     private int _R2;
  @@ -399,7 +399,7 @@
       return false;
     }
   
  -  void setStemDictionary(HashMap dict) {
  +  void setStemDictionary(Map dict) {
       _stemDict = dict;
     }
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl DutchAnalyzer.java DutchStemFilter.java DutchStemmer.java

Reply via email to