Thank you for your message, Yonik; that was very helpful. I didn't have much 
luck with the SnowballPorterFilterFactory, so I wrote my own factory last night, 
and, as you said, it gives me much more flexibility. Here it is for anyone who's 
interested:

package myApp;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.solr.analysis.BaseTokenFilterFactory;
import org.apache.lucene.analysis.ru.RussianStemFilter;
import org.apache.lucene.analysis.ru.RussianCharsets;


import java.io.Reader;

/**
 * Token filter factory that wraps a token stream in a {@link RussianStemFilter}.
 *
 * <p>The character table handed to the filter is chosen from the factory's
 * "charset" init argument:
 * <ul>
 *   <li>"KOI8"   - {@code RussianCharsets.KOI8}</li>
 *   <li>"CP1251" - {@code RussianCharsets.CP1251}</li>
 *   <li>absent or any other value - {@code RussianCharsets.UnicodeRussian}</li>
 * </ul>
 * The argument value is compared exactly (case-sensitive).
 */
public class RussianStemFilterFactory extends BaseTokenFilterFactory {

   /**
    * Builds the stemming filter over the given stream.
    *
    * @param input the token stream to be stemmed
    * @return a {@link RussianStemFilter} reading from {@code input} with the
    *         character table selected by the "charset" argument
    */
   public TokenStream create(TokenStream input) {
      final String requested = getArgs().get("charset");

      // Constant-first equals() makes a missing argument fall through to the
      // default branch without a separate null check.
      final char[] table;
      if ("CP1251".equals(requested)) {
         table = RussianCharsets.CP1251;
      } else if ("KOI8".equals(requested)) {
         table = RussianCharsets.KOI8;
      } else {
         table = RussianCharsets.UnicodeRussian;
      }

      return new RussianStemFilter(input, table);
   }
}






Reply via email to