Hi, I'm using Lucene.Net along with Snowball stemming to index text from a database. The class Lucene.Net.Analysis.Snowball.SnowballFilter uses the reflection API and MethodInfo.Invoke to call the Stem methods of the Snowball stemmers. I have written a Snowball filter that creates a delegate for the Stem method once and then uses this delegate to stem each word. This approach improves the indexing speed of my indexing program by about 10%. I would be happy if you included this code in Lucene.Net.
// With kind regards, Arian
//
// Code:

using System;
using Lucene.Net.Analysis;
using SF.Snowball;
using SF.Snowball.Ext;

namespace Index.Search.Analyzers
{
    /// <summary>A filter that stems words using a Snowball-generated stemmer.
    ///
    /// Available stemmers are listed in <see cref="SF.Snowball.Ext"/>. The name of a
    /// stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
    /// <see cref="EnglishStemmer"/> is named "English".
    ///
    /// The stemmer's parameterless <c>Stem</c> method is looked up via reflection once,
    /// in the constructor, and bound to a delegate; each token is then stemmed through
    /// that delegate instead of a per-token reflective invoke. If stemming a token
    /// throws, the failure is written to the console and the original token is passed
    /// through unchanged.
    /// </summary>
    public class FailOverSnowballFilter : TokenFilter
    {
        /// <summary>Signature of the stemmer's parameterless <c>Stem</c> method.</summary>
        private delegate bool BoolVoidDelegate();

        /// <summary>Class name of the stemmer (language name + "Stemmer"); used in log output.</summary>
        private readonly string stemmerName;

        /// <summary>The stemmer instance that <see cref="stemDelegate"/> is bound to.</summary>
        private readonly SnowballProgram stemmer;

        /// <summary>Delegate bound to <c>stemmer.Stem()</c>.</summary>
        private readonly BoolVoidDelegate stemDelegate;

        /// <summary>Construct a stemmer for a certain language.
        ///
        /// </summary>
        /// <param name="in_Renamed">the input tokens to stem
        /// </param>
        /// <param name="name">the language name of a stemmer
        /// </param>
        /// <exception cref="System.SystemException">
        /// The stemmer class could not be instantiated or its <c>Stem</c> method could
        /// not be bound. The original failure is available as the inner exception.
        /// </exception>
        public FailOverSnowballFilter(TokenStream in_Renamed, System.String name)
            : base(in_Renamed)
        {
            stemmerName = name + "Stemmer";
            try
            {
                stemmer = (SnowballProgram)Activator.CreateInstance("Snowball.Net", "SF.Snowball.Ext." + stemmerName).Unwrap();

                // Resolve the parameterless Stem() overload once and bind it to the
                // instance, so Next() does not have to go through reflection.
                System.Reflection.MethodInfo stemMethod = stemmer.GetType().GetMethod("Stem", Type.EmptyTypes);
                stemDelegate = (BoolVoidDelegate)Delegate.CreateDelegate(typeof(BoolVoidDelegate), stemmer, stemMethod);
            }
            catch (System.Exception e)
            {
                // Same exception type and message as before; the cause is now also
                // attached as InnerException so callers can inspect it.
                throw new System.SystemException(e.ToString(), e);
            }
        }

        /// <summary>Returns the next input Token, after being stemmed </summary>
        /// <returns>
        /// <c>null</c> when the input is exhausted; otherwise a new token carrying the
        /// stemmed text with the original offsets, type and position increment, or the
        /// original token itself if the stemmer threw.
        /// </returns>
        public override Token Next()
        {
            Token token = input.Next();
            if (token == null)
            {
                return null;
            }

            stemmer.SetCurrent(token.TermText());
            try
            {
                stemDelegate();
            }
            catch (System.Exception e)
            {
                Console.WriteLine(string.Format(
                    "{0} was not able to stemm token \"{1}\", using token directly.\n {2}",
                    stemmerName, token.TermText(), e.ToString()));

                // The stemmer's buffer cannot be trusted after a failed run, so do
                // what the message says and hand back the unmodified token.
                return token;
            }

            Token newToken = new Token(stemmer.GetCurrent(), token.StartOffset(), token.EndOffset(), token.Type());
            newToken.SetPositionIncrement(token.GetPositionIncrement());
            return newToken;
        }
    }
}

// ---------------------------------------------------------------------------
// Subscribe to and unsubscribe from the SCHEMA mailing list at http://www.schema.de/mail
// ---------------------------------------------------------------------------