Hi Arian, In order for us to accept code, please use JIRA to submit a patch. Lucene.Net JIRA is here: https://issues.apache.org/jira/browse/LUCENENET
Regards, -- George > -----Original Message----- > From: Bär, Arian [mailto:arian.b...@schema.de] > Sent: Monday, April 06, 2009 8:05 AM > To: lucene-net-dev@incubator.apache.org > Subject: SnowballFilter speed improvment > > Hi, > > I'm using Lucene.Net along with snowball stemming to index > text from a database. The class > Lucene.Net.Analysis.Snowball.SnowballFilter uses the > reflection API and the invoke method to call the stem methods > of snowball. I have written a Snowball filter which creates a > delegate and uses this delegate to stem the words afterwards. > This approach improves the indexing speed of my indexing > program by about 10%. I would be happy if you include this > code into lucene.net. > > With kind Regards, > Arian > > Code: > > using System; > using Lucene.Net.Analysis; > using SF.Snowball; > using SF.Snowball.Ext; > > namespace Index.Search.Analyzers > { > > /// <summary>A filter that stems words using a > Snowball-generated stemmer. > /// > /// Available stemmers are listed in {@link > SF.Snowball.Ext}. The name of a > /// stemmer is the part of the class name before > "Stemmer", e.g., the stemmer in > /// {@link EnglishStemmer} is named "English". > /// </summary> > > public class FailOverSnowballFilter : TokenFilter > { > private static readonly System.Object[] > EMPTY_ARGS = new System.Object[0]; > string stemmerName = string.Empty; > > private delegate bool BoolVoidDelegate(); > private BoolVoidDelegate tehMeth0d; > > private SnowballProgram stemmer; > private System.Reflection.MethodInfo stemMethod; > > /// <summary>Construct a stemmer for a certain language. 
> /// > /// </summary> > /// <param name="in">the input tokens to stem > /// </param> > /// <param name="name">the language name of a stemmer > /// </param> > public FailOverSnowballFilter(TokenStream > in_Renamed, System.String name) > : base(in_Renamed) > { > stemmerName = name + "Stemmer"; > try > { > stemmer = > (SnowballProgram)Activator.CreateInstance("Snowball.Net", > "SF.Snowball.Ext." + stemmerName).Unwrap(); > > stemMethod = > stemmer.GetType().GetMethod("Stem", (new Type[0] == null) ? > new Type[0] : (Type[])new Type[0]); > tehMeth0d = > (BoolVoidDelegate)Delegate.CreateDelegate(typeof(BoolVoidDelegate), stemmer, stemMethod); > } > catch (System.Exception e) > { > throw new > System.SystemException(e.ToString()); > } > } > > /// <summary>Returns the next input Token, > after being stemmed </summary> > public override Token Next() > { > Token token = input.Next(); > if (token == null) > return null; > stemmer.SetCurrent(token.TermText()); > try > { > tehMeth0d(); > //stemMethod.Invoke(stemmer, > (System.Object[])EMPTY_ARGS); > } > catch (System.Exception e) > { > > Console.WriteLine(string.Format( "{0} was not able to stemm > token \"{1}\", using token directly.\n {2}", stemmerName, > token.TermText(), e.ToString())); > } > > Token newToken = new > Token(stemmer.GetCurrent(), token.StartOffset(), > token.EndOffset(), token.Type()); > > newToken.SetPositionIncrement(token.GetPositionIncrement()); > return newToken; > } > } > } > > > -------------------------------------------------------------- > ------------- > > An- und Abmeldung zur SCHEMA Mailingliste unter > http://www.schema.de/mail > > -------------------------------------------------------------- > ------------- > >