Hi Arian,

In order for us to accept code, please use JIRA to submit a patch.
The Lucene.Net JIRA is here: https://issues.apache.org/jira/browse/LUCENENET

Regards,

-- George 

> -----Original Message-----
> From: Bär, Arian [mailto:arian.b...@schema.de] 
> Sent: Monday, April 06, 2009 8:05 AM
> To: lucene-net-dev@incubator.apache.org
> Subject: SnowballFilter speed improvement
> 
> Hi,
> 
> I'm using Lucene.Net along with snowball stemming to index 
> text from a database. The class 
> Lucene.Net.Analysis.Snowball.SnowballFilter uses the 
> reflection API and the invoke method to call the stem methods 
> of snowball. I have written a Snowball filter which creates a 
> delegate and uses this delegate to stem the words afterwards. 
> This approach improves the indexing speed of my indexing 
> program by about 10%. I would be happy if you included this 
> code in Lucene.Net.
> 
> With kind regards,
> Arian
> 
> Code:
> 
> using System;
> using Lucene.Net.Analysis;
> using SF.Snowball;
> using SF.Snowball.Ext;
> 
> namespace Index.Search.Analyzers
> {
> 
>       /// <summary>A filter that stems words using a 
> Snowball-generated stemmer.
>       /// 
>       /// Available stemmers are listed in {@link 
> SF.Snowball.Ext}.  The name of a
>       /// stemmer is the part of the class name before 
> "Stemmer", e.g., the stemmer in
>       /// {@link EnglishStemmer} is named "English".
>       /// </summary>
> 
>       public class FailOverSnowballFilter : TokenFilter
>       {
>               private static readonly System.Object[] 
> EMPTY_ARGS = new System.Object[0];
>               string stemmerName = string.Empty;
> 
>               private delegate bool BoolVoidDelegate();
>               private BoolVoidDelegate tehMeth0d;
> 
>               private SnowballProgram stemmer;
>               private System.Reflection.MethodInfo stemMethod;
> 
>               /// <summary>Construct a stemmer for a certain language.
>               /// 
>               /// </summary>
>               /// <param name="in">the input tokens to stem
>               /// </param>
>               /// <param name="name">the language name of a stemmer
>               /// </param>
>               public FailOverSnowballFilter(TokenStream 
> in_Renamed, System.String name)
>                       : base(in_Renamed)
>               {
>                       stemmerName = name + "Stemmer";
>                       try
>                       {
>                               stemmer = 
> (SnowballProgram)Activator.CreateInstance("Snowball.Net", 
> "SF.Snowball.Ext." + stemmerName).Unwrap();
> 
>                               stemMethod = 
> stemmer.GetType().GetMethod("Stem", (new Type[0] == null) ? 
> new Type[0] : (Type[])new Type[0]);
>                               tehMeth0d = 
> (BoolVoidDelegate)Delegate.CreateDelegate(typeof(BoolVoidDeleg
> ate), stemmer, stemMethod);
>                       }
>                       catch (System.Exception e)
>                       {
>                               throw new 
> System.SystemException(e.ToString());
>                       }
>               }
> 
>               /// <summary>Returns the next input Token, 
> after being stemmed </summary>
>               public override Token Next()
>               {
>                       Token token = input.Next();
>                       if (token == null)
>                               return null;
>                       stemmer.SetCurrent(token.TermText());
>                       try
>                       {
>                               tehMeth0d();
>                               //stemMethod.Invoke(stemmer, 
> (System.Object[])EMPTY_ARGS);
>                       }
>                       catch (System.Exception e)
>                       {
>                               
> Console.WriteLine(string.Format( "{0} was not able to stemm 
> token \"{1}\", using token directly.\n {2}", stemmerName, 
> token.TermText(), e.ToString()));                             
>                       }
> 
>                       Token newToken = new 
> Token(stemmer.GetCurrent(), token.StartOffset(), 
> token.EndOffset(), token.Type());
>                       
> newToken.SetPositionIncrement(token.GetPositionIncrement());
>                       return newToken;
>               }
>       }
> }
> 
> 
> --------------------------------------------------------------
> -------------
> 
>  An- und Abmeldung zur SCHEMA Mailingliste unter 
> http://www.schema.de/mail
> 
> --------------------------------------------------------------
> -------------
> 
> 

Reply via email to