Hi,

I'm using Lucene.Net together with Snowball stemming to index text from a 
database. The class Lucene.Net.Analysis.Snowball.SnowballFilter uses the 
reflection API and MethodInfo.Invoke to call the stem methods of Snowball. I 
have written a Snowball filter that creates a delegate once and then uses that 
delegate to stem the words. This approach improves the indexing speed of my 
indexing program by about 10%. I would be happy if you included this code in 
Lucene.Net.

With kind regards,
Arian

Code:

using System;
using Lucene.Net.Analysis;
using SF.Snowball;
using SF.Snowball.Ext;

namespace Index.Search.Analyzers
{

        /// <summary>
        /// A filter that stems words using a Snowball-generated stemmer.
        ///
        /// Available stemmers are listed in <c>SF.Snowball.Ext</c>. The name of a
        /// stemmer is the part of the class name before "Stemmer", e.g., the
        /// stemmer in <c>EnglishStemmer</c> is named "English".
        /// </summary>
        /// <remarks>
        /// The stemmer's <c>Stem</c> method is looked up by reflection once, in the
        /// constructor, and bound to a delegate. Every token is then stemmed through
        /// that delegate rather than through <c>MethodInfo.Invoke</c>.
        /// If stemming a token throws, the failure is written to the console and the
        /// token's original text is passed through unchanged.
        /// </remarks>
        public class FailOverSnowballFilter : TokenFilter
        {
                /// <summary>Signature of the parameterless, bool-returning <c>Stem</c> method.</summary>
                private delegate bool BoolVoidDelegate();

                /// <summary>Class name of the stemmer (language name + "Stemmer"); used in log messages.</summary>
                private readonly string stemmerName;

                /// <summary>The stemmer instance created for the requested language.</summary>
                private readonly SnowballProgram stemmer;

                /// <summary>Delegate bound to <c>stemmer.Stem()</c>.</summary>
                private readonly BoolVoidDelegate stemDelegate;

                /// <summary>Construct a stemmer for a certain language.</summary>
                /// <param name="in_Renamed">the input tokens to stem</param>
                /// <param name="name">the language name of a stemmer</param>
                /// <exception cref="System.ArgumentException">
                /// The stemmer could not be created or has no parameterless <c>Stem</c>
                /// method. The original failure is available as the inner exception.
                /// </exception>
                public FailOverSnowballFilter(TokenStream in_Renamed, System.String name)
                        : base(in_Renamed)
                {
                        stemmerName = name + "Stemmer";
                        try
                        {
                                stemmer = (SnowballProgram)Activator.CreateInstance("Snowball.Net", "SF.Snowball.Ext." + stemmerName).Unwrap();

                                System.Reflection.MethodInfo stemMethod = stemmer.GetType().GetMethod("Stem", Type.EmptyTypes);
                                if (stemMethod == null)
                                {
                                        // Fail with a clear cause instead of the ArgumentNullException
                                        // that Delegate.CreateDelegate would raise for a null method.
                                        throw new MissingMethodException(stemmer.GetType().FullName, "Stem");
                                }

                                stemDelegate = (BoolVoidDelegate)Delegate.CreateDelegate(typeof(BoolVoidDelegate), stemmer, stemMethod);
                        }
                        catch (System.Exception e)
                        {
                                // ArgumentException derives from SystemException, so callers that
                                // catch SystemException keep working; the cause is preserved as
                                // the inner exception instead of being flattened into the message.
                                throw new System.ArgumentException(
                                        string.Format("Could not create Snowball stemmer \"{0}\".", stemmerName),
                                        "name",
                                        e);
                        }
                }

                /// <summary>Returns the next input Token, after being stemmed.</summary>
                /// <returns>
                /// A new token carrying the stemmed text and the offsets, type and position
                /// increment of the input token, or <c>null</c> when the input is exhausted.
                /// </returns>
                public override Token Next()
                {
                        Token token = input.Next();
                        if (token == null)
                        {
                                return null;
                        }

                        string termText = token.TermText();
                        stemmer.SetCurrent(termText);

                        // Default to the unstemmed text so that a failing stemmer really does
                        // fall back to the original token, as the log message promises.
                        string stemmedText = termText;
                        try
                        {
                                stemDelegate();
                                stemmedText = stemmer.GetCurrent();
                        }
                        catch (System.Exception e)
                        {
                                Console.WriteLine(string.Format("{0} was not able to stemm token \"{1}\", using token directly.\n {2}", stemmerName, termText, e.ToString()));
                        }

                        Token newToken = new Token(stemmedText, token.StartOffset(), token.EndOffset(), token.Type());
                        newToken.SetPositionIncrement(token.GetPositionIncrement());
                        return newToken;
                }
        }
}


---------------------------------------------------------------------------

 An- und Abmeldung zur SCHEMA Mailingliste unter http://www.schema.de/mail

---------------------------------------------------------------------------


Reply via email to