Repository: lucenenet Updated Branches: refs/heads/api-work bc485b4c4 -> 917b4fdf5
Lucene.Net.Analysis.Ar refactor: accessibility and documentation comments Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/2878664e Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/2878664e Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/2878664e Branch: refs/heads/api-work Commit: 2878664e260be46bd20a3996dce0bb770aae3ab2 Parents: bc485b4 Author: Shad Storhaug <[email protected]> Authored: Thu Feb 2 20:09:32 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Thu Feb 2 20:09:32 2017 +0700 ---------------------------------------------------------------------- .../Analysis/Ar/ArabicAnalyzer.cs | 44 +++++------ .../Analysis/Ar/ArabicLetterTokenizer.cs | 82 ++++++++++---------- .../Analysis/Ar/ArabicLetterTokenizerFactory.cs | 28 +++---- .../Analysis/Ar/ArabicNormalizationFilter.cs | 4 +- .../Ar/ArabicNormalizationFilterFactory.cs | 9 ++- .../Analysis/Ar/ArabicNormalizer.cs | 27 +++---- .../Analysis/Ar/ArabicStemFilter.cs | 13 ++-- .../Analysis/Ar/ArabicStemFilterFactory.cs | 9 ++- .../Analysis/Ar/ArabicStemmer.cs | 21 +++-- 9 files changed, 114 insertions(+), 123 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs index 9db2bdf..e484850 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs @@ -27,23 +27,20 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// <seealso cref="Analyzer"/> for Arabic. - /// <para> + /// <see cref="Analyzer"/> for Arabic. + /// <para/> /// This analyzer implements light-stemming as specified by: - /// <i> + /// <c> /// Light Stemming for Arabic Information Retrieval - /// </i> + /// </c> /// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf - /// </para> - /// <para> + /// <para/> /// The analysis package contains three primary components: - /// <ul> - /// <li><seealso cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization. - /// <li><seealso cref="ArabicStemFilter"/>: Arabic light stemming - /// <li>Arabic stop words file: a set of default Arabic stop words. - /// </ul> - /// - /// </para> + /// <list type="bullet"> + /// <item><see cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization.</item> + /// <item><see cref="ArabicStemFilter"/>: Arabic light stemming</item> + /// <item>Arabic stop words file: a set of default Arabic stop words.</item> + /// </list> /// </summary> public sealed class ArabicAnalyzer : StopwordAnalyzerBase { @@ -94,7 +91,7 @@ namespace Lucene.Net.Analysis.Ar private readonly CharArraySet stemExclusionSet; /// <summary> - /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>. + /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>. /// </summary> public ArabicAnalyzer(LuceneVersion matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET) @@ -115,8 +112,8 @@ namespace Lucene.Net.Analysis.Ar /// <summary> /// Builds an analyzer with the given stop word. If a none-empty stem exclusion set is - /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before - /// <seealso cref="ArabicStemFilter"/>. + /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before + /// <see cref="ArabicStemFilter"/>. /// </summary> /// <param name="matchVersion"> /// lucene compatibility version </param> @@ -131,15 +128,14 @@ namespace Lucene.Net.Analysis.Ar } /// <summary> - /// Creates - /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> - /// used to tokenize all the text in the provided <seealso cref="Reader"/>. + /// Creates <see cref="Analyzer.TokenStreamComponents"/> + /// used to tokenize all the text in the provided <see cref="TextReader"/>. /// </summary> - /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> - /// built from an <seealso cref="StandardTokenizer"/> filtered with - /// <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>, - /// <seealso cref="ArabicNormalizationFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> - /// if a stem exclusion set is provided and <seealso cref="ArabicStemFilter"/>. </returns> + /// <returns> <see cref="Analyzer.TokenStreamComponents"/> + /// built from an <see cref="StandardTokenizer"/> filtered with + /// <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, + /// <see cref="ArabicNormalizationFilter"/>, <see cref="SetKeywordMarkerFilter"/> + /// if a stem exclusion set is provided and <see cref="ArabicStemFilter"/>. </returns> protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) { #pragma warning disable 612, 618 http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs index 5fa5827..0e4e28c 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs @@ -31,52 +31,54 @@ namespace Lucene.Net.Analysis.Ar /// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc. /// </para> /// <para> - /// <a name="version"/> - /// You must specify the required <seealso cref="Version"/> compatibility when creating - /// <seealso cref="ArabicLetterTokenizer"/>: - /// <ul> - /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and - /// detect token characters. See <seealso cref="#isTokenChar(int)"/> and - /// <seealso cref="#normalize(int)"/> for details.</li> - /// </ul> + /// <paramref name="matchVersion"/> + /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating + /// <see cref="ArabicLetterTokenizer"/>: + /// <list type="bullet"> + /// <item>As of 3.1, <see cref="Util.CharTokenizer"/> uses an int based API to normalize and + /// detect token characters. See <see cref="IsTokenChar(int)"/> and + /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item> + /// </list> /// </para> /// </summary> - /// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead. + /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead. [Obsolete("(3.1) Use StandardTokenizer instead.")] - public class ArabicLetterTokenizer : LetterTokenizer - { - /// <summary> - /// Construct a new ArabicLetterTokenizer. </summary> - /// <param name="matchVersion"> Lucene version - /// to match See <seealso cref="<a href="#version">above</a>"/> - /// </param> - /// <param name="in"> - /// the input to split up into tokens </param> - public ArabicLetterTokenizer(LuceneVersion matchVersion, TextReader @in) - : base(matchVersion, @in) - { - } + public class ArabicLetterTokenizer : LetterTokenizer + { + /// <summary> + /// Construct a new ArabicLetterTokenizer. </summary> + /// <param name="matchVersion"> Lucene version + /// to match See <seealso cref="<a href="#version">above</a>"/> + /// </param> + /// <param name="in"> + /// the input to split up into tokens </param> + public ArabicLetterTokenizer(LuceneVersion matchVersion, TextReader @in) + : base(matchVersion, @in) + { + } - /// <summary> - /// Construct a new ArabicLetterTokenizer using a given - /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. * @param - /// matchVersion Lucene version to match See - /// <seealso cref="<a href="#version">above</a>"/> - /// </summary> - /// <param name="factory"> - /// the attribute factory to use for this Tokenizer </param> - /// <param name="in"> - /// the input to split up into tokens </param> - public ArabicLetterTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in) + /// <summary> + /// Construct a new <see cref="ArabicLetterTokenizer"/> using a given + /// <see cref="AttributeSource.AttributeFactory"/>. + /// </summary> + /// <param name="matchVersion"> + /// matchVersion Lucene version to match See + /// <see cref="LuceneVersion"/>. + /// </param> + /// <param name="factory"> + /// the attribute factory to use for this Tokenizer </param> + /// <param name="in"> + /// the input to split up into tokens </param> + public ArabicLetterTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in) - { - } + { + } - /// <summary> - /// Allows for Letter category or NonspacingMark category </summary> - /// <seealso cref= org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int) </seealso> - protected override bool IsTokenChar(int c) - { + /// <summary> + /// Allows for Letter category or NonspacingMark category </summary> + /// <seealso cref="LetterTokenizer.IsTokenChar(int)"/> + protected override bool IsTokenChar(int c) + { return base.IsTokenChar(c) || Character.GetType(c) == UnicodeCategory.NonSpacingMark; } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs index 43b08d7..366d85c 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs @@ -24,24 +24,24 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// Factory for <seealso cref="ArabicLetterTokenizer"/> </summary> + /// Factory for <see cref="ArabicLetterTokenizer"/> </summary> /// @deprecated (3.1) Use StandardTokenizerFactory instead. /// [Obsolete("(3.1) Use StandardTokenizerFactory instead.")] - public class ArabicLetterTokenizerFactory : TokenizerFactory - { + public class ArabicLetterTokenizerFactory : TokenizerFactory + { - /// <summary> - /// Creates a new ArabicNormalizationFilterFactory </summary> - public ArabicLetterTokenizerFactory(IDictionary<string, string> args) - : base(args) - { - AssureMatchVersion(); - if (args.Count > 0) - { - throw new System.ArgumentException("Unknown parameters: " + args); - } - } + /// <summary> + /// Creates a new <see cref="ArabicNormalizationFilterFactory"/> </summary> + public ArabicLetterTokenizerFactory(IDictionary<string, string> args) + : base(args) + { + AssureMatchVersion(); + if (args.Count > 0) + { + throw new System.ArgumentException("Unknown parameters: " + args); + } + } public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs index 7d2fa2a..7f87ecf 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs @@ -20,10 +20,8 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicNormalizer"/> to normalize the orthography. - /// + /// A <see cref="TokenFilter"/> that applies <see cref="ArabicNormalizer"/> to normalize the orthography. /// </summary> - public sealed class ArabicNormalizationFilter : TokenFilter { private readonly ArabicNormalizer normalizer = new ArabicNormalizer(); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs index 840522c..0b92b5c 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs @@ -21,20 +21,21 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// Factory for <seealso cref="ArabicNormalizationFilter"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="ArabicNormalizationFilter"/>. + /// <code> /// <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.StandardTokenizerFactory"/> /// <filter class="solr.ArabicNormalizationFilterFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class ArabicNormalizationFilterFactory : TokenFilterFactory, IMultiTermAwareComponent { /// <summary> - /// Creates a new ArabicNormalizationFilterFactory </summary> + /// Creates a new <see cref="ArabicNormalizationFilterFactory"/> </summary> public ArabicNormalizationFilterFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs index 47ebe76..9733198 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs @@ -20,21 +20,18 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// Normalizer for Arabic. - /// <para> - /// Normalization is done in-place for efficiency, operating on a termbuffer. - /// </para> - /// <para> - /// Normalization is defined as: - /// <ul> - /// <li> Normalization of hamza with alef seat to a bare alef. - /// <li> Normalization of teh marbuta to heh - /// <li> Normalization of dotless yeh (alef maksura) to yeh. - /// <li> Removal of Arabic diacritics (the harakat) - /// <li> Removal of tatweel (stretching character). - /// </ul> - /// - /// </para> + /// Normalizer for Arabic. + /// <para/> + /// Normalization is done in-place for efficiency, operating on a termbuffer. + /// <para/> + /// Normalization is defined as: + /// <list type="bullet"> + /// <item> Normalization of hamza with alef seat to a bare alef.</item> + /// <item> Normalization of teh marbuta to heh</item> + /// <item> Normalization of dotless yeh (alef maksura) to yeh.</item> + /// <item> Removal of Arabic diacritics (the harakat)</item> + /// <item> Removal of tatweel (stretching character).</item> + /// </list> /// </summary> public class ArabicNormalizer { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs index 54027ed..a8d2745 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs @@ -20,14 +20,13 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicStemmer"/> to stem Arabic words.. - /// <para> + /// A <see cref="TokenFilter"/> that applies <see cref="ArabicStemmer"/> to stem Arabic words.. + /// <para/> /// To prevent terms from being stemmed use an instance of - /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets - /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>. - /// </para> </summary> - /// <seealso cref= SetKeywordMarkerFilter </seealso> - + /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets + /// the <see cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>. + /// </summary> + /// <seealso cref="Miscellaneous.SetKeywordMarkerFilter"/> public sealed class ArabicStemFilter : TokenFilter { private readonly ArabicStemmer stemmer = new ArabicStemmer(); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs index 08d0d19..0238b5b 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs @@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// Factory for <seealso cref="ArabicStemFilter"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="ArabicStemFilter"/>. + /// <code> /// <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.StandardTokenizerFactory"/> /// <filter class="solr.ArabicNormalizationFilterFactory"/> /// <filter class="solr.ArabicStemFilterFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class ArabicStemFilterFactory : TokenFilterFactory { /// <summary> - /// Creates a new ArabicStemFilterFactory </summary> + /// Creates a new <see cref="ArabicStemFilterFactory"/> </summary> public ArabicStemFilterFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs index 8ba6ca7..444b5d3 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs @@ -20,18 +20,15 @@ namespace Lucene.Net.Analysis.Ar */ /// <summary> - /// Stemmer for Arabic. - /// <para> - /// Stemming is done in-place for efficiency, operating on a termbuffer. - /// </para> - /// <para> - /// Stemming is defined as: - /// <ul> - /// <li> Removal of attached definite article, conjunction, and prepositions. - /// <li> Stemming of common suffixes. - /// </ul> - /// - /// </para> + /// Stemmer for Arabic. + /// <para/> + /// Stemming is done in-place for efficiency, operating on a termbuffer. + /// <para/> + /// Stemming is defined as: + /// <list type="bullet"> + /// <item> Removal of attached definite article, conjunction, and prepositions.</item> + /// <item> Stemming of common suffixes.</item> + /// </list> /// </summary> public class ArabicStemmer {
