Lucene.Net.Analysis.Core refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/695b714f Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/695b714f Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/695b714f Branch: refs/heads/api-work Commit: 695b714fa5d7a95de2999e235e2ccc210e65f3dc Parents: 6dc3ac1 Author: Shad Storhaug <[email protected]> Authored: Fri Feb 3 02:39:12 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Fri Feb 3 02:54:24 2017 +0700 ---------------------------------------------------------------------- .../Analysis/Core/KeywordAnalyzer.cs | 2 - .../Analysis/Core/KeywordTokenizer.cs | 11 ++- .../Analysis/Core/KeywordTokenizerFactory.cs | 16 ++--- .../Analysis/Core/LetterTokenizer.cs | 38 +++++------ .../Analysis/Core/LetterTokenizerFactory.cs | 16 ++--- .../Analysis/Core/LowerCaseFilter.cs | 19 +++--- .../Analysis/Core/LowerCaseFilterFactory.cs | 14 ++-- .../Analysis/Core/LowerCaseTokenizer.cs | 47 ++++++------- .../Analysis/Core/LowerCaseTokenizerFactory.cs | 16 ++--- .../Analysis/Core/SimpleAnalyzer.cs | 33 ++++----- .../Analysis/Core/StopAnalyzer.cs | 52 +++++++------- .../Analysis/Core/StopFilter.cs | 72 +++++++++----------- .../Analysis/Core/StopFilterFactory.cs | 48 ++++++------- .../Analysis/Core/TypeTokenFilter.cs | 17 +++-- .../Analysis/Core/TypeTokenFilterFactory.cs | 13 ++-- .../Analysis/Core/UpperCaseFilter.cs | 19 +++--- .../Analysis/Core/UpperCaseFilterFactory.cs | 12 ++-- .../Analysis/Core/WhitespaceAnalyzer.cs | 25 +++---- .../Analysis/Core/WhitespaceTokenizer.cs | 41 +++++------ .../Analysis/Core/WhitespaceTokenizerFactory.cs | 10 +-- .../Miscellaneous/CodepointCountFilter.cs | 2 +- .../Analysis/Miscellaneous/KeepWordFilter.cs | 2 +- .../Analysis/Miscellaneous/LengthFilter.cs | 2 +- .../Analysis/Util/FilteringTokenFilter.cs | 2 +- .../Analysis/Util/MultiTermAwareComponent.cs | 2 +- .../Analysis/Core/TestTypeTokenFilterFactory.cs | 2 +- 26 files 
changed, 252 insertions(+), 281 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs index 8f5e5d9..1f2d00d 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs @@ -2,7 +2,6 @@ namespace Lucene.Net.Analysis.Core { - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -20,7 +19,6 @@ namespace Lucene.Net.Analysis.Core * limitations under the License. */ - /// <summary> /// "Tokenizes" the entire stream as a single token. This is useful /// for data like zip codes, ids, and some product names. 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs index f170588..4f22490 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs @@ -1,7 +1,6 @@ -using System.IO; -using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes; using Lucene.Net.Util; -using Reader = System.IO.TextReader; +using System.IO; namespace Lucene.Net.Analysis.Core { @@ -54,7 +53,7 @@ namespace Lucene.Net.Analysis.Core termAtt.ResizeBuffer(bufferSize); } - public KeywordTokenizer(AttributeSource.AttributeFactory factory, Reader input, int bufferSize) + public KeywordTokenizer(AttributeSource.AttributeFactory factory, TextReader input, int bufferSize) : base(factory, input) { termAtt = AddAttribute<ICharTermAttribute>(); @@ -67,7 +66,7 @@ namespace Lucene.Net.Analysis.Core termAtt.ResizeBuffer(bufferSize); } - public override bool IncrementToken() + public override sealed bool IncrementToken() { if (!done) { @@ -96,7 +95,7 @@ namespace Lucene.Net.Analysis.Core return false; } - public override void End() + public override sealed void End() { base.End(); // set final offset http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs index 29a812a..7d87b37 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs +++ 
b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs @@ -1,7 +1,7 @@ -using System.Collections.Generic; -using System.IO; -using Lucene.Net.Analysis.Util; +using Lucene.Net.Analysis.Util; using Lucene.Net.Util; +using System.Collections.Generic; +using System.IO; namespace Lucene.Net.Analysis.Core { @@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Factory for <seealso cref="KeywordTokenizer"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="KeywordTokenizer"/>. + /// <code> /// <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.KeywordTokenizerFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class KeywordTokenizerFactory : TokenizerFactory { - /// <summary> - /// Creates a new KeywordTokenizerFactory </summary> + /// Creates a new <see cref="KeywordTokenizerFactory"/> </summary> public KeywordTokenizerFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs index 9d3dc2b..9ef19a6 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs @@ -1,7 +1,7 @@ -using System.IO; -using Lucene.Net.Analysis.Util; +using Lucene.Net.Analysis.Util; using Lucene.Net.Support; using Lucene.Net.Util; +using System.IO; namespace Lucene.Net.Analysis.Core { @@ -23,32 +23,30 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// A LetterTokenizer is a tokenizer that divides text at non-letters. 
That's to + /// A <see cref="LetterTokenizer"/> is a tokenizer that divides text at non-letters. That's to /// say, it defines tokens as maximal strings of adjacent letters, as defined by - /// java.lang.Character.isLetter() predicate. + /// <see cref="char.IsLetter"/> predicate. /// <para> /// Note: this does a decent job for most European languages, but does a terrible /// job for some Asian languages, where words are not separated by spaces. /// </para> /// <para> - /// <a name="version"/> - /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating - /// <seealso cref="LetterTokenizer"/>: - /// <ul> - /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and - /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and - /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li> - /// </ul> + /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating + /// <see cref="LetterTokenizer"/>: + /// <list type="bullet"> + /// <item>As of 3.1, <see cref="CharTokenizer"/> uses an <see cref="int"/> based API to normalize and + /// detect token characters. See <see cref="CharTokenizer.IsTokenChar(int)"/> and + /// <see cref="CharTokenizer.Normalize(int)"/> for details.</item> + /// </list> /// </para> /// </summary> public class LetterTokenizer : CharTokenizer { - /// <summary> - /// Construct a new LetterTokenizer. + /// Construct a new <see cref="LetterTokenizer"/>. /// </summary> /// <param name="matchVersion"> - /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param> + /// <see cref="LuceneVersion"/> to match. 
</param> /// <param name="in"> /// the input to split up into tokens </param> public LetterTokenizer(LuceneVersion matchVersion, TextReader @in) @@ -57,13 +55,13 @@ namespace Lucene.Net.Analysis.Core } /// <summary> - /// Construct a new LetterTokenizer using a given - /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. + /// Construct a new <see cref="LetterTokenizer"/> using a given + /// <see cref="AttributeSource.AttributeFactory"/>. /// </summary> /// <param name="matchVersion"> - /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param> + /// <see cref="LuceneVersion"/> to match</param> /// <param name="factory"> - /// the attribute factory to use for this <seealso cref="Tokenizer"/> </param> + /// the attribute factory to use for this <see cref="Tokenizer"/> </param> /// <param name="in"> /// the input to split up into tokens </param> public LetterTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader @in) @@ -73,7 +71,7 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Collects only characters which satisfy - /// <seealso cref="Character#isLetter(int)"/>. + /// <see cref="Character.IsLetter(int)"/>. 
/// </summary> protected override bool IsTokenChar(int c) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs index 0fe8bed..611a4a4 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs @@ -1,7 +1,7 @@ -using System.Collections.Generic; -using System.IO; -using Lucene.Net.Analysis.Util; +using Lucene.Net.Analysis.Util; using Lucene.Net.Util; +using System.Collections.Generic; +using System.IO; namespace Lucene.Net.Analysis.Core { @@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Factory for <seealso cref="LetterTokenizer"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="LetterTokenizer"/>. 
+ /// <code> /// <fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.LetterTokenizerFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class LetterTokenizerFactory : TokenizerFactory { - /// <summary> - /// Creates a new LetterTokenizerFactory </summary> + /// Creates a new <see cref="LetterTokenizerFactory"/> </summary> public LetterTokenizerFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs index fce4e12..36bde21 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs @@ -4,7 +4,6 @@ using Lucene.Net.Util; namespace Lucene.Net.Analysis.Core { - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -21,14 +20,14 @@ namespace Lucene.Net.Analysis.Core * See the License for the specific language governing permissions and * limitations under the License. */ + /// <summary> /// Normalizes token text to lower case. - /// <a name="version"/> - /// <para>You must specify the required <seealso cref="LuceneVersion"/> + /// <para>You must specify the required <see cref="LuceneVersion"/> /// compatibility when creating LowerCaseFilter: - /// <ul> - /// <li> As of 3.1, supplementary characters are properly lowercased. 
- /// </ul> + /// <list type="bullet"> + /// <item> As of 3.1, supplementary characters are properly lowercased.</item> + /// </list> /// </para> /// </summary> public sealed class LowerCaseFilter : TokenFilter @@ -37,10 +36,10 @@ namespace Lucene.Net.Analysis.Core private readonly ICharTermAttribute termAtt; /// <summary> - /// Create a new LowerCaseFilter, that normalizes token text to lower case. + /// Create a new <see cref="LowerCaseFilter"/>, that normalizes token text to lower case. /// </summary> - /// <param name="matchVersion"> See <a href="#version">above</a> </param> - /// <param name="in"> TokenStream to filter </param> + /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param> + /// <param name="in"> <see cref="TokenStream"/> to filter </param> public LowerCaseFilter(LuceneVersion matchVersion, TokenStream @in) : base(@in) { @@ -48,7 +47,7 @@ namespace Lucene.Net.Analysis.Core charUtils = CharacterUtils.GetInstance(matchVersion); } - public override bool IncrementToken() + public override sealed bool IncrementToken() { if (m_input.IncrementToken()) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs index f34afe0..5d4446c 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs @@ -1,5 +1,5 @@ -using System.Collections.Generic; -using Lucene.Net.Analysis.Util; +using Lucene.Net.Analysis.Util; +using System.Collections.Generic; namespace Lucene.Net.Analysis.Core { @@ -21,20 +21,20 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Factory for <seealso cref="LowerCaseFilter"/>. 
- /// <pre class="prettyprint"> + /// Factory for <see cref="LowerCaseFilter"/>. + /// <code> /// <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// <filter class="solr.LowerCaseFilterFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class LowerCaseFilterFactory : TokenFilterFactory, IMultiTermAwareComponent { - /// <summary> - /// Creates a new LowerCaseFilterFactory </summary> + /// Creates a new <see cref="LowerCaseFilterFactory"/> </summary> public LowerCaseFilterFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs index 94cfbb4..027f3d7 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs @@ -1,11 +1,9 @@ -using System.IO; -using Lucene.Net.Analysis.Util; -using Lucene.Net.Support; +using Lucene.Net.Support; using Lucene.Net.Util; +using System.IO; namespace Lucene.Net.Analysis.Core { - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -22,35 +20,34 @@ namespace Lucene.Net.Analysis.Core * See the License for the specific language governing permissions and * limitations under the License. */ + /// <summary> - /// LowerCaseTokenizer performs the function of LetterTokenizer - /// and LowerCaseFilter together. 
It divides text at non-letters and converts + /// <see cref="LowerCaseTokenizer"/> performs the function of <see cref="LetterTokenizer"/> + /// and <see cref="LowerCaseFilter"/> together. It divides text at non-letters and converts /// them to lower case. While it is functionally equivalent to the combination - /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage + /// of <see cref="LetterTokenizer"/> and <see cref="LowerCaseFilter"/>, there is a performance advantage /// to doing the two tasks at once, hence this (redundant) implementation. - /// <P> + /// <para> /// Note: this does a decent job for most European languages, but does a terrible /// job for some Asian languages, where words are not separated by spaces. - /// </p> + /// </para> /// <para> - /// <a name="version"/> - /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating - /// <seealso cref="LowerCaseTokenizer"/>: - /// <ul> - /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and - /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and - /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li> - /// </ul> + /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating + /// <see cref="LowerCaseTokenizer"/>: + /// <list type="bullet"> + /// <item>As of 3.1, <see cref="Util.CharTokenizer"/> uses an int based API to normalize and + /// detect token characters. See <see cref="Util.CharTokenizer.IsTokenChar(int)"/> and + /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item> + /// </list> /// </para> /// </summary> public sealed class LowerCaseTokenizer : LetterTokenizer { - /// <summary> - /// Construct a new LowerCaseTokenizer. + /// Construct a new <see cref="LowerCaseTokenizer"/>. 
/// </summary> /// <param name="matchVersion"> - /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/> + /// <see cref="LuceneVersion"/> to match /// </param> /// <param name="in"> /// the input to split up into tokens </param> @@ -60,13 +57,13 @@ namespace Lucene.Net.Analysis.Core } /// <summary> - /// Construct a new LowerCaseTokenizer using a given - /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. + /// Construct a new <see cref="LowerCaseTokenizer"/> using a given + /// <see cref="AttributeSource.AttributeFactory"/>. /// </summary> /// <param name="matchVersion"> - /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param> + /// <see cref="LuceneVersion"/> to match </param> /// <param name="factory"> - /// the attribute factory to use for this <seealso cref="Tokenizer"/> </param> + /// the attribute factory to use for this <see cref="Tokenizer"/> </param> /// <param name="in"> /// the input to split up into tokens </param> public LowerCaseTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader @in) @@ -76,7 +73,7 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Converts char to lower case - /// <seealso cref="Character#toLowerCase(int)"/>. + /// <see cref="Character.ToLowerCase(int)"/>. 
/// </summary> protected override int Normalize(int c) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs index 76b9d81..08e4b4f 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs @@ -1,7 +1,7 @@ -using System.Collections.Generic; -using System.IO; -using Lucene.Net.Analysis.Util; +using Lucene.Net.Analysis.Util; using Lucene.Net.Util; +using System.Collections.Generic; +using System.IO; namespace Lucene.Net.Analysis.Core { @@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Factory for <seealso cref="LowerCaseTokenizer"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="LowerCaseTokenizer"/>. 
+ /// <code> /// <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.LowerCaseTokenizerFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class LowerCaseTokenizerFactory : TokenizerFactory, IMultiTermAwareComponent { - /// <summary> - /// Creates a new LowerCaseTokenizerFactory </summary> + /// Creates a new <see cref="LowerCaseTokenizerFactory"/> </summary> public LowerCaseTokenizerFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs index d2165d0..80586d0 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs @@ -1,10 +1,8 @@ -using System.IO; -using Lucene.Net.Analysis.Util; -using Lucene.Net.Util; +using Lucene.Net.Util; +using System.IO; namespace Lucene.Net.Analysis.Core { - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -21,30 +19,27 @@ namespace Lucene.Net.Analysis.Core * See the License for the specific language governing permissions and * limitations under the License. 
*/ + /// <summary> - /// An <seealso cref="Analyzer"/> that filters <seealso cref="LetterTokenizer"/> - /// with <seealso cref="LowerCaseFilter"/> + /// An <see cref="Analyzer"/> that filters <see cref="LetterTokenizer"/> + /// with <see cref="LowerCaseFilter"/> /// <para> - /// <a name="version">You must specify the required <seealso cref="LuceneVersion"/> compatibility - /// when creating <seealso cref="CharTokenizer"/>: - /// <ul> - /// <li>As of 3.1, <seealso cref="LowerCaseTokenizer"/> uses an int based API to normalize and - /// detect token codepoints. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and - /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li> - /// </ul> - /// </para> - /// <para> - /// + /// You must specify the required <see cref="LuceneVersion"/> compatibility + /// when creating <see cref="Util.CharTokenizer"/>: + /// <list type="bullet"> + /// <item>As of 3.1, <see cref="LowerCaseTokenizer"/> uses an int based API to normalize and + /// detect token codepoints. 
See <see cref="Util.CharTokenizer.IsTokenChar(int)"/> and + /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item> + /// </list> /// </para> /// </summary> public sealed class SimpleAnalyzer : Analyzer { - private readonly LuceneVersion matchVersion; /// <summary> - /// Creates a new <seealso cref="SimpleAnalyzer"/> </summary> - /// <param name="matchVersion"> Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param> + /// Creates a new <see cref="SimpleAnalyzer"/> </summary> + /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param> public SimpleAnalyzer(LuceneVersion matchVersion) { this.matchVersion = matchVersion; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs index d1a2a26..e91072e 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs @@ -1,8 +1,8 @@ -using System.Collections.Generic; -using System.IO; -using Lucene.Net.Analysis.Util; +using Lucene.Net.Analysis.Util; using Lucene.Net.Support; using Lucene.Net.Util; +using System.Collections.Generic; +using System.IO; namespace Lucene.Net.Analysis.Core { @@ -24,21 +24,19 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Filters <seealso cref="LetterTokenizer"/> with <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>. 
- /// - /// <a name="version"/> - /// <para>You must specify the required <seealso cref="LuceneVersion"/> - /// compatibility when creating StopAnalyzer: - /// <ul> - /// <li> As of 3.1, StopFilter correctly handles Unicode 4.0 - /// supplementary characters in stopwords - /// <li> As of 2.9, position increments are preserved - /// </ul> + /// Filters <see cref="LetterTokenizer"/> with <see cref="LowerCaseFilter"/> and <see cref="StopFilter"/>. + /// <para> + /// You must specify the required <see cref="LuceneVersion"/> + /// compatibility when creating <see cref="StopAnalyzer"/>: + /// <list type="bullet"> + /// <item> As of 3.1, StopFilter correctly handles Unicode 4.0 + /// supplementary characters in stopwords</item> + /// <item> As of 2.9, position increments are preserved</item> + /// </list> /// </para> /// </summary> public sealed class StopAnalyzer : StopwordAnalyzerBase { - /// <summary> /// An unmodifiable set containing some common English words that are not usually useful /// for searching. @@ -59,8 +57,8 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Builds an analyzer which removes words in - /// <seealso cref="#ENGLISH_STOP_WORDS_SET"/>. </summary> - /// <param name="matchVersion"> See <a href="#version">above</a> </param> + /// <see cref="ENGLISH_STOP_WORDS_SET"/>. </summary> + /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param> public StopAnalyzer(LuceneVersion matchVersion) : this(matchVersion, ENGLISH_STOP_WORDS_SET) { @@ -68,7 +66,7 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Builds an analyzer with the stop words from the given set. 
</summary> - /// <param name="matchVersion"> See <a href="#version">above</a> </param> + /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param> /// <param name="stopWords"> Set of stop words </param> public StopAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords) @@ -77,8 +75,8 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Builds an analyzer with the stop words from the given file. </summary> - /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso> - /// <param name="matchVersion"> See <a href="#version">above</a> </param> + /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/> + /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param> /// <param name="stopwordsFile"> File to load stop words from </param> public StopAnalyzer(LuceneVersion matchVersion, FileInfo stopwordsFile) : this(matchVersion, LoadStopwordSet(stopwordsFile, matchVersion)) @@ -87,9 +85,9 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Builds an analyzer with the stop words from the given reader. </summary> - /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso> - /// <param name="matchVersion"> See <a href="#version">above</a> </param> - /// <param name="stopwords"> TextReader to load stop words from </param> + /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/> + /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param> + /// <param name="stopwords"> <see cref="TextReader"/> to load stop words from </param> public StopAnalyzer(LuceneVersion matchVersion, TextReader stopwords) : this(matchVersion, LoadStopwordSet(stopwords, matchVersion)) { @@ -97,12 +95,12 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Creates - /// <seealso cref="Analyzer.TokenStreamComponents"/> - /// used to tokenize all the text in the provided <seealso cref="TextReader"/>. 
+ /// <see cref="Analyzer.TokenStreamComponents"/> + /// used to tokenize all the text in the provided <see cref="TextReader"/>. /// </summary> - /// <returns> <seealso cref="Analyzer.TokenStreamComponents"/> - /// built from a <seealso cref="LowerCaseTokenizer"/> filtered with - /// <seealso cref="StopFilter"/> </returns> + /// <returns> <see cref="Analyzer.TokenStreamComponents"/> + /// built from a <see cref="LowerCaseTokenizer"/> filtered with + /// <see cref="StopFilter"/> </returns> protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) { Tokenizer source = new LowerCaseTokenizer(m_matchVersion, reader); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs index b8582a4..1e5e2a0 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs @@ -1,9 +1,7 @@ using Lucene.Net.Analysis.TokenAttributes; using Lucene.Net.Analysis.Util; -using Lucene.Net.Support; using Lucene.Net.Util; using System.Collections.Generic; -using System.Linq; namespace Lucene.Net.Analysis.Core { @@ -26,35 +24,33 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Removes stop words from a token stream. 
- /// - /// <a name="version"/> - /// <para>You must specify the required <seealso cref="LuceneVersion"/> - /// compatibility when creating StopFilter: - /// <ul> - /// <li> As of 3.1, StopFilter correctly handles Unicode 4.0 + /// <para> + /// You must specify the required <see cref="LuceneVersion"/> + /// compatibility when creating <see cref="StopFilter"/>: + /// <list type="bullet"> + /// <item>As of 3.1, StopFilter correctly handles Unicode 4.0 /// supplementary characters in stopwords and position - /// increments are preserved - /// </ul> + /// increments are preserved</item> + /// </list> /// </para> /// </summary> public sealed class StopFilter : FilteringTokenFilter { - private readonly CharArraySet stopWords; private readonly ICharTermAttribute termAtt; /// <summary> - /// Constructs a filter which removes words from the input TokenStream that are - /// named in the Set. + /// Constructs a filter which removes words from the input <see cref="TokenStream"/> that are + /// named in the <see cref="CharArraySet"/>. /// </summary> /// <param name="matchVersion"> /// Lucene version to enable correct Unicode 4.0 behavior in the stop - /// set if Version > 3.0. See <a href="#version">above</a> for details. </param> + /// set if Version > 3.0. See <see cref="LuceneVersion"/> for details. </param> /// <param name="in"> - /// Input stream </param> + /// Input <see cref="TokenStream"/> </param> /// <param name="stopWords"> - /// A <seealso cref="CharArraySet"/> representing the stopwords. </param> - /// <seealso cref= #makeStopSet(Version, java.lang.String...) </seealso> + /// A <see cref="CharArraySet"/> representing the stopwords. 
</param> + /// <seealso cref="MakeStopSet(LuceneVersion, string[])"/> public StopFilter(LuceneVersion matchVersion, TokenStream @in, CharArraySet stopWords) : base(matchVersion, @in) { @@ -63,29 +59,29 @@ namespace Lucene.Net.Analysis.Core } /// <summary> - /// Builds a Set from an array of stop words, - /// appropriate for passing into the StopFilter constructor. - /// This permits this stopWords construction to be cached once when - /// an Analyzer is constructed. + /// Builds a <see cref="CharArraySet"/> from an array of stop words, + /// appropriate for passing into the <see cref="StopFilter"/> constructor. + /// This permits this <paramref name="stopWords"/> construction to be cached once when + /// an <see cref="Analyzer"/> is constructed. /// </summary> - /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> + /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> /// <param name="stopWords"> An array of stopwords </param> - /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso> + /// <seealso cref="MakeStopSet(LuceneVersion, string[], bool)"/> passing false to ignoreCase public static CharArraySet MakeStopSet(LuceneVersion matchVersion, params string[] stopWords) { return MakeStopSet(matchVersion, stopWords, false); } /// <summary> - /// Builds a Set from an array of stop words, - /// appropriate for passing into the StopFilter constructor. - /// This permits this stopWords construction to be cached once when - /// an Analyzer is constructed. + /// Builds a <see cref="CharArraySet"/> from an array of stop words, + /// appropriate for passing into the <see cref="StopFilter"/> constructor. + /// This permits this <paramref name="stopWords"/> construction to be cached once when + /// an <see cref="Analyzer"/> is constructed. 
/// </summary> - /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> - /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param> - /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns> - /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso> + /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> + /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="char[]"/> or any other ToString()-able list representing the stopwords </param> + /// <returns> A Set (<see cref="CharArraySet"/>) containing the words </returns> + /// <seealso cref="MakeStopSet(LuceneVersion, string[], bool)"/> passing false to ignoreCase public static CharArraySet MakeStopSet<T1>(LuceneVersion matchVersion, IList<T1> stopWords) { return MakeStopSet(matchVersion, stopWords, false); @@ -94,10 +90,10 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Creates a stopword set from the given stopword array. /// </summary> - /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> + /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> /// <param name="stopWords"> An array of stopwords </param> /// <param name="ignoreCase"> If true, all words are lower cased first. 
</param> - /// <returns> a Set containing the words </returns> + /// <returns> a Set (<see cref="CharArraySet"/>) containing the words </returns> public static CharArraySet MakeStopSet(LuceneVersion matchVersion, string[] stopWords, bool ignoreCase) { CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Length, ignoreCase); @@ -107,10 +103,10 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Creates a stopword set from the given stopword list. </summary> - /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> - /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param> + /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param> + /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="char[]"/> or any other ToString()-able list representing the stopwords </param> /// <param name="ignoreCase"> if true, all words are lower cased first </param> - /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns> + /// <returns> A Set (<see cref="CharArraySet"/>) containing the words </returns> public static CharArraySet MakeStopSet<T1>(LuceneVersion matchVersion, IList<T1> stopWords, bool ignoreCase) { var stopSet = new CharArraySet(matchVersion, stopWords.Count, ignoreCase); @@ -119,9 +115,9 @@ namespace Lucene.Net.Analysis.Core } /// <summary> - /// Returns the next input Token whose term() is not a stop word. + /// Returns the next input Token whose Term is not a stop word. 
/// </summary> - protected internal override bool Accept() + protected override bool Accept() { return !stopWords.Contains(termAtt.Buffer, 0, termAtt.Length); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs index abc6b8c..9466549 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs @@ -24,46 +24,46 @@ namespace Lucene.Net.Analysis.Core /// <summary> - /// Factory for <seealso cref="StopFilter"/>. + /// Factory for <see cref="StopFilter"/>. /// - /// <pre class="prettyprint"> + /// <code> /// <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// <filter class="solr.StopFilterFactory" ignoreCase="true" /// words="stopwords.txt" format="wordset" /> /// </analyzer> - /// </fieldType></pre> - /// + /// </fieldType> + /// </code> /// <para> /// All attributes are optional: /// </para> - /// <ul> - /// <li><code>ignoreCase</code> defaults to <code>false</code></li> - /// <li><code>words</code> should be the name of a stopwords file to parse, if not - /// specified the factory will use <seealso cref="StopAnalyzer#ENGLISH_STOP_WORDS_SET"/> - /// </li> - /// <li><code>format</code> defines how the <code>words</code> file will be parsed, - /// and defaults to <code>wordset</code>. If <code>words</code> is not specified, - /// then <code>format</code> must not be specified. 
- /// </li> - /// </ul> + /// <list type="bullet"> + /// <item><c>ignoreCase</c> defaults to <c>false</c></item> + /// <item><c>words</c> should be the name of a stopwords file to parse, if not + /// specified the factory will use <see cref="StopAnalyzer.ENGLISH_STOP_WORDS_SET"/> + /// </item> + /// <item><c>format</c> defines how the <c>words</c> file will be parsed, + /// and defaults to <c>wordset</c>. If <c>words</c> is not specified, + /// then <c>format</c> must not be specified. + /// </item> + /// </list> /// <para> - /// The valid values for the <code>format</code> option are: + /// The valid values for the <c>format</c> option are: /// </para> - /// <ul> - /// <li><code>wordset</code> - This is the default format, which supports one word per + /// <list type="bullet"> + /// <item><c>wordset</c> - This is the default format, which supports one word per /// line (including any intra-word whitespace) and allows whole line comments /// begining with the "#" character. Blank lines are ignored. See - /// <seealso cref="WordlistLoader#getLines WordlistLoader.getLines"/> for details. - /// </li> - /// <li><code>snowball</code> - This format allows for multiple words specified on each + /// <see cref="WordlistLoader.GetLines"/> for details. + /// </item> + /// <item><c>snowball</c> - This format allows for multiple words specified on each /// line, and trailing comments may be specified using the vertical line ("|"). /// Blank lines are ignored. See - /// <seealso cref="WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet"/> + /// <see cref="WordlistLoader.GetSnowballWordSet"/> /// for details. 
- /// </li> - /// </ul> + /// </item> + /// </list> /// </summary> public class StopFilterFactory : TokenFilterFactory, IResourceLoaderAware { @@ -77,7 +77,7 @@ namespace Lucene.Net.Analysis.Core private readonly bool enablePositionIncrements; /// <summary> - /// Creates a new StopFilterFactory </summary> + /// Creates a new <see cref="StopFilterFactory"/> </summary> public StopFilterFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs index bf72df8..0c993ad 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs @@ -3,7 +3,6 @@ using Lucene.Net.Analysis.Util; using Lucene.Net.Util; using System; using System.Collections.Generic; -using System.Linq; namespace Lucene.Net.Analysis.Core { @@ -51,11 +50,11 @@ namespace Lucene.Net.Analysis.Core } /// <summary> - /// Create a new <seealso cref="TypeTokenFilter"/>. </summary> - /// <param name="version"> the Lucene match version </param> - /// <param name="input"> the <seealso cref="TokenStream"/> to consume </param> + /// Create a new <see cref="TypeTokenFilter"/>. 
</summary> + /// <param name="version"> the <see cref="LuceneVersion"/> match version </param> + /// <param name="input"> the <see cref="TokenStream"/> to consume </param> /// <param name="stopTypes"> the types to filter </param> - /// <param name="useWhiteList"> if true, then tokens whose type is in stopTypes will + /// <param name="useWhiteList"> if true, then tokens whose type is in <paramref name="stopTypes"/> will /// be kept, otherwise they will be filtered out </param> public TypeTokenFilter(LuceneVersion version, TokenStream input, ICollection<string> stopTypes, bool useWhiteList) : base(version, input) @@ -66,9 +65,9 @@ namespace Lucene.Net.Analysis.Core } /// <summary> - /// Create a new <seealso cref="TypeTokenFilter"/> that filters tokens out + /// Create a new <see cref="TypeTokenFilter"/> that filters tokens out /// (useWhiteList=false). </summary> - /// <seealso cref= #TypeTokenFilter(Version, TokenStream, Set, boolean) </seealso> + /// <seealso cref="TypeTokenFilter.TypeTokenFilter(LuceneVersion, TokenStream, ICollection{string}, bool)"/> public TypeTokenFilter(LuceneVersion version, TokenStream input, ICollection<string> stopTypes) : this(version, input, stopTypes, false) { @@ -76,9 +75,9 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// By default accept the token if its type is not a stop type. 
- /// When the useWhiteList parameter is set to true then accept the token if its type is contained in the stopTypes + /// When the <see cref="useWhiteList"/> parameter is set to true then accept the token if its type is contained in the <see cref="stopTypes"/> /// </summary> - protected internal override bool Accept() + protected override bool Accept() { return useWhiteList == stopTypes.Contains(typeAttribute.Type); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs index bf7658b..38c69ab 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs @@ -23,15 +23,16 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Factory class for <seealso cref="TypeTokenFilter"/>. - /// <pre class="prettyprint"> + /// Factory class for <see cref="TypeTokenFilter"/>. 
+ /// <code> /// <fieldType name="chars" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.StandardTokenizerFactory"/> /// <filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt" /// useWhitelist="false"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class TypeTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware { @@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.Core private HashSet<string> stopTypes; /// <summary> - /// Creates a new TypeTokenFilterFactory </summary> + /// Creates a new <see cref="TypeTokenFilterFactory"/> </summary> public TypeTokenFilterFactory(IDictionary<string, string> args) : base(args) { @@ -63,7 +64,7 @@ namespace Lucene.Net.Analysis.Core foreach (string file in files) { IList<string> typesLines = GetLines(loader, file.Trim()); - stopTypes.AddAll(typesLines); + stopTypes.UnionWith(typesLines); } } } @@ -76,7 +77,7 @@ namespace Lucene.Net.Analysis.Core } } - public virtual HashSet<string> StopTypes + public virtual ICollection<string> StopTypes { get { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs index ca4aab4..0e65be6 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs @@ -20,16 +20,16 @@ namespace Lucene.Net.Analysis.Core * See the License for the specific language governing permissions and * limitations under the License. */ + /// <summary> /// Normalizes token text to UPPER CASE. 
- /// <a name="version"/> - /// <para>You must specify the required <seealso cref="LuceneVersion"/> - /// compatibility when creating UpperCaseFilter - /// + /// <para> + /// You must specify the required <see cref="LuceneVersion"/> + /// compatibility when creating <see cref="UpperCaseFilter"/> /// </para> /// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the /// upper case character represents more than one lower case character. Use this filter - /// when you Require uppercase tokens. Use the <seealso cref="LowerCaseFilter"/> for + /// when you Require uppercase tokens. Use the <see cref="LowerCaseFilter"/> for /// general search matching /// </para> /// </summary> @@ -39,10 +39,10 @@ namespace Lucene.Net.Analysis.Core private readonly ICharTermAttribute termAtt; /// <summary> - /// Create a new UpperCaseFilter, that normalizes token text to upper case. + /// Create a new <see cref="UpperCaseFilter"/>, that normalizes token text to upper case. /// </summary> - /// <param name="matchVersion"> See <a href="#version">above</a> </param> - /// <param name="in"> TokenStream to filter </param> + /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param> + /// <param name="in"> <see cref="TokenStream"/> to filter </param> public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in) : base(@in) { @@ -51,7 +51,7 @@ namespace Lucene.Net.Analysis.Core charUtils = CharacterUtils.GetInstance(matchVersion); } - public override bool IncrementToken() + public override sealed bool IncrementToken() { if (m_input.IncrementToken()) { @@ -64,5 +64,4 @@ namespace Lucene.Net.Analysis.Core } } } - } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs 
b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs index e4ade5c..2a6661a 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs @@ -21,26 +21,26 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Factory for <seealso cref="UpperCaseFilter"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="UpperCaseFilter"/>. + /// <code> /// <fieldType name="text_uppercase" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// <filter class="solr.UpperCaseFilterFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// /// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the /// upper case character represents more than one lower case character. Use this filter - /// when you require uppercase tokens. Use the <seealso cref="LowerCaseFilterFactory"/> for + /// when you require uppercase tokens. 
Use the <see cref="LowerCaseFilterFactory"/> for /// general search matching /// </para> /// </summary> public class UpperCaseFilterFactory : TokenFilterFactory, IMultiTermAwareComponent { - /// <summary> - /// Creates a new UpperCaseFilterFactory </summary> + /// Creates a new <see cref="UpperCaseFilterFactory"/> </summary> public UpperCaseFilterFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs index 9976966..6becd82 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs @@ -21,29 +21,26 @@ namespace Lucene.Net.Analysis.Core * See the License for the specific language governing permissions and * limitations under the License. */ + /// <summary> - /// An Analyzer that uses <seealso cref="WhitespaceTokenizer"/>. - /// <para> - /// <a name="version">You must specify the required <seealso cref="LuceneVersion"/> compatibility - /// when creating <seealso cref="CharTokenizer"/>: - /// <ul> - /// <li>As of 3.1, <seealso cref="WhitespaceTokenizer"/> uses an int based API to normalize and - /// detect token codepoints. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and - /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li> - /// </ul> - /// </para> + /// An <see cref="Analyzer"/> that uses <see cref="WhitespaceTokenizer"/>. 
/// <para> - /// + /// You must specify the required <see cref="LuceneVersion"/> compatibility + /// when creating <see cref="CharTokenizer"/>: + /// <list type="bullet"> + /// <item>As of 3.1, <see cref="WhitespaceTokenizer"/> uses an int based API to normalize and + /// detect token codepoints. See <see cref="Util.CharTokenizer.IsTokenChar(int)"/> and + /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item> + /// </list> /// </para> /// </summary> public sealed class WhitespaceAnalyzer : Analyzer { - private readonly LuceneVersion matchVersion; /// <summary> - /// Creates a new <seealso cref="WhitespaceAnalyzer"/> </summary> - /// <param name="matchVersion"> Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param> + /// Creates a new <see cref="WhitespaceAnalyzer"/> </summary> + /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param> public WhitespaceAnalyzer(LuceneVersion matchVersion) { this.matchVersion = matchVersion; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs index 5ccdbbf..a60a679 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs @@ -5,7 +5,6 @@ using Lucene.Net.Util; namespace Lucene.Net.Analysis.Core { - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -22,26 +21,24 @@ namespace Lucene.Net.Analysis.Core * See the License for the specific language governing permissions and * limitations under the License. 
*/ + /// <summary> - /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace. - /// Adjacent sequences of non-Whitespace characters form tokens. <a - /// name="version"/> + /// A <see cref="WhitespaceTokenizer"/> is a tokenizer that divides text at whitespace. + /// Adjacent sequences of non-Whitespace characters form tokens. /// <para> - /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating - /// <seealso cref="WhitespaceTokenizer"/>: - /// <ul> - /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and - /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and - /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li> - /// </ul> + /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating + /// <see cref="WhitespaceTokenizer"/>: + /// <list type="bullet"> + /// <item>As of 3.1, <see cref="CharTokenizer"/> uses an int based API to normalize and + /// detect token characters. See <see cref="CharTokenizer.IsTokenChar(int)"/> and + /// <see cref="CharTokenizer.Normalize(int)"/> for details.</item> + /// </list> /// </para> /// </summary> public sealed class WhitespaceTokenizer : CharTokenizer { - - /// Construct a new WhitespaceTokenizer. * <param name="matchVersion"> Lucene version - /// to match See <seealso cref="<a href="#version">above</a>"/> - /// </param> + /// Construct a new <see cref="WhitespaceTokenizer"/>. + /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match</param> /// <param name="in"> /// the input to split up into tokens </param> public WhitespaceTokenizer(LuceneVersion matchVersion, TextReader @in) @@ -50,14 +47,12 @@ namespace Lucene.Net.Analysis.Core } /// <summary> - /// Construct a new WhitespaceTokenizer using a given - /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. 
- /// - /// @param - /// matchVersion Lucene version to match See - /// <seealso cref="<a href="#version">above</a>"/> </summary> + /// Construct a new <see cref="WhitespaceTokenizer"/> using a given + /// <see cref="AttributeSource.AttributeFactory"/>. + /// </summary> + /// <param name="matchVersion"><see cref="LuceneVersion"/> to match</param> /// <param name="factory"> - /// the attribute factory to use for this <seealso cref="Tokenizer"/> </param> + /// the attribute factory to use for this <see cref="Tokenizer"/> </param> /// <param name="in"> /// the input to split up into tokens </param> public WhitespaceTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in) @@ -67,7 +62,7 @@ namespace Lucene.Net.Analysis.Core /// <summary> /// Collects only characters which do not satisfy - /// <seealso cref="Character#isWhitespace(int)"/>. + /// <see cref="char.IsWhitespace(char)"/>. /// </summary> protected override bool IsTokenChar(int c) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs index 1198f48..645a6c9 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs @@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core */ /// <summary> - /// Factory for <seealso cref="WhitespaceTokenizer"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="WhitespaceTokenizer"/>. 
+ /// <code> /// <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class WhitespaceTokenizerFactory : TokenizerFactory { - /// <summary> - /// Creates a new WhitespaceTokenizerFactory </summary> + /// Creates a new <see cref="WhitespaceTokenizerFactory"/> </summary> public WhitespaceTokenizerFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs index 983dc32..2b6f70b 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs @@ -64,7 +64,7 @@ namespace Lucene.Net.Analysis.Miscellaneous termAtt = AddAttribute<ICharTermAttribute>(); } - protected internal override bool Accept() + protected override bool Accept() { int max32 = termAtt.Length; int min32 = max32 >> 1; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs index 98b1c84..82ec1bc 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs @@ -58,7 +58,7 @@ namespace Lucene.Net.Analysis.Miscellaneous termAtt = 
AddAttribute<ICharTermAttribute>(); } - protected internal override bool Accept() + protected override bool Accept() { return words.Contains(termAtt.Buffer, 0, termAtt.Length); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs index d82b34b..e02fd24 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs @@ -77,7 +77,7 @@ namespace Lucene.Net.Analysis.Miscellaneous this.termAtt = AddAttribute<ICharTermAttribute>(); } - protected internal override bool Accept() + protected override bool Accept() { int len = termAtt.Length; return (len >= min && len <= max); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs index c3c1f41..688c890 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs @@ -79,7 +79,7 @@ namespace Lucene.Net.Analysis.Util /// <summary> /// Override this method and return if the current input token should be returned by <seealso cref="#incrementToken"/>. 
</summary> - protected internal abstract bool Accept(); + protected abstract bool Accept(); public override sealed bool IncrementToken() { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs index 7ea6673..9f32238 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs @@ -30,6 +30,6 @@ /// Returns an analysis component to handle analysis if multi-term queries. /// The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory. /// </summary> - AbstractAnalysisFactory MultiTermComponent { get; } + AbstractAnalysisFactory MultiTermComponent { get; } // LUCENENET TODO: Change to GetMultiTermComponent() ? Some implementations return new instance. 
} } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs index 5576495..5a7d81d 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs @@ -31,7 +31,7 @@ namespace Lucene.Net.Analysis.Core public virtual void TestInform() { TypeTokenFilterFactory factory = (TypeTokenFilterFactory)TokenFilterFactory("Type", "types", "stoptypes-1.txt", "enablePositionIncrements", "true"); - ISet<string> types = factory.StopTypes; + ICollection<string> types = factory.StopTypes; assertTrue("types is null and it shouldn't be", types != null); assertTrue("types Size: " + types.Count + " is not: " + 2, types.Count == 2); assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.EnablePositionIncrements);
