Lucene.Net.Analysis.Cn refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3e97f31e Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3e97f31e Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3e97f31e Branch: refs/heads/api-work Commit: 3e97f31e190f7c3a7781a45e9807e609a1e06393 Parents: 0986545 Author: Shad Storhaug <[email protected]> Authored: Thu Feb 2 23:22:53 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Fri Feb 3 01:13:42 2017 +0700 ---------------------------------------------------------------------- .../Analysis/Cn/ChineseAnalyzer.cs | 23 +++++----- .../Analysis/Cn/ChineseFilter.cs | 37 ++++++++-------- .../Analysis/Cn/ChineseFilterFactory.cs | 6 +-- .../Analysis/Cn/ChineseTokenizer.cs | 45 ++++++++++---------- .../Analysis/Cn/ChineseTokenizerFactory.cs | 8 ++-- 5 files changed, 61 insertions(+), 58 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs index 5dc0aa6..de0b5e7 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs @@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Cn */ /// <summary> - /// An <seealso cref="Analyzer"/> that tokenizes text with <seealso cref="ChineseTokenizer"/> and - /// filters with <seealso cref="ChineseFilter"/> </summary> - /// @deprecated (3.1) Use <seealso cref="StandardAnalyzer"/> instead, which has the same functionality. + /// An <see cref="Analyzer"/> that tokenizes text with <see cref="ChineseTokenizer"/> and + /// filters with <see cref="ChineseFilter"/> + /// </summary> + /// @deprecated (3.1) Use <see cref="Standard.StandardAnalyzer"/> instead, which has the same functionality. /// This analyzer will be removed in Lucene 5.0 [Obsolete("(3.1) Use StandardAnalyzer instead, which has the same functionality.")] public sealed class ChineseAnalyzer : Analyzer - /// <summary> - /// Creates - /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> - /// used to tokenize all the text in the provided <seealso cref="Reader"/>. - /// </summary> - /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/> - /// built from a <seealso cref="ChineseTokenizer"/> filtered with - /// <seealso cref="ChineseFilter"/> </returns> { + /// <summary> + /// Creates + /// <see cref="Analyzer.TokenStreamComponents"/> + /// used to tokenize all the text in the provided <see cref="TextReader"/>. + /// </summary> + /// <returns> <see cref="Analyzer.TokenStreamComponents"/> + /// built from a <see cref="ChineseTokenizer"/> filtered with + /// <see cref="ChineseFilter"/> </returns> protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) { Tokenizer source = new ChineseTokenizer(reader); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs index 9b3b95a..61e6576 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs @@ -25,28 +25,32 @@ namespace Lucene.Net.Analysis.Cn */ /// <summary> - /// A <seealso cref="TokenFilter"/> with a stop word table. - /// <ul> - /// <li>Numeric tokens are removed. - /// <li>English tokens must be larger than 1 character. - /// <li>One Chinese character as one Chinese word. - /// </ul> + /// A <see cref="TokenFilter"/> with a stop word table. + /// <list type="bullet"> + /// <item>Numeric tokens are removed.</item> + /// <item>English tokens must be larger than 1 character.</item> + /// <item>One Chinese character as one Chinese word.</item> + /// </list> /// TO DO: - /// <ol> - /// <li>Add Chinese stop words, such as \ue400 - /// <li>Dictionary based Chinese word extraction - /// <li>Intelligent Chinese word extraction - /// </ol> + /// <list type="number"> + /// <item>Add Chinese stop words, such as \ue400</item> + /// <item>Dictionary based Chinese word extraction</item> + /// <item>Intelligent Chinese word extraction</item> + /// </list> /// </summary> - /// @deprecated (3.1) Use <seealso cref="StopFilter"/> instead, which has the same functionality. + /// @deprecated (3.1) Use <see cref="Core.StopFilter"/> instead, which has the same functionality. /// This filter will be removed in Lucene 5.0 [Obsolete("(3.1) Use StopFilter instead, which has the same functionality.")] public sealed class ChineseFilter : TokenFilter { - // Only English now, Chinese to be added later. - public static readonly string[] STOP_WORDS = new string[] { "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" }; - + public static readonly string[] STOP_WORDS = new string[] { + "and", "are", "as", "at", "be", "but", "by", + "for", "if", "in", "into", "is", "it", + "no", "not", "of", "on", "or", "such", + "that", "the", "their", "then", "there", "these", + "they", "this", "to", "was", "will", "with" + }; private CharArraySet stopTable; @@ -55,13 +59,12 @@ namespace Lucene.Net.Analysis.Cn public ChineseFilter(TokenStream @in) : base(@in) { - stopTable = new CharArraySet(LuceneVersion.LUCENE_CURRENT, Arrays.AsList(STOP_WORDS), false); termAtt = AddAttribute<ICharTermAttribute>(); } + public override bool IncrementToken() { - while (m_input.IncrementToken()) { char[] text = termAtt.Buffer; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs index d3e30e5..98ddee9 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs @@ -22,14 +22,14 @@ namespace Lucene.Net.Analysis.Cn */ /// <summary> - /// Factory for <seealso cref="ChineseFilter"/> </summary> - /// @deprecated Use <seealso cref="StopFilterFactory"/> instead. + /// Factory for <see cref="ChineseFilter"/> </summary> + /// @deprecated Use <see cref="Core.StopFilterFactory"/> instead. [Obsolete("Use StopFilterFactory instead.")] public class ChineseFilterFactory : TokenFilterFactory { /// <summary> - /// Creates a new ChineseFilterFactory </summary> + /// Creates a new <see cref="ChineseFilterFactory"/> </summary> public ChineseFilterFactory(IDictionary<string, string> args) : base(args) { if (args.Count > 0) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs index 4ae7ff8..eb500bb 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs @@ -26,27 +26,28 @@ namespace Lucene.Net.Analysis.Cn /// Tokenize Chinese text as individual chinese characters. /// /// <para> - /// The difference between ChineseTokenizer and - /// CJKTokenizer is that they have different + /// The difference between <see cref="ChineseTokenizer"/> and + /// <see cref="Cjk.CJKTokenizer"/> is that they have different /// token parsing logic. /// </para> /// <para> /// For example, if the Chinese text /// "C1C2C3C4" is to be indexed: - /// <ul> - /// <li>The tokens returned from ChineseTokenizer are C1, C2, C3, C4. - /// <li>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4. - /// </ul> + /// <list type="bullet"> + /// <item>The tokens returned from ChineseTokenizer are C1, C2, C3, C4.</item> + /// <item>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.</item> + /// </list> /// </para> /// <para> - /// Therefore the index created by CJKTokenizer is much larger. + /// Therefore the index created by <see cref="CJKTokenizer"/> is much larger. /// </para> /// <para> /// The problem is that when searching for C1, C1C2, C1C3, - /// C4C2, C1C2C3 ... the ChineseTokenizer works, but the - /// CJKTokenizer will not work. - /// </para> </summary> - /// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead, which has the same functionality. + /// C4C2, C1C2C3 ... the <see cref="ChineseTokenizer"/> works, but the + /// <see cref="Cjk.CJKTokenizer"/> will not work. + /// </para> + /// </summary> + /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead, which has the same functionality. /// This filter will be removed in Lucene 5.0 [Obsolete("(3.1) Use StandardTokenizer instead, which has the same functionality.")] public sealed class ChineseTokenizer : Tokenizer @@ -82,9 +83,8 @@ namespace Lucene.Net.Analysis.Cn private ICharTermAttribute termAtt; private IOffsetAttribute offsetAtt; - private void push(char c) + private void Push(char c) { - if (length == 0) // start of token { start = offset - 1; @@ -93,9 +93,8 @@ namespace Lucene.Net.Analysis.Cn } - private bool flush() + private bool Flush() { - if (length > 0) { //System.out.println(new String(buffer, 0, @@ -132,7 +131,7 @@ namespace Lucene.Net.Analysis.Cn if (dataLen <= 0) { offset--; - return flush(); + return Flush(); } else { @@ -145,10 +144,10 @@ namespace Lucene.Net.Analysis.Cn case UnicodeCategory.DecimalDigitNumber: case UnicodeCategory.LowercaseLetter: case UnicodeCategory.UppercaseLetter: - push(c); + Push(c); if (length == MAX_WORD_LEN) { - return flush(); + return Flush(); } break; @@ -157,22 +156,22 @@ namespace Lucene.Net.Analysis.Cn { bufferIndex--; offset--; - return flush(); + return Flush(); } - push(c); - return flush(); + Push(c); + return Flush(); default: if (length > 0) { - return flush(); + return Flush(); } break; } } } - public override void End() + public override sealed void End() { base.End(); // set final offset http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs index 2eef7be..b71906e 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs @@ -24,14 +24,14 @@ namespace Lucene.Net.Analysis.Cn */ /// <summary> - /// Factory for <seealso cref="ChineseTokenizer"/> </summary> - /// @deprecated Use <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead. + /// Factory for <see cref="ChineseTokenizer"/> + /// </summary> + /// @deprecated Use <see cref="Standard.StandardTokenizerFactory"/> instead. [Obsolete("Use StandardTokenizerFactory instead.")] public class ChineseTokenizerFactory : TokenizerFactory { - /// <summary> - /// Creates a new ChineseTokenizerFactory </summary> + /// Creates a new <see cref="ChineseTokenizerFactory"/> </summary> public ChineseTokenizerFactory(IDictionary<string, string> args) : base(args) {
