Lucene.Net.Analysis.CommonGrams refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/487927c0 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/487927c0 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/487927c0 Branch: refs/heads/api-work Commit: 487927c003fd5d42e4b72d642278683ca0d31aec Parents: 3e97f31 Author: Shad Storhaug <[email protected]> Authored: Thu Feb 2 23:43:22 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Fri Feb 3 01:13:43 2017 +0700 ---------------------------------------------------------------------- .../Analysis/CommonGrams/CommonGramsFilter.cs | 49 +++++++++------ .../CommonGrams/CommonGramsFilterFactory.cs | 9 +-- .../CommonGrams/CommonGramsQueryFilter.cs | 63 ++++++++++++-------- .../CommonGramsQueryFilterFactory.cs | 12 ++-- src/Lucene.Net.Core/Analysis/TokenFilter.cs | 19 ++++-- src/Lucene.Net.Core/Analysis/TokenStream.cs | 14 ++--- 6 files changed, 99 insertions(+), 67 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs index fcfe42d..e7578be 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs @@ -31,15 +31,14 @@ namespace Lucene.Net.Analysis.CommonGrams /// <summary> /// Construct bigrams for frequently occurring terms while indexing. Single terms /// are still indexed too, with bigrams overlaid. This is achieved through the - /// use of <seealso cref="PositionIncrementAttribute#setPositionIncrement(int)"/>. Bigrams have a type - /// of <seealso cref="#GRAM_TYPE"/> Example: - /// <ul> - /// <li>input:"the quick brown fox"</li> - /// <li>output:|"the","the-quick"|"brown"|"fox"|</li> - /// <li>"the-quick" has a position increment of 0 so it is in the same position - /// as "the" "the-quick" has a term.type() of "gram"</li> - /// - /// </ul> + /// use of <see cref="PositionIncrementAttribute.PositionIncrement"/>. Bigrams have a type + /// of <see cref="GRAM_TYPE"/> Example: + /// <list type="bullet"> + /// <item>input:"the quick brown fox"</item> + /// <item>output:|"the","the-quick"|"brown"|"fox"|</item> + /// <item>"the-quick" has a position increment of 0 so it is in the same position + /// as "the" "the-quick" has a term.type() of "gram"</item> + /// </list> /// </summary> /* @@ -47,7 +46,6 @@ namespace Lucene.Net.Analysis.CommonGrams */ public sealed class CommonGramsFilter : TokenFilter { - public const string GRAM_TYPE = "gram"; private const char SEPARATOR = '_'; @@ -71,7 +69,7 @@ namespace Lucene.Net.Analysis.CommonGrams /// bigrams with position increment 0 type=gram where one or both of the words /// in a potential bigram are in the set of common words . /// </summary> - /// <param name="input"> TokenStream input in filter chain </param> + /// <param name="input"> <see cref="TokenStream"/> input in filter chain </param> /// <param name="commonWords"> The set of common words. </param> public CommonGramsFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet commonWords) : base(input) @@ -89,11 +87,11 @@ namespace Lucene.Net.Analysis.CommonGrams /// output the token. If the token and/or the following token are in the list /// of common words also output a bigram with position increment 0 and /// type="gram" - /// + /// <para/> /// TODO:Consider adding an option to not emit unigram stopwords - /// as in CDL XTF BigramStopFilter, CommonGramsQueryFilter would need to be + /// as in CDL XTF BigramStopFilter, <see cref="CommonGramsQueryFilter"/> would need to be /// changed to work with this. - /// + /// <para/> /// TODO: Consider optimizing for the case of three /// commongrams i.e "man of the year" normally produces 3 bigrams: "man-of", /// "of-the", "the-year" but with proper management of positions we could @@ -119,7 +117,7 @@ namespace Lucene.Net.Analysis.CommonGrams * When valid, the buffer always contains at least the separator. * If its empty, there is nothing before this stopword. */ - if (lastWasCommon || (Common && buffer.Length > 0)) + if (lastWasCommon || (IsCommon && buffer.Length > 0)) { savedState = CaptureState(); GramToken(); @@ -131,8 +129,21 @@ namespace Lucene.Net.Analysis.CommonGrams } /// <summary> - /// {@inheritDoc} + /// This method is called by a consumer before it begins consumption using + /// <see cref="IncrementToken()"/>. + /// <para/> + /// Resets this stream to a clean state. Stateful implementations must implement + /// this method so that they can be reused, just as if they had been created fresh. + /// <para/> + /// If you override this method, always call <c>base.Reset()</c>, otherwise + /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will + /// throw <see cref="System.InvalidOperationException"/> on further usage). /// </summary> + /// <remarks> + /// <b>NOTE:</b> + /// The default implementation chains the call to the input <see cref="TokenStream"/>, so + /// be sure to call <c>base.Reset()</c> when overriding this method. + /// </remarks> public override void Reset() { base.Reset(); @@ -146,8 +157,8 @@ namespace Lucene.Net.Analysis.CommonGrams /// <summary> /// Determines if the current token is a common term /// </summary> - /// <returns> {@code true} if the current token is a common term, {@code false} otherwise </returns> - private bool Common + /// <returns> <c>true</c> if the current token is a common term, <c>false</c> otherwise </returns> + private bool IsCommon { get { @@ -164,7 +175,7 @@ namespace Lucene.Net.Analysis.CommonGrams buffer.Append(termAttribute.Buffer, 0, termAttribute.Length); buffer.Append(SEPARATOR); lastStartOffset = offsetAttribute.StartOffset; - lastWasCommon = Common; + lastWasCommon = IsCommon; } /// <summary> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs index f63a71f..333ac68 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs @@ -23,14 +23,15 @@ namespace Lucene.Net.Analysis.CommonGrams */ /// <summary> - /// Constructs a <seealso cref="CommonGramsFilter"/>. - /// <pre class="prettyprint"> + /// Constructs a <see cref="CommonGramsFilter"/>. + /// <code> /// <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware { @@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.CommonGrams private readonly bool ignoreCase; /// <summary> - /// Creates a new CommonGramsFilterFactory </summary> + /// Creates a new <see cref="CommonGramsFilterFactory"/> </summary> public CommonGramsFilterFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs index 366621d..32039ca 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs @@ -18,28 +18,26 @@ namespace Lucene.Net.Analysis.CommonGrams * See the License for the specific language governing permissions and * limitations under the License. */ - + /// <summary> - /// Wrap a CommonGramsFilter optimizing phrase queries by only returning single + /// Wrap a <see cref="CommonGramsFilter"/> optimizing phrase queries by only returning single /// words when they are not a member of a bigram. - /// + /// <para/> /// Example: - /// <ul> - /// <li>query input to CommonGramsFilter: "the rain in spain falls mainly" - /// <li>output of CommomGramsFilter/input to CommonGramsQueryFilter: - /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly" - /// <li>output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain", - /// "falls", "mainly" - /// </ul> + /// <list type="bullet"> + /// <item>query input to CommonGramsFilter: "the rain in spain falls mainly"</item> + /// <item>output of CommomGramsFilter/input to CommonGramsQueryFilter: + /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly"</item> + /// <item>output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain", + /// "falls", "mainly"</item> + /// </list> /// </summary> - - /* - * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and - * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798 - */ + /// <remarks> + /// See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and + /// http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798 + /// </remarks> public sealed class CommonGramsQueryFilter : TokenFilter { - private readonly ITypeAttribute typeAttribute; private readonly IPositionIncrementAttribute posIncAttribute; @@ -59,8 +57,21 @@ namespace Lucene.Net.Analysis.CommonGrams } /// <summary> - /// {@inheritDoc} + /// This method is called by a consumer before it begins consumption using + /// <see cref="IncrementToken()"/>. + /// <para/> + /// Resets this stream to a clean state. Stateful implementations must implement + /// this method so that they can be reused, just as if they had been created fresh. + /// <para/> + /// If you override this method, always call <c>base.Reset()</c>, otherwise + /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will + /// throw <see cref="InvalidOperationException"/> on further usage). /// </summary> + /// <remarks> + /// <b>NOTE:</b> + /// The default implementation chains the call to the input <see cref="TokenStream"/>, so + /// be sure to call <c>base.Reset()</c> when overriding this method. + /// </remarks> public override void Reset() { base.Reset(); @@ -72,10 +83,10 @@ namespace Lucene.Net.Analysis.CommonGrams /// <summary> /// Output bigrams whenever possible to optimize queries. Only output unigrams /// when they are not a member of a bigram. Example: - /// <ul> - /// <li>input: "the rain in spain falls mainly" - /// <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly" - /// </ul> + /// <list type="bullet"> + /// <item>input: "the rain in spain falls mainly"</item> + /// <item>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"</item> + /// </list> /// </summary> public override bool IncrementToken() { @@ -83,13 +94,13 @@ namespace Lucene.Net.Analysis.CommonGrams { State current = CaptureState(); - if (previous != null && !GramType) + if (previous != null && !IsGramType) { RestoreState(previous); previous = current; previousType = typeAttribute.Type; - if (GramType) + if (IsGramType) { posIncAttribute.PositionIncrement = 1; } @@ -109,7 +120,7 @@ namespace Lucene.Net.Analysis.CommonGrams RestoreState(previous); previous = null; - if (GramType) + if (IsGramType) { posIncAttribute.PositionIncrement = 1; } @@ -121,8 +132,8 @@ namespace Lucene.Net.Analysis.CommonGrams /// <summary> /// Convenience method to check if the current type is a gram type /// </summary> - /// <returns> {@code true} if the current type is a gram type, {@code false} otherwise </returns> - public bool GramType + /// <returns> <c>true</c> if the current type is a gram type, <c>false</c> otherwise </returns> + public bool IsGramType { get { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs index f797390..1e067e9 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs @@ -21,28 +21,28 @@ namespace Lucene.Net.Analysis.CommonGrams /// <summary> - /// Construct <seealso cref="CommonGramsQueryFilter"/>. + /// Construct <see cref="CommonGramsQueryFilter"/>. /// - /// <pre class="prettyprint"> + /// <code> /// <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class CommonGramsQueryFilterFactory : CommonGramsFilterFactory { - /// <summary> - /// Creates a new CommonGramsQueryFilterFactory </summary> + /// Creates a new <see cref="CommonGramsQueryFilterFactory"/> </summary> public CommonGramsQueryFilterFactory(IDictionary<string, string> args) : base(args) { } /// <summary> - /// Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter + /// Create a <see cref="CommonGramsFilter"/> and wrap it with a <see cref="CommonGramsQueryFilter"/> /// </summary> public override TokenStream Create(TokenStream input) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Analysis/TokenFilter.cs b/src/Lucene.Net.Core/Analysis/TokenFilter.cs index d6a96fb..b082d6a 100644 --- a/src/Lucene.Net.Core/Analysis/TokenFilter.cs +++ b/src/Lucene.Net.Core/Analysis/TokenFilter.cs @@ -61,12 +61,21 @@ namespace Lucene.Net.Analysis } /// <summary> - /// {@inheritDoc} - /// <p> - /// <b>NOTE:</b> - /// The default implementation chains the call to the input TokenStream, so - /// be sure to call <code>super.reset()</code> when overriding this method. + /// This method is called by a consumer before it begins consumption using + /// <see cref="IncrementToken()"/>. + /// <para/> + /// Resets this stream to a clean state. Stateful implementations must implement + /// this method so that they can be reused, just as if they had been created fresh. + /// <para/> + /// If you override this method, always call <c>base.Reset()</c>, otherwise + /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will + /// throw <see cref="InvalidOperationException"/> on further usage). /// </summary> + /// <remarks> + /// <b>NOTE:</b> + /// The default implementation chains the call to the input <see cref="TokenStream"/>, so + /// be sure to call <c>base.Reset()</c> when overriding this method. + /// </remarks> public override void Reset() { m_input.Reset(); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenStream.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Analysis/TokenStream.cs b/src/Lucene.Net.Core/Analysis/TokenStream.cs index 90bf3f2..1e104e9 100644 --- a/src/Lucene.Net.Core/Analysis/TokenStream.cs +++ b/src/Lucene.Net.Core/Analysis/TokenStream.cs @@ -183,15 +183,15 @@ namespace Lucene.Net.Analysis } /// <summary> - /// this method is called by a consumer before it begins consumption using - /// <seealso cref="#IncrementToken()"/>. - /// <p> + /// This method is called by a consumer before it begins consumption using + /// <see cref="IncrementToken()"/>. + /// <para/> /// Resets this stream to a clean state. Stateful implementations must implement /// this method so that they can be reused, just as if they had been created fresh. - /// <p> - /// If you override this method, always call {@code super.reset()}, otherwise - /// some internal state will not be correctly reset (e.g., <seealso cref="Tokenizer"/> will - /// throw <seealso cref="IllegalStateException"/> on further usage). + /// <para/> + /// If you override this method, always call <c>base.Reset()</c>, otherwise + /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will + /// throw <see cref="InvalidOperationException"/> on further usage). /// </summary> public virtual void Reset() {
