Lucene.Net.Collation refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e67f7979 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e67f7979 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e67f7979 Branch: refs/heads/api-work Commit: e67f79794936d68b4742f998d916c79e7dab5dcf Parents: d390386 Author: Shad Storhaug <[email protected]> Authored: Sat Feb 4 22:19:30 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Sat Feb 4 23:08:21 2017 +0700 ---------------------------------------------------------------------- .../Collation/CollationAttributeFactory.cs | 55 +++++------ .../Collation/CollationKeyAnalyzer.cs | 66 ++++++------- .../Collation/CollationKeyFilter.cs | 97 ++++++++++---------- .../Collation/CollationKeyFilterFactory.cs | 87 +++++++++--------- .../CollatedTermAttributeImpl.cs | 8 +- 5 files changed, 157 insertions(+), 156 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs index b057e7d..6e15ad1 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs @@ -24,32 +24,32 @@ namespace Lucene.Net.Collation /// <summary> /// <para> - /// Converts each token into its <see cref="CollationKey"/>, and then + /// Converts each token into its <see cref="System.Globalization.SortKey"/>, and then /// encodes the bytes as an index term. /// </para> /// <para> - /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at - /// index and query time -- CollationKeys are only comparable when produced by - /// the same Collator. Since <see cref="RuleBasedCollator"/>s are not + /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at + /// index and query time -- <see cref="System.Globalization.SortKey"/>s are only comparable when produced by + /// the same <see cref="Collator"/>. Since <see cref="RuleBasedCollator"/>s are not /// independently versioned, it is unsafe to search against stored - /// CollationKeys unless the following are exactly the same (best practice is + /// <see cref="System.Globalization.SortKey"/>s unless the following are exactly the same (best practice is /// to store this information with the index and check that they remain the /// same at query time): /// </para> - /// <ol> - /// <li>JVM vendor</li> - /// <li>JVM version, including patch version</li> - /// <li> + /// <list type="number"> + /// <item>JVM vendor</item> + /// <item>JVM version, including patch version</item> + /// <item> /// The language (and country and variant, if specified) of the Locale /// used when constructing the collator via - /// <see cref="Collator#getInstance(Locale)"/>. - /// </li> - /// <li> - /// The collation strength used - see <see cref="Collator#setStrength(int)"/> - /// </li> - /// </ol> + /// <see cref="Collator.Create(System.Globalization.CultureInfo)"/>. + /// </item> + /// <item> + /// The collation strength used - see <see cref="Collator.Strength"/> + /// </item> + /// </list> /// <para> - /// The <code>ICUCollationAttributeFactory</code> in the analysis-icu package + /// The <c>ICUCollationAttributeFactory</c> in the analysis-icu package /// uses ICU4J's Collator, which makes its /// version available, thus allowing collation to be versioned independently /// from the JVM. ICUCollationAttributeFactory is also significantly faster and @@ -57,7 +57,7 @@ namespace Lucene.Net.Collation /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun" /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key /// generation timing and key length comparisons between ICU4J and - /// java.text.Collator over several languages. + /// <see cref="Collator"/> over several languages. /// </para> /// <para> /// CollationKeys generated by java.text.Collators are not compatible @@ -75,20 +75,21 @@ namespace Lucene.Net.Collation private readonly AttributeSource.AttributeFactory @delegate; /// <summary> - /// Create a CollationAttributeFactory, using - /// <see cref="AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY"/> as the + /// Create a <see cref="CollationAttributeFactory"/>, using + /// <see cref="AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY"/> as the /// factory for all other attributes. </summary> - /// <param name="collator"> CollationKey generator </param> - public CollationAttributeFactory(Collator collator) : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator) + /// <param name="collator"> <see cref="System.Globalization.SortKey"/> generator </param> + public CollationAttributeFactory(Collator collator) + : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator) { } - /// <summary> - /// Create a CollationAttributeFactory, using the supplied Attribute Factory - /// as the factory for all other attributes. </summary> - /// <param name="delegate"> Attribute Factory </param> - /// <param name="collator"> CollationKey generator </param> - public CollationAttributeFactory(AttributeSource.AttributeFactory @delegate, Collator collator) + /// <summary> + /// Create a <see cref="CollationAttributeFactory"/>, using the supplied Attribute Factory + /// as the factory for all other attributes. </summary> + /// <param name="delegate"> Attribute Factory </param> + /// <param name="collator"> <see cref="System.Globalization.SortKey"/> generator </param> + public CollationAttributeFactory(AttributeSource.AttributeFactory @delegate, Collator collator) { this.@delegate = @delegate; this.collator = collator; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs index 4eabd4d..6ebee3d 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs @@ -29,34 +29,34 @@ namespace Lucene.Net.Collation /// Configures <see cref="KeywordTokenizer"/> with <see cref="CollationAttributeFactory"/>. /// </para> /// <para> - /// Converts the token into its <see cref="java.text.CollationKey"/>, and then - /// encodes the CollationKey either directly or with - /// <see cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow + /// Converts the token into its <see cref="System.Globalization.SortKey"/>, and then + /// encodes the <see cref="System.Globalization.SortKey"/> either directly or with + /// <see cref="IndexableBinaryStringTools"/> (see version note below), to allow /// it to be stored as an index term. /// </para> /// <para> - /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at - /// index and query time -- CollationKeys are only comparable when produced by - /// the same Collator. Since <see cref="java.text.RuleBasedCollator"/>s are not + /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at + /// index and query time -- <see cref="System.Globalization.SortKey"/> are only comparable when produced by + /// the same <see cref="Collator"/>. Since <c>java.text.RuleBasedCollators</c> are not /// independently versioned, it is unsafe to search against stored - /// CollationKeys unless the following are exactly the same (best practice is + /// <see cref="System.Globalization.SortKey"/> unless the following are exactly the same (best practice is /// to store this information with the index and check that they remain the /// same at query time): /// </para> - /// <ol> - /// <li>JVM vendor</li> - /// <li>JVM version, including patch version</li> - /// <li> + /// <list type="number"> + /// <item>JVM vendor</item> + /// <item>JVM version, including patch version</item> + /// <item> /// The language (and country and variant, if specified) of the Locale /// used when constructing the collator via - /// <see cref="Collator#getInstance(java.util.Locale)"/>. - /// </li> - /// <li> - /// The collation strength used - see <see cref="Collator#setStrength(int)"/> - /// </li> - /// </ol> + /// <see cref="Collator.Create(System.Globalization.CultureInfo)"/>. + /// </item> + /// <item> + /// The collation strength used - see <see cref="Collator.Strength"/> + /// </item> + /// </list> /// <para> - /// The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package + /// The <c>ICUCollationKeyAnalyzer</c> in the analysis-icu package /// uses ICU4J's Collator, which makes its /// its version available, thus allowing collation to be versioned /// independently from the JVM. ICUCollationKeyAnalyzer is also significantly @@ -64,21 +64,20 @@ namespace Lucene.Net.Collation /// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun" /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key /// generation timing and key length comparisons between ICU4J and - /// java.text.Collator over several languages. + /// <see cref="Collator"/> over several languages. /// </para> /// <para> - /// CollationKeys generated by java.text.Collators are not compatible + /// CollationKeys generated by <see cref="Collator"/> are not compatible /// with those those generated by ICU Collators. Specifically, if you use /// CollationKeyAnalyzer to generate index terms, do not use /// ICUCollationKeyAnalyzer on the query side, or vice versa. /// </para> - /// <a name="version"/> /// <para>You must specify the required <see cref="LuceneVersion"/> - /// compatibility when creating CollationKeyAnalyzer: - /// <ul> - /// <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous - /// versions will encode the bytes with <see cref="IndexableBinaryStringTools"/>. - /// </ul> + /// compatibility when creating <see cref="CollationKeyAnalyzer"/>: + /// <list type="bullet"> + /// <item> As of 4.0, Collation Keys are directly encoded as bytes. Previous + /// versions will encode the bytes with <see cref="IndexableBinaryStringTools"/>.</item> + /// </list> /// </para> /// </summary> // LUCENENET TODO: A better option would be to contribute to the icu.net library and @@ -90,12 +89,12 @@ namespace Lucene.Net.Collation private readonly CollationAttributeFactory factory; private readonly LuceneVersion matchVersion; - /// <summary> - /// Create a new CollationKeyAnalyzer, using the specified collator. - /// </summary> - /// <param name="matchVersion"> See <a href="#version">above</a> </param> - /// <param name="collator"> CollationKey generator </param> - public CollationKeyAnalyzer(LuceneVersion matchVersion, Collator collator) + /// <summary> + /// Create a new <see cref="CollationKeyAnalyzer"/>, using the specified collator. + /// </summary> + /// <param name="matchVersion"> See <see cref="CollationKeyAnalyzer"/> </param> + /// <param name="collator"> <see cref="System.Globalization.SortKey"/> generator </param> + public CollationKeyAnalyzer(LuceneVersion matchVersion, Collator collator) { this.matchVersion = matchVersion; this.collator = collator; @@ -103,7 +102,8 @@ namespace Lucene.Net.Collation } [Obsolete("Use <seealso cref=\"CollationKeyAnalyzer#CollationKeyAnalyzer(LuceneVersion, Collator)\"/> and specify a version instead. This ctor will be removed in Lucene 5.0")] - public CollationKeyAnalyzer(Collator collator) : this(LuceneVersion.LUCENE_31, collator) + public CollationKeyAnalyzer(Collator collator) + : this(LuceneVersion.LUCENE_31, collator) { } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs index 477e524..ae90816 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs @@ -23,53 +23,53 @@ namespace Lucene.Net.Collation * limitations under the License. */ - /// <summary> - /// <para> - /// Converts each token into its <see cref="java.text.CollationKey"/>, and then - /// encodes the CollationKey with <see cref="IndexableBinaryStringTools"/>, to allow - /// it to be stored as an index term. - /// </para> - /// <para> - /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at - /// index and query time -- CollationKeys are only comparable when produced by - /// the same Collator. Since <see cref="java.text.RuleBasedCollator"/>s are not - /// independently versioned, it is unsafe to search against stored - /// CollationKeys unless the following are exactly the same (best practice is - /// to store this information with the index and check that they remain the - /// same at query time): - /// </para> - /// <ol> - /// <li>JVM vendor</li> - /// <li>JVM version, including patch version</li> - /// <li> - /// The language (and country and variant, if specified) of the Locale - /// used when constructing the collator via - /// <see cref="Collator#getInstance(CultureInfo)"/>. - /// </li> - /// <li> - /// The collation strength used - see <see cref="Collator#setStrength(int)"/> - /// </li> - /// </ol> - /// <para> - /// The <code>ICUCollationKeyFilter</code> in the analysis-icu package - /// uses ICU4J's Collator, which makes its - /// version available, thus allowing collation to be versioned independently - /// from the JVM. ICUCollationKeyFilter is also significantly faster and - /// generates significantly shorter keys than CollationKeyFilter. See - /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun" - /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key - /// generation timing and key length comparisons between ICU4J and - /// java.text.Collator over several languages. - /// </para> - /// <para> - /// CollationKeys generated by java.text.Collators are not compatible - /// with those those generated by ICU Collators. Specifically, if you use - /// CollationKeyFilter to generate index terms, do not use - /// ICUCollationKeyFilter on the query side, or vice versa. - /// </para> </summary> - /// @deprecated Use <see cref="CollationAttributeFactory"/> instead, which encodes - /// terms directly as bytes. This filter will be removed in Lucene 5.0 - [Obsolete("Use <seealso cref=\"CollationAttributeFactory\"/> instead, which encodes")] + /// <summary> + /// <para> + /// Converts each token into its <see cref="System.Globalization.SortKey"/>, and then + /// encodes the <see cref="System.Globalization.SortKey"/> with <see cref="IndexableBinaryStringTools"/>, to allow + /// it to be stored as an index term. + /// </para> + /// <para> + /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at + /// index and query time -- <see cref="System.Globalization.SortKey"/> are only comparable when produced by + /// the same <see cref="Collator"/>. Since <c>java.text.RuleBasedCollators</c> are not + /// independently versioned, it is unsafe to search against stored + /// <see cref="System.Globalization.SortKey"/> unless the following are exactly the same (best practice is + /// to store this information with the index and check that they remain the + /// same at query time): + /// </para> + /// <list type="number"> + /// <item>JVM vendor</item> + /// <item>JVM version, including patch version</item> + /// <item> + /// The language (and country and variant, if specified) of the Locale + /// used when constructing the collator via + /// <see cref="Collator.Create(System.Globalization.CultureInfo)"/>. + /// </item> + /// <item> + /// The collation strength used - see <see cref="Collator.Strength"/> + /// </item> + /// </list> + /// <para> + /// The <c>ICUCollationKeyFilter</c> in the analysis-icu package + /// uses ICU4J's Collator, which makes its + /// version available, thus allowing collation to be versioned independently + /// from the JVM. ICUCollationKeyFilter is also significantly faster and + /// generates significantly shorter keys than CollationKeyFilter. See + /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun" + /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key + /// generation timing and key length comparisons between ICU4J and + /// java.text.Collator over several languages. + /// </para> + /// <para> + /// CollationKeys generated by java.text.Collators are not compatible + /// with those those generated by ICU Collators. Specifically, if you use + /// CollationKeyFilter to generate index terms, do not use + /// ICUCollationKeyFilter on the query side, or vice versa. + /// </para> </summary> + /// @deprecated Use <see cref="CollationAttributeFactory"/> instead, which encodes + /// terms directly as bytes. This filter will be removed in Lucene 5.0 + [Obsolete("Use CollationAttributeFactory instead, which encodes terms directly as bytes. This filter will be removed in Lucene 5.0.")] // LUCENENET TODO: A better option would be to contribute to the icu.net library and // make it CLS compliant (at least the parts of it we use) [CLSCompliant(false)] @@ -80,7 +80,8 @@ namespace Lucene.Net.Collation /// <param name="input"> Source token stream </param> /// <param name="collator"> CollationKey generator </param> - public CollationKeyFilter(TokenStream input, Collator collator) : base(input) + public CollationKeyFilter(TokenStream input, Collator collator) + : base(input) { this.collator = collator; this.termAtt = this.AddAttribute<ICharTermAttribute>(); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs index d5e53a1..45bb1e1 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs @@ -7,7 +7,6 @@ using System; using System.Collections.Generic; using System.Globalization; using System.IO; -using System.Linq; using System.Text; namespace Lucene.Net.Collation @@ -29,49 +28,49 @@ namespace Lucene.Net.Collation * limitations under the License. */ - /// <summary> - /// Factory for <see cref="CollationKeyFilter"/>. - /// <para> - /// This factory can be created in two ways: - /// <ul> - /// <li>Based upon a system collator associated with a Locale.</li> - /// <li>Based upon a tailored ruleset.</li> - /// </ul> - /// </para> - /// <para> - /// Using a System collator: - /// <ul> - /// <li>language: ISO-639 language code (mandatory)</li> - /// <li>country: ISO-3166 country code (optional)</li> - /// <li>variant: vendor or browser-specific code (optional)</li> - /// <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)</li> - /// <li>decomposition: 'no','canonical', or 'full' (optional)</li> - /// </ul> - /// </para> - /// <para> - /// Using a Tailored ruleset: - /// <ul> - /// <li>custom: UTF-8 text file containing rules supported by RuleBasedCollator (mandatory)</li> - /// <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)</li> - /// <li>decomposition: 'no','canonical', or 'full' (optional)</li> - /// </ul> - /// - /// <pre class="prettyprint" > - /// <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100"> - /// <analyzer> - /// <tokenizer class="solr.KeywordTokenizerFactory"/> - /// <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/> - /// </analyzer> - /// </fieldType></code> - /// - /// </para> - /// </summary> - /// <see cref="Collator"></seealso> - /// <see cref="CultureInfo"></seealso> - /// <see cref="RuleBasedCollator"> - /// @since solr 3.1 </seealso> - /// @deprecated use <see cref="CollationKeyAnalyzer"/> instead. - [Obsolete("use <seealso cref=\"CollationKeyAnalyzer\"/> instead.")] + /// <summary> + /// Factory for <see cref="CollationKeyFilter"/>. + /// <para> + /// This factory can be created in two ways: + /// <list type="bullet"> + /// <item>Based upon a system collator associated with a <see cref="System.Globalization.CultureInfo"/>.</item> + /// <item>Based upon a tailored ruleset.</item> + /// </list> + /// </para> + /// <para> + /// Using a System collator: + /// <list type="bullet"> + /// <item>language: ISO-639 language code (mandatory)</item> + /// <item>country: ISO-3166 country code (optional)</item> + /// <item>variant: vendor or browser-specific code (optional)</item> + /// <item>strength: 'primary','secondary','tertiary', or 'identical' (optional)</item> + /// <item>decomposition: 'no','canonical', or 'full' (optional)</item> + /// </list> + /// </para> + /// <para> + /// Using a Tailored ruleset: + /// <list type="bullet"> + /// <item>custom: UTF-8 text file containing rules supported by RuleBasedCollator (mandatory)</item> + /// <item>strength: 'primary','secondary','tertiary', or 'identical' (optional)</item> + /// <item>decomposition: 'no','canonical', or 'full' (optional)</item> + /// </list> + /// + /// <code> + /// <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100"> + /// <analyzer> + /// <tokenizer class="solr.KeywordTokenizerFactory"/> + /// <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/> + /// </analyzer> + /// </fieldType></code> + /// + /// </para> + /// </summary> + /// <seealso cref="Collator"/> + /// <seealso cref="CultureInfo"/> + /// <seealso cref="RuleBasedCollator"/> + /// @since solr 3.1 + /// @deprecated use <see cref="CollationKeyAnalyzer"/> instead. + [Obsolete("use <seealso cref=\"CollationKeyAnalyzer\"/> instead.")] public class CollationKeyFilterFactory : TokenFilterFactory, IMultiTermAwareComponent, IResourceLoaderAware { private Collator collator; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs index ceebafb..05333ed 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs @@ -33,13 +33,13 @@ namespace Lucene.Net.Collation.TokenAttributes private readonly Collator collator; /// <summary> - /// Create a new CollatedTermAttributeImpl </summary> + /// Create a new <see cref="CollatedTermAttributeImpl"/> </summary> /// <param name="collator"> Collation key generator </param> public CollatedTermAttributeImpl(Collator collator) { - // clone in case JRE doesn't properly sync, - // or to reduce contention in case they do - this.collator = collator; + // clone in case JRE doesn't properly sync, + // or to reduce contention in case they do + this.collator = (Collator)collator.Clone(); } public override void FillBytesRef()
