Lucene.Net.Analysis.Common.Collation: For now, adding [CLSCompliant(false)] to CollationAttributeFactory, CollationKeyAnalyzer, CollationKeyFilter, and TokenAttributes.CollatedTermAttributeImpl because they expose types from icu.net (which is not marked CLS compliant).
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/21b3d8b7 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/21b3d8b7 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/21b3d8b7 Branch: refs/heads/api-work Commit: 21b3d8b7703c321155d8388621d6b0ed120750fc Parents: 7cd69ab Author: Shad Storhaug <[email protected]> Authored: Thu Feb 2 10:38:17 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Thu Feb 2 18:30:28 2017 +0700 ---------------------------------------------------------------------- .../Collation/CollationAttributeFactory.cs | 93 ++++++++------- .../Collation/CollationKeyAnalyzer.cs | 119 ++++++++++--------- .../Collation/CollationKeyFilter.cs | 5 +- .../CollatedTermAttributeImpl.cs | 14 ++- 4 files changed, 122 insertions(+), 109 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs index 64687dd..d3a7b1f 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs @@ -22,51 +22,54 @@ namespace Lucene.Net.Collation * limitations under the License. */ - /// <summary> - /// <para> - /// Converts each token into its <seealso cref="CollationKey"/>, and then - /// encodes the bytes as an index term. - /// </para> - /// <para> - /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at - /// index and query time -- CollationKeys are only comparable when produced by - /// the same Collator. Since <seealso cref="RuleBasedCollator"/>s are not - /// independently versioned, it is unsafe to search against stored - /// CollationKeys unless the following are exactly the same (best practice is - /// to store this information with the index and check that they remain the - /// same at query time): - /// </para> - /// <ol> - /// <li>JVM vendor</li> - /// <li>JVM version, including patch version</li> - /// <li> - /// The language (and country and variant, if specified) of the Locale - /// used when constructing the collator via - /// <seealso cref="Collator#getInstance(Locale)"/>. - /// </li> - /// <li> - /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/> - /// </li> - /// </ol> - /// <para> - /// The <code>ICUCollationAttributeFactory</code> in the analysis-icu package - /// uses ICU4J's Collator, which makes its - /// version available, thus allowing collation to be versioned independently - /// from the JVM. ICUCollationAttributeFactory is also significantly faster and - /// generates significantly shorter keys than CollationAttributeFactory. See - /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun" - /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key - /// generation timing and key length comparisons between ICU4J and - /// java.text.Collator over several languages. - /// </para> - /// <para> - /// CollationKeys generated by java.text.Collators are not compatible - /// with those those generated by ICU Collators. Specifically, if you use - /// CollationAttributeFactory to generate index terms, do not use - /// ICUCollationAttributeFactory on the query side, or vice versa. - /// </para> - /// </summary> - public class CollationAttributeFactory : AttributeSource.AttributeFactory + /// <summary> + /// <para> + /// Converts each token into its <seealso cref="CollationKey"/>, and then + /// encodes the bytes as an index term. + /// </para> + /// <para> + /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at + /// index and query time -- CollationKeys are only comparable when produced by + /// the same Collator. Since <seealso cref="RuleBasedCollator"/>s are not + /// independently versioned, it is unsafe to search against stored + /// CollationKeys unless the following are exactly the same (best practice is + /// to store this information with the index and check that they remain the + /// same at query time): + /// </para> + /// <ol> + /// <li>JVM vendor</li> + /// <li>JVM version, including patch version</li> + /// <li> + /// The language (and country and variant, if specified) of the Locale + /// used when constructing the collator via + /// <seealso cref="Collator#getInstance(Locale)"/>. + /// </li> + /// <li> + /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/> + /// </li> + /// </ol> + /// <para> + /// The <code>ICUCollationAttributeFactory</code> in the analysis-icu package + /// uses ICU4J's Collator, which makes its + /// version available, thus allowing collation to be versioned independently + /// from the JVM. ICUCollationAttributeFactory is also significantly faster and + /// generates significantly shorter keys than CollationAttributeFactory. See + /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun" + /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key + /// generation timing and key length comparisons between ICU4J and + /// java.text.Collator over several languages. + /// </para> + /// <para> + /// CollationKeys generated by java.text.Collators are not compatible + /// with those those generated by ICU Collators. Specifically, if you use + /// CollationAttributeFactory to generate index terms, do not use + /// ICUCollationAttributeFactory on the query side, or vice versa. + /// </para> + /// </summary> + // LUCENENET TODO: A better option would be to contribute to the icu.net library and + // make it CLS compliant (at least the parts of it we use) + [System.CLSCompliant(false)] + public class CollationAttributeFactory : AttributeSource.AttributeFactory { private readonly Collator collator; private readonly AttributeSource.AttributeFactory @delegate; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs index f6db44c..b76e520 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs @@ -24,64 +24,67 @@ namespace Lucene.Net.Collation * limitations under the License. */ - /// <summary> - /// <para> - /// Configures <seealso cref="KeywordTokenizer"/> with <seealso cref="CollationAttributeFactory"/>. - /// </para> - /// <para> - /// Converts the token into its <seealso cref="java.text.CollationKey"/>, and then - /// encodes the CollationKey either directly or with - /// <seealso cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow - /// it to be stored as an index term. - /// </para> - /// <para> - /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at - /// index and query time -- CollationKeys are only comparable when produced by - /// the same Collator. Since <seealso cref="java.text.RuleBasedCollator"/>s are not - /// independently versioned, it is unsafe to search against stored - /// CollationKeys unless the following are exactly the same (best practice is - /// to store this information with the index and check that they remain the - /// same at query time): - /// </para> - /// <ol> - /// <li>JVM vendor</li> - /// <li>JVM version, including patch version</li> - /// <li> - /// The language (and country and variant, if specified) of the Locale - /// used when constructing the collator via - /// <seealso cref="Collator#getInstance(java.util.Locale)"/>. - /// </li> - /// <li> - /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/> - /// </li> - /// </ol> - /// <para> - /// The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package - /// uses ICU4J's Collator, which makes its - /// its version available, thus allowing collation to be versioned - /// independently from the JVM. ICUCollationKeyAnalyzer is also significantly - /// faster and generates significantly shorter keys than CollationKeyAnalyzer. - /// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun" - /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key - /// generation timing and key length comparisons between ICU4J and - /// java.text.Collator over several languages. - /// </para> - /// <para> - /// CollationKeys generated by java.text.Collators are not compatible - /// with those those generated by ICU Collators. Specifically, if you use - /// CollationKeyAnalyzer to generate index terms, do not use - /// ICUCollationKeyAnalyzer on the query side, or vice versa. - /// </para> - /// <a name="version"/> - /// <para>You must specify the required <seealso cref="Version"/> - /// compatibility when creating CollationKeyAnalyzer: - /// <ul> - /// <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous - /// versions will encode the bytes with <seealso cref="IndexableBinaryStringTools"/>. - /// </ul> - /// </para> - /// </summary> - public sealed class CollationKeyAnalyzer : Analyzer + /// <summary> + /// <para> + /// Configures <seealso cref="KeywordTokenizer"/> with <seealso cref="CollationAttributeFactory"/>. + /// </para> + /// <para> + /// Converts the token into its <seealso cref="java.text.CollationKey"/>, and then + /// encodes the CollationKey either directly or with + /// <seealso cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow + /// it to be stored as an index term. + /// </para> + /// <para> + /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at + /// index and query time -- CollationKeys are only comparable when produced by + /// the same Collator. Since <seealso cref="java.text.RuleBasedCollator"/>s are not + /// independently versioned, it is unsafe to search against stored + /// CollationKeys unless the following are exactly the same (best practice is + /// to store this information with the index and check that they remain the + /// same at query time): + /// </para> + /// <ol> + /// <li>JVM vendor</li> + /// <li>JVM version, including patch version</li> + /// <li> + /// The language (and country and variant, if specified) of the Locale + /// used when constructing the collator via + /// <seealso cref="Collator#getInstance(java.util.Locale)"/>. + /// </li> + /// <li> + /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/> + /// </li> + /// </ol> + /// <para> + /// The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package + /// uses ICU4J's Collator, which makes its + /// its version available, thus allowing collation to be versioned + /// independently from the JVM. ICUCollationKeyAnalyzer is also significantly + /// faster and generates significantly shorter keys than CollationKeyAnalyzer. + /// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun" + /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key + /// generation timing and key length comparisons between ICU4J and + /// java.text.Collator over several languages. + /// </para> + /// <para> + /// CollationKeys generated by java.text.Collators are not compatible + /// with those those generated by ICU Collators. Specifically, if you use + /// CollationKeyAnalyzer to generate index terms, do not use + /// ICUCollationKeyAnalyzer on the query side, or vice versa. + /// </para> + /// <a name="version"/> + /// <para>You must specify the required <seealso cref="Version"/> + /// compatibility when creating CollationKeyAnalyzer: + /// <ul> + /// <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous + /// versions will encode the bytes with <seealso cref="IndexableBinaryStringTools"/>. + /// </ul> + /// </para> + /// </summary> + // LUCENENET TODO: A better option would be to contribute to the icu.net library and + // make it CLS compliant (at least the parts of it we use) + [CLSCompliant(false)] + public sealed class CollationKeyAnalyzer : Analyzer { private readonly Collator collator; private readonly CollationAttributeFactory factory; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs index 5012e9c..6e684c1 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs @@ -70,7 +70,10 @@ namespace Lucene.Net.Collation /// @deprecated Use <seealso cref="CollationAttributeFactory"/> instead, which encodes /// terms directly as bytes. This filter will be removed in Lucene 5.0 [Obsolete("Use <seealso cref=\"CollationAttributeFactory\"/> instead, which encodes")] - public sealed class CollationKeyFilter : TokenFilter + // LUCENENET TODO: A better option would be to contribute to the icu.net library and + // make it CLS compliant (at least the parts of it we use) + [CLSCompliant(false)] + public sealed class CollationKeyFilter : TokenFilter { private readonly Collator collator; private readonly ICharTermAttribute termAtt; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs index dc3b85e..a29a5e8 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs +++ b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs @@ -1,5 +1,6 @@ using Icu.Collation; using Lucene.Net.Analysis.TokenAttributes; +using System; namespace Lucene.Net.Collation.TokenAttributes { @@ -20,11 +21,14 @@ namespace Lucene.Net.Collation.TokenAttributes * limitations under the License. */ - /// <summary> - /// Extension of <seealso cref="CharTermAttribute"/> that encodes the term - /// text as a binary Unicode collation key instead of as UTF-8 bytes. - /// </summary> - public class CollatedTermAttributeImpl : CharTermAttribute + /// <summary> + /// Extension of <seealso cref="CharTermAttribute"/> that encodes the term + /// text as a binary Unicode collation key instead of as UTF-8 bytes. + /// </summary> + // LUCENENET TODO: A better option would be to contribute to the icu.net library and + // make it CLS compliant (at least the parts of it we use) + [CLSCompliant(false)] + public class CollatedTermAttributeImpl : CharTermAttribute { private readonly Collator collator;
