[lucenenet] 02/02: BUG: Lucene.Net.Analysis.Common: Fixed classes that were originally using invariant culture to do so again. J2N's Character class default is to use the current culture, which had changed from the prior Character class that used invariant culture. Fixes TestICUFoldingFilter::TestRandomStrings().

nightowl888 Sun, 02 Aug 2020 08:19:26 -0700

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git


commit 9ce76e99474ce818590210e5adfa2f2ec607ff26
Author: Shad Storhaug <[email protected]>
AuthorDate: Sun Aug 2 01:21:23 2020 +0700

    BUG: Lucene.Net.Analysis.Common: Restored invariant-culture lowercasing in
    classes that originally relied on it. J2N's Character class defaults to the
    current culture, whereas the Character class it replaced used the invariant
    culture. Fixes TestICUFoldingFilter::TestRandomStrings().
---
 .../Analysis/Core/LowerCaseTokenizer.cs                      |  5 +++--
 .../Analysis/Miscellaneous/StemmerOverrideFilter.cs          |  5 +++--
 .../Analysis/Synonym/SynonymFilter.cs                        | 10 ++++++----
 src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs | 12 ++++++------
 src/Lucene.Net.TestFramework/Analysis/MockTokenizer.cs       |  3 ++-
 .../Analysis/Util/TestCharArraySet.cs                        |  6 +++---
 6 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs 
b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
index 96d8958..4ea4db6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
@@ -1,5 +1,6 @@
 using J2N;
 using Lucene.Net.Util;
+using System.Globalization;
 using System.IO;
 
 namespace Lucene.Net.Analysis.Core
@@ -73,11 +74,11 @@ namespace Lucene.Net.Analysis.Core
 
         /// <summary>
         /// Converts char to lower case
-        /// <see cref="Character.ToLower(int)"/>.
+        /// <see cref="Character.ToLower(int, CultureInfo)"/> in the invariant 
culture.
         /// </summary>
         protected override int Normalize(int c)
         {
-            return Character.ToLower(c);
+            return Character.ToLower(c, CultureInfo.InvariantCulture); // 
LUCENENET specific - need to use invariant culture to match Java
         }
     }
 }
\ No newline at end of file
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
index ebd0178..bc81134 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
@@ -3,6 +3,7 @@ using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Util;
 using Lucene.Net.Util.Fst;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
 
 namespace Lucene.Net.Analysis.Miscellaneous
@@ -134,7 +135,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                 while (bufUpto < bufferLen)
                 {
                     int codePoint = Character.CodePointAt(buffer, bufUpto, 
bufferLen);
-                    if (fst.FindTargetArc(ignoreCase ? 
Character.ToLower(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == 
null)
+                    if (fst.FindTargetArc(ignoreCase ? 
Character.ToLower(codePoint, CultureInfo.InvariantCulture) : codePoint, 
scratchArc, scratchArc, fstReader) == null)
                     {
                         return null;
                     }
@@ -192,7 +193,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                     char[] buffer = charsSpare.Chars;
                     for (int i = 0; i < length;)
                     {
-                        i += 
Character.ToChars(Character.ToLower(Character.CodePointAt(input, i)), buffer, 
i);
+                        i += 
Character.ToChars(Character.ToLower(Character.CodePointAt(input, i), 
CultureInfo.InvariantCulture), buffer, i);
                     }
                     UnicodeUtil.UTF16toUTF8(buffer, 0, length, spare);
                 }
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs 
b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
index 1e840da..6b039ee 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
@@ -5,6 +5,7 @@ using Lucene.Net.Util;
 using Lucene.Net.Util.Fst;
 using System;
 using System.Diagnostics;
+using System.Globalization;
 
 namespace Lucene.Net.Analysis.Synonym
 {
@@ -252,9 +253,10 @@ namespace Lucene.Net.Analysis.Synonym
 
         /// <param name="input"> input tokenstream </param>
         /// <param name="synonyms"> synonym map </param>
-        /// <param name="ignoreCase"> case-folds input for matching with <see 
cref="Character.ToLower(int)"/>.
-        ///                   Note, if you set this to true, its your 
responsibility to lowercase
-        ///                   the input entries when you create the <see 
cref="SynonymMap"/> </param>
+        /// <param name="ignoreCase"> case-folds input for matching with <see cref="Character.ToLower(int, CultureInfo)"/>
+        ///                   using <see cref="CultureInfo.InvariantCulture"/>.
+        ///                   Note, if you set this to <c>true</c>, it is your responsibility to lowercase
+        ///                   the input entries when you create the <see cref="SynonymMap"/>.</param>
         public SynonymFilter(TokenStream input, SynonymMap synonyms, bool 
ignoreCase) 
             : base(input)
         {
@@ -411,7 +413,7 @@ namespace Lucene.Net.Analysis.Synonym
                 while (bufUpto < bufferLen)
                 {
                     int codePoint = Character.CodePointAt(buffer, bufUpto, 
bufferLen);
-                    if (fst.FindTargetArc(ignoreCase ? 
Character.ToLower(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == 
null)
+                    if (fst.FindTargetArc(ignoreCase ? 
Character.ToLower(codePoint, CultureInfo.InvariantCulture) : codePoint, 
scratchArc, scratchArc, fstReader) == null)
                     {
                         //System.out.println("    stop");
                         goto byTokenBreak;
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs 
b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
index f69094d..2bebdcb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -664,7 +664,7 @@ namespace Lucene.Net.Analysis.Util
                 for (int i = 0; i < length;)
                 {
                     var codePointAt = charUtils.CodePointAt(text1, offset + i, 
limit);
-                    if (Character.ToLower(codePointAt) != 
charUtils.CodePointAt(text2, i, text2.Length))
+                    if (Character.ToLower(codePointAt, 
CultureInfo.InvariantCulture) != charUtils.CodePointAt(text2, i, text2.Length)) 
// LUCENENET specific - need to use invariant culture to match Java
                     {
                         return false;
                     }
@@ -696,7 +696,7 @@ namespace Lucene.Net.Analysis.Util
                 for (int i = 0; i < length;)
                 {
                     int codePointAt = charUtils.CodePointAt(text1, i);
-                    if (Character.ToLower(codePointAt) != 
charUtils.CodePointAt(text2, i, text2.Length))
+                    if (Character.ToLower(codePointAt, 
CultureInfo.InvariantCulture) != charUtils.CodePointAt(text2, i, text2.Length)) 
// LUCENENET specific - need to use invariant culture to match Java
                     {
                         return false;
                     }
@@ -728,7 +728,7 @@ namespace Lucene.Net.Analysis.Util
                 for (int i = 0; i < length;)
                 {
                     int codePointAt = charUtils.CodePointAt(text1, i);
-                    if (Character.ToLower(codePointAt) != 
charUtils.CodePointAt(text2, i, text2.Length))
+                    if (Character.ToLower(codePointAt, 
CultureInfo.InvariantCulture) != charUtils.CodePointAt(text2, i, text2.Length)) 
// LUCENENET specific - need to use invariant culture to match Java
                     {
                         return false;
                     }
@@ -811,7 +811,7 @@ namespace Lucene.Net.Analysis.Util
                 for (int i = offset; i < stop;)
                 {
                     int codePointAt = charUtils.CodePointAt(text, i, stop);
-                    code = code * 31 + Character.ToLower(codePointAt);
+                    code = code * 31 + Character.ToLower(codePointAt, 
CultureInfo.InvariantCulture); // LUCENENET specific - need to use invariant 
culture to match Java
                     i += Character.CharCount(codePointAt);
                 }
             }
@@ -839,7 +839,7 @@ namespace Lucene.Net.Analysis.Util
                 for (int i = 0; i < length;)
                 {
                     int codePointAt = charUtils.CodePointAt(text, i);
-                    code = code * 31 + Character.ToLower(codePointAt);
+                    code = code * 31 + Character.ToLower(codePointAt, 
CultureInfo.InvariantCulture); // LUCENENET specific - need to use invariant 
culture to match Java
                     i += Character.CharCount(codePointAt);
                 }
             }
@@ -867,7 +867,7 @@ namespace Lucene.Net.Analysis.Util
                 for (int i = 0; i < length;)
                 {
                     int codePointAt = charUtils.CodePointAt(text, i);
-                    code = code * 31 + Character.ToLower(codePointAt);
+                    code = code * 31 + Character.ToLower(codePointAt, 
CultureInfo.InvariantCulture); // LUCENENET specific - need to use invariant 
culture to match Java
                     i += Character.CharCount(codePointAt);
                 }
             }
diff --git a/src/Lucene.Net.TestFramework/Analysis/MockTokenizer.cs 
b/src/Lucene.Net.TestFramework/Analysis/MockTokenizer.cs
index 2303aab..4be7f18 100644
--- a/src/Lucene.Net.TestFramework/Analysis/MockTokenizer.cs
+++ b/src/Lucene.Net.TestFramework/Analysis/MockTokenizer.cs
@@ -7,6 +7,7 @@ using CharacterRunAutomaton = 
Lucene.Net.Util.Automaton.CharacterRunAutomaton;
 using Debug = Lucene.Net.Diagnostics.Debug; // LUCENENET NOTE: We cannot use 
System.Diagnostics.Debug because those calls will be optimized out of the 
release!
 using RegExp = Lucene.Net.Util.Automaton.RegExp;
 using Assert = Lucene.Net.TestFramework.Assert;
+using System.Globalization;
 
 namespace Lucene.Net.Analysis
 {
@@ -290,7 +291,7 @@ namespace Lucene.Net.Analysis
 
         protected virtual int Normalize(int c)
         {
-            return lowerCase ? Character.ToLower(c) : c;
+            return lowerCase ? Character.ToLower(c, 
CultureInfo.InvariantCulture) : c; // LUCENENET specific - need to use 
invariant culture to match Java
         }
 
         public override void Reset()
diff --git 
a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs 
b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
index e3c54d6..e637483 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
@@ -419,7 +419,7 @@ namespace Lucene.Net.Analysis.Util
             IList<string> stopwordsUpper = new List<string>();
             foreach (string @string in stopwords)
             {
-                stopwordsUpper.Add(@string.ToUpper());
+                stopwordsUpper.Add(@string.ToUpperInvariant());
             }
             setIngoreCase.addAll(TEST_STOP_WORDS);
             setIngoreCase.Add(Convert.ToInt32(1));
@@ -472,7 +472,7 @@ namespace Lucene.Net.Analysis.Util
             IList<string> stopwordsUpper = new List<string>();
             foreach (string @string in stopwords)
             {
-                stopwordsUpper.Add(@string.ToUpper());
+                stopwordsUpper.Add(@string.ToUpperInvariant());
             }
             setIngoreCase.addAll(TEST_STOP_WORDS);
             setIngoreCase.Add(Convert.ToInt32(1));
@@ -523,7 +523,7 @@ namespace Lucene.Net.Analysis.Util
             IList<string> stopwordsUpper = new List<string>();
             foreach (string @string in stopwords)
             {
-                stopwordsUpper.Add(@string.ToUpper());
+                stopwordsUpper.Add(@string.ToUpperInvariant());
             }
             set.addAll(TEST_STOP_WORDS);

Reply via email to