This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 87f8ab6f1f5e560661423b883f04d2020495ac25 Author: Shad Storhaug <[email protected]> AuthorDate: Tue Sep 3 23:28:17 2019 +0700 SWEEP: Lucene.Net.Suggest: Fixed culture-specific conversion issues --- src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs | 23 +++++++++++++++++++++- .../Suggest/Analyzing/AnalyzingSuggester.cs | 4 ++-- .../Suggest/Fst/FSTCompletionLookup.cs | 2 +- .../Suggest/Fst/WFSTCompletionLookup.cs | 2 +- .../Suggest/Jaspell/JaspellLookup.cs | 4 ++-- .../Suggest/Jaspell/JaspellTernarySearchTrie.cs | 4 ++-- src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs | 4 ++-- 7 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs b/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs index e808424..0ade7dc 100644 --- a/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs +++ b/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs @@ -4,6 +4,7 @@ using Lucene.Net.Util; using Lucene.Net.Util.Automaton; using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; namespace Lucene.Net.Search.Spell @@ -92,6 +93,11 @@ namespace Lucene.Net.Search.Spell private IStringDistance distance = INTERNAL_LEVENSHTEIN; /// <summary> + /// The culture to use for lowercasing terms. + /// </summary> + private CultureInfo lowerCaseTermsCulture = null; // LUCENENET specific + + /// <summary> /// Creates a DirectSpellChecker with default configuration values /// </summary> public DirectSpellChecker() @@ -277,6 +283,21 @@ namespace Lucene.Net.Search.Spell } } + /// <summary> + /// Gets or sets the culture to use for lowercasing terms. + /// Set to <c>null</c> (the default) to use <see cref="CultureInfo.CurrentCulture"/>. + /// </summary> + public virtual CultureInfo LowerCaseTermsCulture // LUCENENET specific + { + get + { + return lowerCaseTermsCulture ?? CultureInfo.CurrentCulture; + } + set + { + lowerCaseTermsCulture = value; + } + } /// <summary> /// Gets or sets the comparer for sorting suggestions. @@ -364,7 +385,7 @@ namespace Lucene.Net.Search.Spell if (lowerCaseTerms) { - term = new Term(term.Field, text.ToLower()); + term = new Term(term.Field, LowerCaseTermsCulture.TextInfo.ToLower(text)); } int docfreq = ir.DocFreq(term); diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs index 6eff94b..693f8b3 100644 --- a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs +++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingSuggester.cs @@ -666,13 +666,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing BytesRef payload = new BytesRef(payloadLen); Array.Copy(output2.Bytes, sepIndex + 1, payload.Bytes, 0, payloadLen); payload.Length = payloadLen; - result = new LookupResult(spare.ToString(), DecodeWeight(output1.Value), payload); + result = new LookupResult(spare.ToString(), DecodeWeight(output1.GetValueOrDefault()), payload); } else { spare.Grow(output2.Length); UnicodeUtil.UTF8toUTF16(output2, spare); - result = new LookupResult(spare.ToString(), DecodeWeight(output1.Value)); + result = new LookupResult(spare.ToString(), DecodeWeight(output1.GetValueOrDefault())); } return result; diff --git a/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletionLookup.cs b/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletionLookup.cs index ebd9094..7ad99b4 100644 --- a/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletionLookup.cs +++ b/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletionLookup.cs @@ -283,7 +283,7 @@ namespace Lucene.Net.Search.Suggest.Fst public virtual object Get(string key) { int bucket = normalCompletion.GetBucket(key); - return bucket == -1 ? (long?)null : Convert.ToInt64(bucket); + return bucket == -1 ? (long?)null : bucket; } public override bool Store(DataOutput output) diff --git a/src/Lucene.Net.Suggest/Suggest/Fst/WFSTCompletionLookup.cs b/src/Lucene.Net.Suggest/Suggest/Fst/WFSTCompletionLookup.cs index 5ef4114..61febac 100644 --- a/src/Lucene.Net.Suggest/Suggest/Fst/WFSTCompletionLookup.cs +++ b/src/Lucene.Net.Suggest/Suggest/Fst/WFSTCompletionLookup.cs @@ -261,7 +261,7 @@ namespace Lucene.Net.Search.Suggest.Fst } else { - return Convert.ToInt32(DecodeWeight(result.GetValueOrDefault() + arc.NextFinalOutput.GetValueOrDefault())); + return DecodeWeight(result.GetValueOrDefault() + arc.NextFinalOutput.GetValueOrDefault()); } } diff --git a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellLookup.cs b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellLookup.cs index f32b5d0..4d72b98 100644 --- a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellLookup.cs +++ b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellLookup.cs @@ -77,7 +77,7 @@ namespace Lucene.Net.Search.Suggest.Jaspell } charsSpare.Grow(spare.Length); UnicodeUtil.UTF8toUTF16(spare.Bytes, spare.Offset, spare.Length, charsSpare); - trie.Put(charsSpare.ToString(), Convert.ToInt64(weight)); + trie.Put(charsSpare.ToString(), weight); } } @@ -166,7 +166,7 @@ namespace Lucene.Net.Search.Suggest.Jaspell sbyte mask = (sbyte)@in.ReadByte(); if ((mask & HAS_VALUE) != 0) { - node.data = Convert.ToInt64(@in.ReadInt64()); + node.data = @in.ReadInt64(); } if ((mask & LO_KID) != 0) { diff --git a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs index 7ed60d1..07993ac 100644 --- a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs +++ b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs @@ -200,7 +200,7 @@ namespace Lucene.Net.Search.Suggest.Jaspell /// </summary> public JaspellTernarySearchTrie(CultureInfo culture) { - this.culture = culture; + this.culture = culture ?? throw new ArgumentNullException(nameof(culture)); } // for loading @@ -299,7 +299,7 @@ namespace Lucene.Net.Search.Suggest.Jaspell occur = one; if (pos != -1) { - occur = Convert.ToSingle(word.Substring(pos + 1).Trim()); + occur = Convert.ToSingle(word.Substring(pos + 1).Trim(), CultureInfo.InvariantCulture); word = word.Substring(0, pos); } string key = culture.TextInfo.ToLower(word); diff --git a/src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs b/src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs index 2686a7a..13dca99 100644 --- a/src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs +++ b/src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs @@ -74,7 +74,7 @@ namespace Lucene.Net.Search.Suggest.Tst charsSpare.Grow(spare.Length); UnicodeUtil.UTF8toUTF16(spare.Bytes, spare.Offset, spare.Length, charsSpare); tokens.Add(charsSpare.ToString()); - vals.Add(Convert.ToInt64(tfit.Weight)); + vals.Add(tfit.Weight); } autocomplete.BalancedTree(tokens.ToArray(), vals.ToArray(), 0, tokens.Count - 1, root); } @@ -185,7 +185,7 @@ namespace Lucene.Net.Search.Suggest.Tst } if ((mask & HAS_VALUE) != 0) { - node.val = Convert.ToInt64(@in.ReadInt64()); + node.val = @in.ReadInt64(); } if ((mask & LO_KID) != 0) {
