This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 444b86b83cf9977c274011538d9075f33cbc061a Author: Shad Storhaug <[email protected]> AuthorDate: Tue Sep 3 10:17:05 2019 +0700 SWEEP: Lucene.Net.Analysis.Common: Fixed culture sensitivity issues converting numbers to strings and uppercasing/lowercasing --- src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs | 2 +- .../Analysis/Cn/ChineseTokenizer.cs | 2 +- .../Analysis/Compound/Hyphenation/HyphenationTree.cs | 2 +- .../Analysis/Compound/Hyphenation/TernaryTree.cs | 6 +++--- .../Analysis/Hunspell/Dictionary.cs | 4 ++-- .../Analysis/Miscellaneous/CapitalizationFilter.cs | 2 +- .../Analysis/Miscellaneous/PatternAnalyzer.cs | 4 ++-- .../Analysis/Miscellaneous/TruncateTokenFilterFactory.cs | 6 ++++-- .../Analysis/Pt/RSLPStemmerBase.cs | 13 +++++++------ .../Analysis/Util/AbstractAnalysisFactory.cs | 4 ++-- src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs | 3 ++- 11 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs index babbee1..9388afa 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs @@ -252,7 +252,7 @@ namespace Lucene.Net.Analysis.Cjk } // store the LowerCase(c) in the buffer - buffer[length++] = char.ToLower(c); + buffer[length++] = char.ToLowerInvariant(c); tokenType = SINGLE_TOKEN_TYPE; // break the procedure if buffer overflowed! diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs index 9b127df..f60c340 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs @@ -89,7 +89,7 @@ namespace Lucene.Net.Analysis.Cn { start = offset - 1; } - buffer[length++] = char.ToLower(c); // buffer it + buffer[length++] = char.ToLowerInvariant(c); // buffer it } diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs index 1a87cb0..2342d2c 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs @@ -592,7 +592,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation // public override void printStats(PrintStream @out) // { - //@out.println("Value space size = " + Convert.ToString(vspace.length())); + //@out.println("Value space size = " + Convert.ToString(vspace.length(), CultureInfo.InvariantCulture)); //base.printStats(@out); // } diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs index b0cbc23..a44222b 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs @@ -789,10 +789,10 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation public virtual void PrintStats(TextWriter @out) { - @out.WriteLine("Number of keys = " + Convert.ToString(m_length)); - @out.WriteLine("Node count = " + Convert.ToString(m_freenode)); + @out.WriteLine("Number of keys = " + Convert.ToString(m_length)); // LUCENENET: Intentionally using current culture + @out.WriteLine("Node count = " + Convert.ToString(m_freenode)); // LUCENENET: Intentionally using current culture // System.out.println("Array length = " + Integer.toString(eq.length)); - @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length)); + @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length)); // LUCENENET: Intentionally using current culture /* * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 ) diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs index ba7efaa..39ddc66 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs @@ -1134,7 +1134,7 @@ namespace Lucene.Net.Analysis.Hunspell if (ignoreCase && iconv == null) { // if we have no input conversion mappings, do this on-the-fly - ch = char.ToLower(ch); + ch = char.ToLowerInvariant(ch); } reuse.Append(ch); @@ -1154,7 +1154,7 @@ namespace Lucene.Net.Analysis.Hunspell { for (int i = 0; i < reuse.Length; i++) { - reuse[i] = char.ToLower(reuse[i]); + reuse[i] = char.ToLowerInvariant(reuse[i]); } } } diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs index 2eaaf54..ec92956 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs @@ -271,7 +271,7 @@ namespace Lucene.Net.Analysis.Miscellaneous { get { - return (culture != null) ? culture : CultureInfo.CurrentCulture; + return culture ?? CultureInfo.CurrentCulture; } } } diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs index 70e7ce2..ac4d888 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs @@ -411,7 +411,7 @@ namespace Lucene.Net.Analysis.Miscellaneous string text = str.Substring(start, end - start); if (toLowerCase) { - text = text.ToLower(); + text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor. } termAtt.SetEmpty().Append(text); offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end)); @@ -526,7 +526,7 @@ namespace Lucene.Net.Analysis.Miscellaneous text = s.Substring(start, i - start); if (toLowerCase) { - text = text.ToLower(); + text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor. } // if (toLowerCase) { //// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs index a1c4cec..cecbabb 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs @@ -46,10 +46,12 @@ namespace Lucene.Net.Analysis.Miscellaneous public TruncateTokenFilterFactory(IDictionary<string, string> args) : base(args) { - prefixLength = sbyte.Parse(Get(args, PREFIX_LENGTH_KEY, "5"), NumberStyles.Integer, CultureInfo.InvariantCulture); + var prefixLengthString = Get(args, PREFIX_LENGTH_KEY, "5"); + prefixLength = sbyte.Parse(prefixLengthString, NumberStyles.Integer, CultureInfo.InvariantCulture); if (prefixLength < 1) { - throw new System.ArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLength.ToString(CultureInfo.InvariantCulture)); + // LUCENENET specific - showing the original string provided by the user so we don't need to worry about culture-specific number conversion issues + throw new System.ArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLengthString); } if (args.Count > 0) { diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs index 26f3c59..885f76a 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs @@ -3,6 +3,7 @@ using Lucene.Net.Support; using Lucene.Net.Util; using System; using System.Collections.Generic; +using System.Globalization; using System.IO; using System.Linq; using System.Text; @@ -305,8 +306,8 @@ namespace Lucene.Net.Analysis.Pt } //Debug.Assert(headerPattern.GetGroupNumbers().Length == 4); string name = matcher.Groups[1].Value; - int min = int.Parse(matcher.Groups[2].Value); - int type = int.Parse(matcher.Groups[3].Value); + int min = int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture); + int type = int.Parse(matcher.Groups[3].Value, CultureInfo.InvariantCulture); string[] suffixes = ParseList(matcher.Groups[4].Value); Rule[] rules = ParseRules(r, type); return new Step(name, rules, min, suffixes); @@ -321,14 +322,14 @@ namespace Lucene.Net.Analysis.Pt Match matcher = stripPattern.Match(line); if (matcher.Success) { - rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), "")); + rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), "")); } else { matcher = repPattern.Match(line); if (matcher.Success) { - rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value)); + rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value)); } else { @@ -337,11 +338,11 @@ namespace Lucene.Net.Analysis.Pt { if (type == 0) { - rules.Add(new RuleWithSuffixExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value))); + rules.Add(new RuleWithSuffixExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value))); } else { - rules.Add(new RuleWithSetExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value))); + rules.Add(new RuleWithSetExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value))); } } else diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs index 0b76dc5..4241335 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs @@ -236,7 +236,7 @@ namespace Lucene.Net.Analysis.Util /// </summary> protected float RequireSingle(IDictionary<string, string> args, string name) { - return float.Parse(Require(args, name)); + return float.Parse(Require(args, name), CultureInfo.InvariantCulture); } /// <summary> @@ -248,7 +248,7 @@ namespace Lucene.Net.Analysis.Util if (args.TryGetValue(name, out s)) { args.Remove(name); - return float.Parse(s); + return float.Parse(s, CultureInfo.InvariantCulture); } return defaultVal; } diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs index 85a66c6..cf8a043 100644 --- a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs +++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs @@ -1,6 +1,7 @@ using Lucene.Net.Support; using System; using System.Collections.Generic; +using System.Globalization; using System.IO; using System.Text; using Console = Lucene.Net.Support.SystemConsole; @@ -76,7 +77,7 @@ namespace Egothor.Stemmer internal static int Get(int i, string s) { int result; - if (!int.TryParse(s.Substring(i, 1), out result)) + if (!int.TryParse(s.Substring(i, 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out result)) { return 1; }
