This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit bebbae94145916520972ce25b3ebc2778db7c699 Author: Shad Storhaug <[email protected]> AuthorDate: Sat Dec 14 01:33:22 2019 +0700 Upgraded to ICU4N 60.1.0-alpha.192 --- build/Dependencies.props | 2 +- .../Analysis/Icu/ICUTransformFilter.cs | 40 ++++++++-------------- .../Icu/Segmentation/BreakIteratorWrapper.cs | 36 +++++++------------ .../Icu/Segmentation/CompositeBreakIterator.cs | 10 ++---- .../Icu/Segmentation/DefaultICUTokenizerConfig.cs | 10 +++--- .../Icu/Segmentation/ICUTokenizerConfig.cs | 2 +- 6 files changed, 35 insertions(+), 65 deletions(-) diff --git a/build/Dependencies.props b/build/Dependencies.props index 885eca4..595cdb7 100644 --- a/build/Dependencies.props +++ b/build/Dependencies.props @@ -32,7 +32,7 @@ https://github.com/apache/lucene-solr/tree/31d7ec7bbfdcd2c4cc61d9d35e962165410b65fe/lucene/analysis/icu/src/data/utr30 Just make sure they are adjusted to the right version of ICU/Lucene. <ICU4NPackageVersion>[60.1,60.2)</ICU4NPackageVersion> --> - <ICU4NPackageVersion>60.1.0-alpha.53</ICU4NPackageVersion> + <ICU4NPackageVersion>60.1.0-alpha.192</ICU4NPackageVersion> <ICU4NCollationPackageVersion>$(ICU4NPackageVersion)</ICU4NCollationPackageVersion> <ICU4NCurrencyDataPackageVersion>$(ICU4NPackageVersion)</ICU4NCurrencyDataPackageVersion> <ICU4NLanguageDataPackageVersion>$(ICU4NPackageVersion)</ICU4NLanguageDataPackageVersion> diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs index 8564acb..839a916 100644 --- a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs +++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs @@ -128,21 +128,15 @@ namespace Lucene.Net.Analysis.Icu this.length = token.Length; } - public int Char32At(int pos) - { - return UTF16.CharAt(buffer, 0, length, pos); - } + public int Char32At(int pos) => UTF16.CharAt(buffer, 0, length, pos); - public char this[int pos] - { - get { return buffer[pos]; } - } + public char this[int pos] => buffer[pos]; - public void Copy(int start, int limit, int dest) + public void Copy(int startIndex, int length, int destinationIndex) // LUCENENET: Changed 2nd parameter from limit to length { - char[] text = new char[limit - start]; - CopyTo(start, text, 0, limit - start); - Replace(dest, dest, text, 0, limit - start); + char[] text = new char[length]; // LUCENENET: Corrected length + CopyTo(startIndex, text, 0, length); // LUCENENET: Corrected length + Replace(destinationIndex, destinationIndex - destinationIndex, text, 0, length); // LUCENENET: Corrected length & charsLen } public void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count) @@ -150,33 +144,27 @@ namespace Lucene.Net.Analysis.Icu System.Array.Copy(buffer, sourceIndex, destination, destinationIndex, count); } - public bool HasMetaData - { - get { return false; } - } + public bool HasMetaData => false; - public int Length - { - get { return length; } - } + public int Length => length; - public void Replace(int start, int limit, string text) + public void Replace(int start, int length, string text) // LUCENENET: Changed 2nd parameter from limit to length { int charsLen = text.Length; - int newLength = ShiftForReplace(start, limit, charsLen); + int newLength = ShiftForReplace(start, length + start, charsLen); // LUCENENET: Changed 2nd parameter to calculate limit // insert the replacement text text.CopyTo(0, buffer, start, charsLen); - token.Length = (length = newLength); + token.Length = (this.length = newLength); } - public void Replace(int start, int limit, char[] text, int charsStart, + public void Replace(int start, int length, char[] text, int charsStart, int charsLen) { // shift text if necessary for the replacement - int newLength = ShiftForReplace(start, limit, charsLen); + int newLength = ShiftForReplace(start, length + start, charsLen); // LUCENENET: Changed 2nd parameter to calculate limit // insert the replacement text System.Array.Copy(text, charsStart, buffer, start, charsLen); - token.Length = (length = newLength); + token.Length = (this.length = newLength); } /// <summary>shift text (if necessary) for a replacement operation</summary> diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs index 2b4db33..af50927 100644 --- a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs +++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs @@ -47,7 +47,7 @@ namespace Lucene.Net.Analysis.Icu.Segmentation public abstract int Next(); public abstract int Current { get; } - public abstract ICU4N.Text.RuleStatus RuleStatus { get; } + public abstract int RuleStatus { get; } public abstract void SetText(CharacterIterator text); public void SetText(char[] text, int start, int length) @@ -87,15 +87,9 @@ namespace Lucene.Net.Analysis.Icu.Segmentation this.rbbi = rbbi; } - public override int Current - { - get { return rbbi.Current; } - } + public override int Current => rbbi.Current; - public override ICU4N.Text.RuleStatus RuleStatus - { - get { return rbbi.RuleStatus; } - } + public override int RuleStatus => rbbi.RuleStatus; public override int Next() { @@ -119,22 +113,16 @@ namespace Lucene.Net.Analysis.Icu.Segmentation private sealed class BIWrapper : BreakIteratorWrapper { private readonly BreakIterator bi; - private ICU4N.Text.RuleStatus status; + private int status; internal BIWrapper(BreakIterator bi) { this.bi = bi; } - public override int Current - { - get { return bi.Current; } - } + public override int Current => bi.Current; - public override ICU4N.Text.RuleStatus RuleStatus - { - get { return status; } - } + public override int RuleStatus => status; public override int Next() { @@ -144,10 +132,10 @@ namespace Lucene.Net.Analysis.Icu.Segmentation return next; } - private RuleStatus CalcStatus(int current, int next) + private int CalcStatus(int current, int next) { if (current == BreakIterator.Done || next == BreakIterator.Done) - return ICU4N.Text.RuleStatus.WordNone; + return BreakIterator.WordNone; int begin = m_start + current; int end = m_start + next; @@ -158,22 +146,22 @@ namespace Lucene.Net.Analysis.Icu.Segmentation codepoint = UTF16.CharAt(m_text, 0, end, begin); if (UChar.IsDigit(codepoint)) - return ICU4N.Text.RuleStatus.WordNumber; + return BreakIterator.WordNumber; else if (UChar.IsLetter(codepoint)) { // TODO: try to separately specify ideographic, kana? // [currently all bundled as letter for this case] - return ICU4N.Text.RuleStatus.WordLetter; + return BreakIterator.WordLetter; } } - return ICU4N.Text.RuleStatus.WordNone; + return BreakIterator.WordNone; } public override void SetText(CharacterIterator text) { bi.SetText(text); - status = ICU4N.Text.RuleStatus.WordNone; + status = BreakIterator.WordNone; } } } diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs index 15b6fcd..d697ae1 100644 --- a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs +++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs @@ -91,19 +91,13 @@ namespace Lucene.Net.Analysis.Icu.Segmentation /// Gets the rule status code (token type) from the underlying break /// iterator. See <see cref="RuleBasedBreakIterator"/> constants. /// </summary> - public RuleStatus RuleStatus - { - get { return rbbi.RuleStatus; } - } + public int RuleStatus => rbbi.RuleStatus; /// <summary> /// Gets the <see cref="UScript"/> script code for the current token. This code can be /// decoded with <see cref="UScript"/> into a name or ISO 15924 code. /// </summary> - public int ScriptCode - { - get { return scriptIterator.ScriptCode; } - } + public int ScriptCode => scriptIterator.ScriptCode; /// <summary> /// Set a new region of text to be examined by this iterator. diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs index 3fb7334..bdee024 100644 --- a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs +++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs @@ -113,17 +113,17 @@ namespace Lucene.Net.Analysis.Icu.Segmentation } } - public override string GetType(int script, RuleStatus ruleStatus) + public override string GetType(int script, int ruleStatus) { switch (ruleStatus) { - case RuleStatus.WordIdeo: + case BreakIterator.WordIdeo: return WORD_IDEO; - case RuleStatus.WordKana: //RuleBasedBreakIterator.WORD_KANA: + case BreakIterator.WordKana: //RuleBasedBreakIterator.WORD_KANA: return script == UScript.Hiragana ? WORD_HIRAGANA : WORD_KATAKANA; - case RuleStatus.WordLetter: //RuleBasedBreakIterator.WORD_LETTER: + case BreakIterator.WordLetter: //RuleBasedBreakIterator.WORD_LETTER: return script == UScript.Hangul ? WORD_HANGUL : WORD_LETTER; - case RuleStatus.WordNumber: //RuleBasedBreakIterator.WORD_NUMBER: + case BreakIterator.WordNumber: //RuleBasedBreakIterator.WORD_NUMBER: return WORD_NUMBER; default: /* some other custom code */ return "<OTHER>"; diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs index c65b7f8..e8014f5 100644 --- a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs +++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs @@ -42,7 +42,7 @@ namespace Lucene.Net.Analysis.Icu.Segmentation /// <summary> /// Return a token type value for a given script and BreakIterator rule status. /// </summary> - public abstract string GetType(int script, RuleStatus ruleStatus); + public abstract string GetType(int script, int ruleStatus); /// <summary> /// true if Han, Hiragana, and Katakana scripts should all be returned as Japanese /// </summary>
