Lucene.Net.Analysis.Common.Miscellaneous (WordDelimiterFilter + Lucene47WordDelimiterFilter) refactor: Changed flags to a [Flags] enum and de-nested them from WordDelimiterFilter (note they are shared with Lucene47WordDelimiterFilter).
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7c29325a Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7c29325a Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7c29325a Branch: refs/heads/api-work Commit: 7c29325ad06185e58a9a09ce3bc90a269e54c956 Parents: d38efd3 Author: Shad Storhaug <[email protected]> Authored: Fri Mar 3 12:42:32 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Sun Mar 5 17:08:34 2017 +0700 ---------------------------------------------------------------------- .../Lucene47WordDelimiterFilter.cs | 83 ++-------- .../Miscellaneous/WordDelimiterFilter.cs | 158 ++++++++++--------- .../Miscellaneous/WordDelimiterFilterFactory.cs | 22 +-- .../Analysis/Core/TestBugInSomething.cs | 2 +- .../TestLucene47WordDelimiterFilter.cs | 79 +++++++--- .../Miscellaneous/TestWordDelimiterFilter.cs | 112 +++++++++---- 6 files changed, 250 insertions(+), 206 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c29325a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs index 7db0cd5..50a8271 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs @@ -38,66 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous public const int ALPHA = 0x03; public const int ALPHANUM = 0x07; - /// <summary> - /// Causes parts of words to be generated: - /// <p/> - /// "PowerShot" => "Power" "Shot" - /// </summary> - public const int GENERATE_WORD_PARTS = 1; - - /// 
<summary> - /// Causes number subwords to be generated: - /// <p/> - /// "500-42" => "500" "42" - /// </summary> - public const int GENERATE_NUMBER_PARTS = 2; - - /// <summary> - /// Causes maximum runs of word parts to be catenated: - /// <p/> - /// "wi-fi" => "wifi" - /// </summary> - public const int CATENATE_WORDS = 4; - - /// <summary> - /// Causes maximum runs of word parts to be catenated: - /// <p/> - /// "wi-fi" => "wifi" - /// </summary> - public const int CATENATE_NUMBERS = 8; - - /// <summary> - /// Causes all subword parts to be catenated: - /// <p/> - /// "wi-fi-4000" => "wifi4000" - /// </summary> - public const int CATENATE_ALL = 16; - - /// <summary> - /// Causes original words are preserved and added to the subword list (Defaults to false) - /// <p/> - /// "500-42" => "500" "42" "500-42" - /// </summary> - public const int PRESERVE_ORIGINAL = 32; - - /// <summary> - /// If not set, causes case changes to be ignored (subwords will only be generated - /// given SUBWORD_DELIM tokens) - /// </summary> - public const int SPLIT_ON_CASE_CHANGE = 64; - - /// <summary> - /// If not set, causes numeric changes to be ignored (subwords will only be generated - /// given SUBWORD_DELIM tokens). 
- /// </summary> - public const int SPLIT_ON_NUMERICS = 128; - - /// <summary> - /// Causes trailing "'s" to be removed for each subword - /// <p/> - /// "O'Neil's" => "O", "Neil" - /// </summary> - public const int STEM_ENGLISH_POSSESSIVE = 256; + // LUCENENET specific - made flags into their own [Flags] enum named WordDelimiterFlags and de-nested from this type /// <summary> /// If not null is the set of tokens to protect from being delimited @@ -105,7 +46,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// </summary> private readonly CharArraySet protWords; - private readonly int flags; + private readonly WordDelimiterFlags flags; private readonly ICharTermAttribute termAttribute; private readonly IOffsetAttribute offsetAttribute; @@ -148,7 +89,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// <param name="charTypeTable"> table containing character types </param> /// <param name="configurationFlags"> Flags configuring the filter </param> /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param> - public Lucene47WordDelimiterFilter(TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) + public Lucene47WordDelimiterFilter(TokenStream @in, byte[] charTypeTable, WordDelimiterFlags configurationFlags, CharArraySet protWords) : base(@in) { termAttribute = AddAttribute<ICharTermAttribute>(); @@ -160,7 +101,7 @@ namespace Lucene.Net.Analysis.Miscellaneous this.flags = configurationFlags; this.protWords = protWords; - this.iterator = new WordDelimiterIterator(charTypeTable, Has(SPLIT_ON_CASE_CHANGE), Has(SPLIT_ON_NUMERICS), Has(STEM_ENGLISH_POSSESSIVE)); + this.iterator = new WordDelimiterIterator(charTypeTable, Has(WordDelimiterFlags.SPLIT_ON_CASE_CHANGE), Has(WordDelimiterFlags.SPLIT_ON_NUMERICS), Has(WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE)); } /// <summary> @@ -170,7 +111,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// <param name="in"> <see cref="TokenStream"/> to be filtered 
</param> /// <param name="configurationFlags"> Flags configuring the filter </param> /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param> - public Lucene47WordDelimiterFilter(TokenStream @in, int configurationFlags, CharArraySet protWords) + public Lucene47WordDelimiterFilter(TokenStream @in, WordDelimiterFlags configurationFlags, CharArraySet protWords) : this(@in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords) { } @@ -204,7 +145,7 @@ namespace Lucene.Net.Analysis.Miscellaneous } // word of simply delimiters - if (iterator.end == WordDelimiterIterator.DONE && !Has(PRESERVE_ORIGINAL)) + if (iterator.end == WordDelimiterIterator.DONE && !Has(WordDelimiterFlags.PRESERVE_ORIGINAL)) { // if the posInc is 1, simply ignore it in the accumulation if (posIncAttribute.PositionIncrement == 1) @@ -217,10 +158,10 @@ namespace Lucene.Net.Analysis.Miscellaneous SaveState(); hasOutputToken = false; - hasOutputFollowingOriginal = !Has(PRESERVE_ORIGINAL); + hasOutputFollowingOriginal = !Has(WordDelimiterFlags.PRESERVE_ORIGINAL); lastConcatCount = 0; - if (Has(PRESERVE_ORIGINAL)) + if (Has(WordDelimiterFlags.PRESERVE_ORIGINAL)) { posIncAttribute.PositionIncrement = accumPosInc; accumPosInc = 0; @@ -287,7 +228,7 @@ namespace Lucene.Net.Analysis.Miscellaneous } // add all subwords (catenateAll) - if (Has(CATENATE_ALL)) + if (Has(WordDelimiterFlags.CATENATE_ALL)) { Concatenate(concatAll); } @@ -378,7 +319,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// <returns> <c>true</c> if concatenation should occur, <c>false</c> otherwise </returns> private bool ShouldConcatenate(int wordType) { - return (Has(CATENATE_WORDS) && IsAlpha(wordType)) || (Has(CATENATE_NUMBERS) && IsDigit(wordType)); + return (Has(WordDelimiterFlags.CATENATE_WORDS) && IsAlpha(wordType)) || (Has(WordDelimiterFlags.CATENATE_NUMBERS) && IsDigit(wordType)); } /// <summary> @@ -388,7 +329,7 @@ namespace Lucene.Net.Analysis.Miscellaneous 
/// <returns> <c>true</c> if a word/number part should be generated, <c>false</c> otherwise </returns> private bool ShouldGenerateParts(int wordType) { - return (Has(GENERATE_WORD_PARTS) && IsAlpha(wordType)) || (Has(GENERATE_NUMBER_PARTS) && IsDigit(wordType)); + return (Has(WordDelimiterFlags.GENERATE_WORD_PARTS) && IsAlpha(wordType)) || (Has(WordDelimiterFlags.GENERATE_NUMBER_PARTS) && IsDigit(wordType)); } /// <summary> @@ -511,7 +452,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// </summary> /// <param name="flag"> Flag to see if set </param> /// <returns> <c>true</c> if flag is set </returns> - private bool Has(int flag) + private bool Has(WordDelimiterFlags flag) { return (flags & flag) != 0; } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c29325a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs index 298d3db..f79ef5e 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs @@ -25,6 +25,76 @@ namespace Lucene.Net.Analysis.Miscellaneous */ /// <summary> + /// Configuration options for the <see cref="WordDelimiterFilter"/>. + /// <para/> + /// LUCENENET specific - these options were passed as int constant flags in Lucene. 
+ /// </summary> + [System.Flags] + public enum WordDelimiterFlags + { + /// <summary> + /// Causes parts of words to be generated: + /// <para/> + /// "PowerShot" => "Power" "Shot" + /// </summary> + GENERATE_WORD_PARTS = 1, + + /// <summary> + /// Causes number subwords to be generated: + /// <para/> + /// "500-42" => "500" "42" + /// </summary> + GENERATE_NUMBER_PARTS = 2, + + /// <summary> + /// Causes maximum runs of word parts to be catenated: + /// <para/> + /// "wi-fi" => "wifi" + /// </summary> + CATENATE_WORDS = 4, + + /// <summary> + /// Causes maximum runs of number parts to be catenated: + /// <para/> + /// "500-42" => "50042" + /// </summary> + CATENATE_NUMBERS = 8, + + /// <summary> + /// Causes all subword parts to be catenated: + /// <para/> + /// "wi-fi-4000" => "wifi4000" + /// </summary> + CATENATE_ALL = 16, + + /// <summary> + /// Causes original words to be preserved and added to the subword list (defaults to false) + /// <para/> + /// "500-42" => "500" "42" "500-42" + /// </summary> + PRESERVE_ORIGINAL = 32, + + /// <summary> + /// If not set, causes case changes to be ignored (subwords will only be generated + /// given SUBWORD_DELIM tokens) + /// </summary> + SPLIT_ON_CASE_CHANGE = 64, + + /// <summary> + /// If not set, causes numeric changes to be ignored (subwords will only be generated + /// given SUBWORD_DELIM tokens). + /// </summary> + SPLIT_ON_NUMERICS = 128, + + /// <summary> + /// Causes trailing "'s" to be removed for each subword + /// <para/> + /// "O'Neil's" => "O", "Neil" + /// </summary> + STEM_ENGLISH_POSSESSIVE = 256 + } + + /// <summary> /// Splits words into subwords and performs optional transformations on subword /// groups. 
Words are split into subwords with the following rules: /// <list type="bullet"> @@ -88,68 +158,8 @@ namespace Lucene.Net.Analysis.Miscellaneous public const int ALPHANUM = 0x07; - // LUCENENET TODO: Change the following to a [Flags] enum - - /// <summary> - /// Causes parts of words to be generated: - /// <p/> - /// "PowerShot" => "Power" "Shot" - /// </summary> - public const int GENERATE_WORD_PARTS = 1; - - /// <summary> - /// Causes number subwords to be generated: - /// <p/> - /// "500-42" => "500" "42" - /// </summary> - public const int GENERATE_NUMBER_PARTS = 2; - - /// <summary> - /// Causes maximum runs of word parts to be catenated: - /// <p/> - /// "wi-fi" => "wifi" - /// </summary> - public const int CATENATE_WORDS = 4; - - /// <summary> - /// Causes maximum runs of word parts to be catenated: - /// <p/> - /// "wi-fi" => "wifi" - /// </summary> - public const int CATENATE_NUMBERS = 8; + // LUCENENET specific - made flags into their own [Flags] enum named WordDelimiterFlags and de-nested from this type - /// <summary> - /// Causes all subword parts to be catenated: - /// <p/> - /// "wi-fi-4000" => "wifi4000" - /// </summary> - public const int CATENATE_ALL = 16; - - /// <summary> - /// Causes original words are preserved and added to the subword list (Defaults to false) - /// <p/> - /// "500-42" => "500" "42" "500-42" - /// </summary> - public const int PRESERVE_ORIGINAL = 32; - - /// <summary> - /// If not set, causes case changes to be ignored (subwords will only be generated - /// given SUBWORD_DELIM tokens) - /// </summary> - public const int SPLIT_ON_CASE_CHANGE = 64; - - /// <summary> - /// If not set, causes numeric changes to be ignored (subwords will only be generated - /// given SUBWORD_DELIM tokens). 
- /// </summary> - public const int SPLIT_ON_NUMERICS = 128; - - /// <summary> - /// Causes trailing "'s" to be removed for each subword - /// <p/> - /// "O'Neil's" => "O", "Neil" - /// </summary> - public const int STEM_ENGLISH_POSSESSIVE = 256; /// <summary> /// If not null is the set of tokens to protect from being delimited @@ -157,7 +167,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// </summary> private readonly CharArraySet protWords; - private readonly int flags; + private readonly WordDelimiterFlags flags; private readonly ICharTermAttribute termAttribute; private readonly IOffsetAttribute offsetAttribute; @@ -201,7 +211,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// <param name="charTypeTable"> table containing character types </param> /// <param name="configurationFlags"> Flags configuring the filter </param> /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param> - public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) + public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, WordDelimiterFlags configurationFlags, CharArraySet protWords) : base(@in) { this.termAttribute = AddAttribute<ICharTermAttribute>(); @@ -218,7 +228,10 @@ namespace Lucene.Net.Analysis.Miscellaneous } this.flags = configurationFlags; this.protWords = protWords; - this.iterator = new WordDelimiterIterator(charTypeTable, Has(SPLIT_ON_CASE_CHANGE), Has(SPLIT_ON_NUMERICS), Has(STEM_ENGLISH_POSSESSIVE)); + this.iterator = new WordDelimiterIterator(charTypeTable, + Has(WordDelimiterFlags.SPLIT_ON_CASE_CHANGE), + Has(WordDelimiterFlags.SPLIT_ON_NUMERICS), + Has(WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE)); } /// <summary> @@ -229,7 +242,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// <param name="in"> <see cref="TokenStream"/> to be filtered </param> /// <param name="configurationFlags"> Flags configuring 
the filter </param> /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param> - public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, int configurationFlags, CharArraySet protWords) + public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, WordDelimiterFlags configurationFlags, CharArraySet protWords) : this(matchVersion, @in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords) { } @@ -264,7 +277,7 @@ namespace Lucene.Net.Analysis.Miscellaneous } // word of simply delimiters - if (iterator.end == WordDelimiterIterator.DONE && !Has(PRESERVE_ORIGINAL)) + if (iterator.end == WordDelimiterIterator.DONE && !Has(WordDelimiterFlags.PRESERVE_ORIGINAL)) { // if the posInc is 1, simply ignore it in the accumulation // TODO: proper hole adjustment (FilteringTokenFilter-like) instead of this previous logic! @@ -278,10 +291,10 @@ namespace Lucene.Net.Analysis.Miscellaneous SaveState(); hasOutputToken = false; - hasOutputFollowingOriginal = !Has(PRESERVE_ORIGINAL); + hasOutputFollowingOriginal = !Has(WordDelimiterFlags.PRESERVE_ORIGINAL); lastConcatCount = 0; - if (Has(PRESERVE_ORIGINAL)) + if (Has(WordDelimiterFlags.PRESERVE_ORIGINAL)) { posIncAttribute.PositionIncrement = accumPosInc; accumPosInc = 0; @@ -371,7 +384,7 @@ namespace Lucene.Net.Analysis.Miscellaneous } // add all subwords (catenateAll) - if (Has(CATENATE_ALL)) + if (Has(WordDelimiterFlags.CATENATE_ALL)) { Concatenate(concatAll); } @@ -511,7 +524,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// <returns> <c>true</c> if concatenation should occur, <c>false</c> otherwise </returns> private bool ShouldConcatenate(int wordType) { - return (Has(CATENATE_WORDS) && IsAlpha(wordType)) || (Has(CATENATE_NUMBERS) && IsDigit(wordType)); + return (Has(WordDelimiterFlags.CATENATE_WORDS) && IsAlpha(wordType)) || (Has(WordDelimiterFlags.CATENATE_NUMBERS) && IsDigit(wordType)); } /// <summary> @@ -521,7 +534,8 @@ namespace 
Lucene.Net.Analysis.Miscellaneous /// <returns> <c>true</c> if a word/number part should be generated, <c>false</c> otherwise </returns> private bool ShouldGenerateParts(int wordType) { - return (Has(GENERATE_WORD_PARTS) && IsAlpha(wordType)) || (Has(GENERATE_NUMBER_PARTS) && IsDigit(wordType)); + return (Has(WordDelimiterFlags.GENERATE_WORD_PARTS) && IsAlpha(wordType)) || + (Has(WordDelimiterFlags.GENERATE_NUMBER_PARTS) && IsDigit(wordType)); } /// <summary> @@ -644,7 +658,7 @@ namespace Lucene.Net.Analysis.Miscellaneous /// </summary> /// <param name="flag"> Flag to see if set </param> /// <returns> <c>true</c> if flag is set </returns> - private bool Has(int flag) + private bool Has(WordDelimiterFlags flag) { return (flags & flag) != 0; } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c29325a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs index 3afc444..32600b0 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs @@ -47,7 +47,7 @@ namespace Lucene.Net.Analysis.Miscellaneous private readonly string wordFiles; private readonly string types; - private readonly int flags; + private readonly WordDelimiterFlags flags; private byte[] typeTable = null; private CharArraySet protectedWords = null; @@ -57,42 +57,42 @@ namespace Lucene.Net.Analysis.Miscellaneous : base(args) { AssureMatchVersion(); - int flags = 0; + WordDelimiterFlags flags = 0; if (GetInt32(args, "generateWordParts", 1) != 0) { - flags |= WordDelimiterFilter.GENERATE_WORD_PARTS; + flags |= WordDelimiterFlags.GENERATE_WORD_PARTS; } if (GetInt32(args, "generateNumberParts", 1) != 0) 
{ - flags |= WordDelimiterFilter.GENERATE_NUMBER_PARTS; + flags |= WordDelimiterFlags.GENERATE_NUMBER_PARTS; } if (GetInt32(args, "catenateWords", 0) != 0) { - flags |= WordDelimiterFilter.CATENATE_WORDS; + flags |= WordDelimiterFlags.CATENATE_WORDS; } if (GetInt32(args, "catenateNumbers", 0) != 0) { - flags |= WordDelimiterFilter.CATENATE_NUMBERS; + flags |= WordDelimiterFlags.CATENATE_NUMBERS; } if (GetInt32(args, "catenateAll", 0) != 0) { - flags |= WordDelimiterFilter.CATENATE_ALL; + flags |= WordDelimiterFlags.CATENATE_ALL; } if (GetInt32(args, "splitOnCaseChange", 1) != 0) { - flags |= WordDelimiterFilter.SPLIT_ON_CASE_CHANGE; + flags |= WordDelimiterFlags.SPLIT_ON_CASE_CHANGE; } if (GetInt32(args, "splitOnNumerics", 1) != 0) { - flags |= WordDelimiterFilter.SPLIT_ON_NUMERICS; + flags |= WordDelimiterFlags.SPLIT_ON_NUMERICS; } if (GetInt32(args, "preserveOriginal", 0) != 0) { - flags |= WordDelimiterFilter.PRESERVE_ORIGINAL; + flags |= WordDelimiterFlags.PRESERVE_ORIGINAL; } if (GetInt32(args, "stemEnglishPossessive", 1) != 0) { - flags |= WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + flags |= WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; } wordFiles = Get(args, PROTECTED_TOKENS); types = Get(args, TYPES); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c29325a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs index ed4580b..10fb996 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs @@ -357,7 +357,7 @@ namespace Lucene.Net.Analysis.Core { Tokenizer tokenizer = new WikipediaTokenizer(reader); TokenStream stream = new SopTokenFilter(tokenizer); - stream = new 
WordDelimiterFilter(TEST_VERSION_CURRENT, stream, table, -50, protWords); + stream = new WordDelimiterFilter(TEST_VERSION_CURRENT, stream, table, (WordDelimiterFlags)(object)-50, protWords); stream = new SopTokenFilter(stream); return new TokenStreamComponents(tokenizer, stream); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c29325a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLucene47WordDelimiterFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLucene47WordDelimiterFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLucene47WordDelimiterFilter.cs index 01a57c7..3d97c0d 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLucene47WordDelimiterFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLucene47WordDelimiterFilter.cs @@ -56,7 +56,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsets() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; // test that subwords and catenated subwords have // the correct offsets. 
TokenFilter wdf = new Lucene47WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); @@ -71,7 +76,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; TokenFilter wdf = new Lucene47WordDelimiterFilter(new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, new string[] { "übelkeit" }, new int[] { 7 }, new int[] { 15 }); @@ -80,7 +90,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange2() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; TokenFilter wdf = new Lucene47WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, new string[] { "übelkeit" }, new int[] { 8 }, 
new int[] { 17 }); @@ -89,7 +104,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange3() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; TokenFilter wdf = new Lucene47WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, new string[] { "übelkeit" }, new int[] { 8 }, new int[] { 16 }); @@ -98,7 +118,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange4() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; TokenFilter wdf = new Lucene47WordDelimiterFilter(new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, new string[] { "foo", "bar", "foobar" }, new int[] { 8, 12, 8 }, new int[] { 11, 15, 15 }, null, null, null, null, false); @@ -106,7 +131,11 @@ namespace Lucene.Net.Analysis.Miscellaneous public 
virtual void DoSplit(string input, params string[] output) { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; MockTokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.KEYWORD, false); TokenFilter wdf = new Lucene47WordDelimiterFilter(tokenizer, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); @@ -151,8 +180,11 @@ namespace Lucene.Net.Analysis.Miscellaneous public virtual void DoSplitPossessive(int stemPossessive, string input, params string[] output) { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS; - flags |= (stemPossessive == 1) ? WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE : 0; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS; + flags |= (stemPossessive == 1) ? 
WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE : 0; MockTokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.KEYWORD, false); TokenFilter wdf = new Lucene47WordDelimiterFilter(tokenizer, flags, null); @@ -206,7 +238,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestPositionIncrements() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "NUTCH" }, false); /* analyzer that uses whitespace + wdf */ @@ -246,10 +283,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestLucene47WordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protWords; - public AnalyzerAnonymousInnerClassHelper(TestLucene47WordDelimiterFilter outerInstance, int flags, CharArraySet protWords) + public AnalyzerAnonymousInnerClassHelper(TestLucene47WordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -267,10 +304,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestLucene47WordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protWords; - public AnalyzerAnonymousInnerClassHelper2(TestLucene47WordDelimiterFilter outerInstance, int flags, CharArraySet protWords) + public AnalyzerAnonymousInnerClassHelper2(TestLucene47WordDelimiterFilter 
outerInstance, WordDelimiterFlags flags, CharArraySet protWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -288,10 +325,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestLucene47WordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protWords; - public AnalyzerAnonymousInnerClassHelper3(TestLucene47WordDelimiterFilter outerInstance, int flags, CharArraySet protWords) + public AnalyzerAnonymousInnerClassHelper3(TestLucene47WordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -314,7 +351,7 @@ namespace Lucene.Net.Analysis.Miscellaneous int numIterations = AtLeast(5); for (int i = 0; i < numIterations; i++) { - int flags = Random().Next(512); + WordDelimiterFlags flags = (WordDelimiterFlags)Random().Next(512); CharArraySet protectedWords; if (Random().nextBoolean()) { @@ -334,10 +371,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestLucene47WordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protectedWords; - public AnalyzerAnonymousInnerClassHelper4(TestLucene47WordDelimiterFilter outerInstance, int flags, CharArraySet protectedWords) + public AnalyzerAnonymousInnerClassHelper4(TestLucene47WordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protectedWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -357,7 +394,7 @@ namespace Lucene.Net.Analysis.Miscellaneous Random random = Random(); for (int i = 0; i < 512; i++) { - int flags = i; + WordDelimiterFlags flags = (WordDelimiterFlags)i; CharArraySet protectedWords; if (random.nextBoolean()) { @@ -378,10 +415,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestLucene47WordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet 
protectedWords; - public AnalyzerAnonymousInnerClassHelper5(TestLucene47WordDelimiterFilter outerInstance, int flags, CharArraySet protectedWords) + public AnalyzerAnonymousInnerClassHelper5(TestLucene47WordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protectedWords) { this.outerInstance = outerInstance; this.flags = flags; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7c29325a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestWordDelimiterFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestWordDelimiterFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestWordDelimiterFilter.cs index ef65752..530682a 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestWordDelimiterFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestWordDelimiterFilter.cs @@ -51,7 +51,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsets() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; // test that subwords and catenated subwords have // the correct offsets. 
WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); @@ -66,7 +71,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, new string[] { "übelkeit" }, new int[] { 7 }, new int[] { 15 }); @@ -75,7 +85,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange2() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); 
AssertTokenStreamContents(wdf, new string[] { "übelkeit" }, new int[] { 8 }, new int[] { 17 }); @@ -84,7 +99,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange3() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, new string[] { "übelkeit" }, new int[] { 8 }, new int[] { 16 }); @@ -93,7 +113,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestOffsetChange4() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, new string[] { "foo", "foobar", "bar" }, new int[] { 8, 8, 12 }, new int[] 
{ 11, 15, 15 }); @@ -101,7 +126,11 @@ namespace Lucene.Net.Analysis.Miscellaneous public virtual void doSplit(string input, params string[] output) { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader(input), MockTokenizer.KEYWORD, false), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); AssertTokenStreamContents(wdf, output); @@ -145,8 +174,11 @@ namespace Lucene.Net.Analysis.Miscellaneous public virtual void doSplitPossessive(int stemPossessive, string input, params string[] output) { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS; - flags |= (stemPossessive == 1) ? WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE : 0; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS; + flags |= (stemPossessive == 1) ? 
WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE : 0; WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader(input), MockTokenizer.KEYWORD, false), flags, null); AssertTokenStreamContents(wdf, output); @@ -199,7 +231,12 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestPositionIncrements() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "NUTCH" }, false); @@ -240,10 +277,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protWords; - public AnalyzerAnonymousInnerClassHelper(TestWordDelimiterFilter outerInstance, int flags, CharArraySet protWords) + public AnalyzerAnonymousInnerClassHelper(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -261,10 +298,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protWords; - public AnalyzerAnonymousInnerClassHelper2(TestWordDelimiterFilter outerInstance, int flags, CharArraySet protWords) + public AnalyzerAnonymousInnerClassHelper2(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protWords) { 
this.outerInstance = outerInstance; this.flags = flags; @@ -282,10 +319,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protWords; - public AnalyzerAnonymousInnerClassHelper3(TestWordDelimiterFilter outerInstance, int flags, CharArraySet protWords) + public AnalyzerAnonymousInnerClassHelper3(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -305,7 +342,14 @@ namespace Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestLotsOfConcatenating() { - int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_WORDS | WordDelimiterFilter.CATENATE_NUMBERS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_WORDS + | WordDelimiterFlags.CATENATE_NUMBERS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; /* analyzer that uses whitespace + wdf */ Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this, flags); @@ -317,9 +361,9 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; - public AnalyzerAnonymousInnerClassHelper4(TestWordDelimiterFilter outerInstance, int flags) + public AnalyzerAnonymousInnerClassHelper4(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags) { this.outerInstance = outerInstance; this.flags = flags; @@ -337,7 +381,15 @@ namespace 
Lucene.Net.Analysis.Miscellaneous [Test] public virtual void TestLotsOfConcatenating2() { - int flags = WordDelimiterFilter.PRESERVE_ORIGINAL | WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.CATENATE_WORDS | WordDelimiterFilter.CATENATE_NUMBERS | WordDelimiterFilter.CATENATE_ALL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE; + WordDelimiterFlags flags = WordDelimiterFlags.PRESERVE_ORIGINAL + | WordDelimiterFlags.GENERATE_WORD_PARTS + | WordDelimiterFlags.GENERATE_NUMBER_PARTS + | WordDelimiterFlags.CATENATE_WORDS + | WordDelimiterFlags.CATENATE_NUMBERS + | WordDelimiterFlags.CATENATE_ALL + | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE + | WordDelimiterFlags.SPLIT_ON_NUMERICS + | WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE; /* analyzer that uses whitespace + wdf */ Analyzer a = new AnalyzerAnonymousInnerClassHelper5(this, flags); @@ -349,9 +401,9 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; - public AnalyzerAnonymousInnerClassHelper5(TestWordDelimiterFilter outerInstance, int flags) + public AnalyzerAnonymousInnerClassHelper5(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags) { this.outerInstance = outerInstance; this.flags = flags; @@ -371,7 +423,7 @@ namespace Lucene.Net.Analysis.Miscellaneous int numIterations = AtLeast(5); for (int i = 0; i < numIterations; i++) { - int flags = Random().Next(512); + WordDelimiterFlags flags = (WordDelimiterFlags)Random().Next(512); CharArraySet protectedWords; if (Random().nextBoolean()) { @@ -391,10 +443,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protectedWords; - public 
AnalyzerAnonymousInnerClassHelper6(TestWordDelimiterFilter outerInstance, int flags, CharArraySet protectedWords) + public AnalyzerAnonymousInnerClassHelper6(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protectedWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -416,7 +468,7 @@ namespace Lucene.Net.Analysis.Miscellaneous int numIterations = AtLeast(5); for (int i = 0; i < numIterations; i++) { - int flags = Random().Next(512); + WordDelimiterFlags flags = (WordDelimiterFlags)Random().Next(512); CharArraySet protectedWords; if (Random().nextBoolean()) { @@ -436,10 +488,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protectedWords; - public AnalyzerAnonymousInnerClassHelper7(TestWordDelimiterFilter outerInstance, int flags, CharArraySet protectedWords) + public AnalyzerAnonymousInnerClassHelper7(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protectedWords) { this.outerInstance = outerInstance; this.flags = flags; @@ -460,7 +512,7 @@ namespace Lucene.Net.Analysis.Miscellaneous Random random = Random(); for (int i = 0; i < 512; i++) { - int flags = i; + WordDelimiterFlags flags = (WordDelimiterFlags)i; CharArraySet protectedWords; if (random.nextBoolean()) { @@ -481,10 +533,10 @@ namespace Lucene.Net.Analysis.Miscellaneous { private readonly TestWordDelimiterFilter outerInstance; - private int flags; + private WordDelimiterFlags flags; private CharArraySet protectedWords; - public AnalyzerAnonymousInnerClassHelper8(TestWordDelimiterFilter outerInstance, int flags, CharArraySet protectedWords) + public AnalyzerAnonymousInnerClassHelper8(TestWordDelimiterFilter outerInstance, WordDelimiterFlags flags, CharArraySet protectedWords) { this.outerInstance = outerInstance; this.flags = flags;
