Repository: lucenenet Updated Branches: refs/heads/branch_4x f1fbbd9f1 -> ac6215581
Mocks for test/Analysis Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ac621558 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ac621558 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ac621558 Branch: refs/heads/branch_4x Commit: ac6215581b6cdabd9073644fc49a67be5b88a01e Parents: f1fbbd9 Author: synhershko <[email protected]> Authored: Tue Apr 8 01:48:01 2014 +0300 Committer: synhershko <[email protected]> Committed: Tue Apr 8 01:48:01 2014 +0300 ---------------------------------------------------------------------- src/core/Support/Arrays.cs | 5 + test/test-framework/Analysis/MockAnalyzer.cs | 138 +++++++++++++++++++ .../Analysis/MockFixedLengthPayloadFilter.cs | 47 +++++++ test/test-framework/Analysis/MockTokenFilter.cs | 97 +++++++++++++ .../Analysis/MockVariableLengthPayloadFilter.cs | 43 ++++++ .../Lucene.Net.TestFramework.csproj | 3 + 6 files changed, 333 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/src/core/Support/Arrays.cs ---------------------------------------------------------------------- diff --git a/src/core/Support/Arrays.cs b/src/core/Support/Arrays.cs index 99c6b24..732f4aa 100644 --- a/src/core/Support/Arrays.cs +++ b/src/core/Support/Arrays.cs @@ -113,5 +113,10 @@ namespace Lucene.Net.Support return hashCode; } + + public static List<T> asList<T>(params T[] objects) + { + return objects.ToList(); + } } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockAnalyzer.cs ---------------------------------------------------------------------- diff --git a/test/test-framework/Analysis/MockAnalyzer.cs b/test/test-framework/Analysis/MockAnalyzer.cs new file mode 100644 index 0000000..62e4a01 --- /dev/null +++ b/test/test-framework/Analysis/MockAnalyzer.cs @@ -0,0 +1,138 @@ +using System; +using 
System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Support;
+using Lucene.Net.Util.Automaton;
+
+namespace Lucene.Net.Analysis
+{
+/**
+ * Analyzer for testing
+ * <p>
+ * This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
+ * for unit tests. If you are testing a custom component such as a queryparser
+ * or analyzer-wrapper that consumes analysis streams, it's a great idea to test
+ * it with this analyzer instead. MockAnalyzer has the following behavior:
+ * <ul>
+ *   <li>By default, the assertions in {@link MockTokenizer} are turned on for extra
+ *       checks that the consumer is consuming properly. These checks can be disabled
+ *       with {@link #setEnableChecks(boolean)}.
+ *   <li>Payload data is randomly injected into the stream for more thorough testing of payloads.
+ * </ul>
+ * @see MockTokenizer
+ */
+public class MockAnalyzer : Analyzer {
+  private readonly CharacterRunAutomaton runAutomaton;
+  private readonly bool lowerCase;
+  private readonly CharacterRunAutomaton filter;
+  private readonly bool enablePositionIncrements;
+  private int positionIncrementGap;
+  private readonly Random random;
+  private readonly HashMap<String, int> previousMappings = new HashMap<String, int>(); // field name -> payload mode
+  private bool enableChecks = true;
+  private int maxTokenLength = MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH;
+
+  /**
+   * Creates a new MockAnalyzer.
+   *
+   * @param random Random for payloads behavior; it only seeds a private Random instance
+   * @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
+   * @param lowerCase true if the tokenizer should lowercase terms
+   * @param filter DFA describing how terms should be filtered (set of stopwords, etc)
+   * @param enablePositionIncrements true if position increments should reflect filtered terms.
+   */
+  public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase, CharacterRunAutomaton filter, bool enablePositionIncrements) : base(new PerFieldReuseStrategy()) {
+    // TODO: this should be solved in a different way; Random should not be shared (!).
+    this.random = new Random(random.Next()); // System.Random can only be seeded with an int
+    this.runAutomaton = runAutomaton;
+    this.lowerCase = lowerCase;
+    this.filter = filter;
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
+  /**
+   * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+   * MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, true}).
+   */
+  public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase) :
+    this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, true){
+  }
+
+  /**
+   * Create a Whitespace-lowercasing analyzer with no stopwords removal.
+   * <p>
+   * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+   * MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, true}).
+   */
+  public MockAnalyzer(Random random):
+    this(random, MockTokenizer.WHITESPACE, true){
+  }
+
+  /** Builds the per-field chain: MockTokenizer -> MockTokenFilter -> optional payload filter. */
+  public override TokenStreamComponents CreateComponents(String fieldName, TextReader reader) {
+    MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase, maxTokenLength);
+    tokenizer.setEnableChecks(enableChecks);
+    MockTokenFilter filt = new MockTokenFilter(tokenizer, filter);
+    filt.setEnablePositionIncrements(enablePositionIncrements);
+    return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
+  }
+
+  /**
+   * Wraps stream in the payload filter chosen for fieldName. The choice is made once per
+   * field and cached in previousMappings: -1 = no payloads, int.MaxValue = variable length
+   * payloads, any other value = fixed payload length.
+   */
+  // TODO synchronized
+  private TokenFilter maybePayload(TokenFilter stream, String fieldName) {
+    int val;
+    // An int never compares equal to null, so a missing key has to be detected explicitly.
+    if (!previousMappings.TryGetValue(fieldName, out val)) {
+      val = -1; // no payloads
+      if (LuceneTestCase.rarely(random)) {
+        switch (random.Next(3)) {
+          case 0: val = -1; break; // no payloads
+          case 1: val = int.MaxValue; break; // variable length payload
+          case 2: val = random.Next(0, 12); break; // fixed length payload
+        }
+      }
+      if (LuceneTestCase.VERBOSE) {
+        if (val == int.MaxValue) {
+          Console.WriteLine("MockAnalyzer: field=" + fieldName + " gets variable length payloads");
+        } else if (val != -1) {
+          Console.WriteLine("MockAnalyzer: field=" + fieldName + " gets fixed length=" + val + " payloads");
+        }
+      }
+      previousMappings[fieldName] = val; // save it so we are consistent for this field
+    }
+
+    if (val == -1) return stream;
+    if (val == int.MaxValue) return new MockVariableLengthPayloadFilter(random, stream);
+    return new MockFixedLengthPayloadFilter(random, stream, val);
+  }
+
+  /** Sets the gap that GetPositionIncrementGap reports for every field. */
+  public void SetPositionIncrementGap(int positionIncrementGap){
+    this.positionIncrementGap = positionIncrementGap;
+  }
+
+  /** Returns the configured position increment gap; fieldName is not consulted. */
+  public override int GetPositionIncrementGap(String fieldName){
+    return positionIncrementGap;
+  }
+
+  /**
+   * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+   * should leave this enabled.
+   */
+  public void setEnableChecks(bool enableChecks) {
+    this.enableChecks = enableChecks;
+  }
+
+  /** Toggle maxTokenLength for MockTokenizer */
+  public void setMaxTokenLength(int length) {
+    this.maxTokenLength = length;
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs b/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs
new file mode 100644
index 0000000..c6e7069
--- /dev/null
+++ b/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs
@@ -0,0 +1,47 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+    /**
+     * TokenFilter that adds random fixed-length payloads.
+     */
+    public class MockFixedLengthPayloadFilter : TokenFilter
+    {
+        private readonly PayloadAttribute payloadAtt;
+        private readonly Random random;
+        private readonly sbyte[] bytes;   // array handed to payload in the constructor
+        private readonly byte[] scratch;  // Random.NextBytes fills a byte[], not an sbyte[]
+        private readonly BytesRef payload;
+
+        /** Decorates ts with payloads of exactly length random bytes; length must be >= 0. */
+        public MockFixedLengthPayloadFilter(Random random, TokenStream ts, int length)
+            : base(ts)
+        {
+            if (length < 0)
+            {
+                throw new ArgumentException("length must be >= 0");
+            }
+            this.random = random;
+            this.bytes = new sbyte[length];
+            this.scratch = new byte[length];
+            this.payload = new BytesRef(bytes);
+
+            payloadAtt = AddAttribute<PayloadAttribute>();
+        }
+
+        /** Advances the input; on success refills the payload bytes and attaches the payload. */
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+            random.NextBytes(scratch);
+            Buffer.BlockCopy(scratch, 0, bytes, 0, scratch.Length);
+            payloadAtt.Payload = payload;
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockTokenFilter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockTokenFilter.cs b/test/test-framework/Analysis/MockTokenFilter.cs
new
file mode 100644
index 0000000..0e39e45
--- /dev/null
+++ b/test/test-framework/Analysis/MockTokenFilter.cs
@@ -0,0 +1,97 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util.Automaton;
+
+namespace Lucene.Net.Analysis
+{
+/**
+ * A tokenfilter for testing that removes terms accepted by a DFA.
+ * <ul>
+ *  <li>Union a list of singletons to act like a stopfilter.
+ *  <li>Use the complement to act like a keepwordfilter
+ *  <li>Use a regex like <code>.{12,}</code> to act like a lengthfilter
+ * </ul>
+ */
+    public class MockTokenFilter : TokenFilter
+    {
+        /** Empty set of stopwords */
+        public static readonly CharacterRunAutomaton EMPTY_STOPSET =
+            new CharacterRunAutomaton(BasicAutomata.MakeEmpty());
+
+        /** Set of common english stopwords */
+        public static readonly CharacterRunAutomaton ENGLISH_STOPSET =
+            new CharacterRunAutomaton(BasicOperations.Union(Arrays.asList<Automaton>(
+                makeString("a"), makeString("an"), makeString("and"), makeString("are"),
+                makeString("as"), makeString("at"), makeString("be"), makeString("but"),
+                makeString("by"), makeString("for"), makeString("if"), makeString("in"),
+                makeString("into"), makeString("is"), makeString("it"), makeString("no"),
+                makeString("not"), makeString("of"), makeString("on"), makeString("or"),
+                makeString("such"), makeString("that"), makeString("the"), makeString("their"),
+                makeString("then"), makeString("there"), makeString("these"), makeString("they"),
+                makeString("this"), makeString("to"), makeString("was"), makeString("will"),
+                makeString("with"))));
+
+        /** Shorthand for BasicAutomata.MakeString, used to build ENGLISH_STOPSET. */
+        private static Automaton makeString(string an)
+        {
+            return BasicAutomata.MakeString(an);
+        }
+
+        private readonly CharacterRunAutomaton filter;
+        private bool enablePositionIncrements = true;
+
+        private readonly CharTermAttribute termAtt;
+        private readonly PositionIncrementAttribute posIncrAtt;
+
+        /** Filters input, dropping every term that the filter automaton accepts. */
+        public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter):base(input)
+        {
+            this.filter = filter;
+            termAtt = AddAttribute<CharTermAttribute>();
+            posIncrAtt = AddAttribute<PositionIncrementAttribute>();
+        }
+
+        /** Advances to the next token that the filter automaton does not accept;
+         *  returns false once the input is exhausted. */
+        public override bool IncrementToken()
+        {
+            // TODO: fix me when posInc=false, to work like FilteringTokenFilter in that case and not return
+            // initial token with posInc=0 ever
+
+            // return the first non-stop word found
+            int skippedPositions = 0;
+            while (input.IncrementToken())
+            {
+                if (!filter.Run(termAtt.Buffer, 0, termAtt.Length))
+                {
+                    if (enablePositionIncrements)
+                    {
+                        posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+                    }
+                    return true;
+                }
+                skippedPositions += posIncrAtt.PositionIncrement;
+            }
+            // reached EOS -- return false
+            return false;
+        }
+
+        /**
+         * @see #setEnablePositionIncrements(boolean)
+         */
+        public bool getEnablePositionIncrements()
+        {
+            return enablePositionIncrements;
+        }
+
+        /**
+         * If <code>true</code>, this Filter will preserve
+         * positions of the incoming tokens (ie, accumulate and
+         * set position increments of the removed stop tokens).
+         */
+        public void setEnablePositionIncrements(bool enable)
+        {
+            this.enablePositionIncrements = enable;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs b/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs
new file mode 100644
index 0000000..fb1730d
--- /dev/null
+++ b/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs
@@ -0,0 +1,43 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+    /**
+     * TokenFilter that adds random variable-length payloads.
+     */
+    public class MockVariableLengthPayloadFilter : TokenFilter
+    {
+        private const int MAXLENGTH = 129;
+
+        private readonly PayloadAttribute payloadAtt;
+        private readonly Random random;
+        private readonly sbyte[] bytes = new sbyte[MAXLENGTH];
+        private readonly byte[] scratch = new byte[MAXLENGTH]; // Random.NextBytes fills a byte[], not an sbyte[]
+        private readonly BytesRef payload;
+
+        /** Decorates ts with payloads of random content and random length in [0, MAXLENGTH). */
+        public MockVariableLengthPayloadFilter(Random random, TokenStream ts)
+            : base(ts)
+        {
+            this.random = random;
+            this.payload = new BytesRef(bytes);
+            payloadAtt = AddAttribute<PayloadAttribute>(); // must be registered: IncrementToken dereferences it
+        }
+
+        /** Advances the input; on success attaches a freshly randomized payload to the token. */
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+            random.NextBytes(scratch);
+            Buffer.BlockCopy(scratch, 0, bytes, 0, scratch.Length);
+            payload.length = random.Next(MAXLENGTH);
+            payloadAtt.Payload = payload;
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/test/test-framework/Lucene.Net.TestFramework.csproj b/test/test-framework/Lucene.Net.TestFramework.csproj
index 6b31aaa..b0c3d77 100644
--- a/test/test-framework/Lucene.Net.TestFramework.csproj
+++ b/test/test-framework/Lucene.Net.TestFramework.csproj
@@ -56,7 +56,10 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="Analysis\MockFixedLengthPayloadFilter.cs" />
+    <Compile Include="Analysis\MockTokenFilter.cs" />
     <Compile Include="Analysis\MockTokenizer.cs" />
+    <Compile Include="Analysis\MockVariableLengthPayloadFilter.cs" />
     <Compile Include="JavaCompatibility\LuceneTestCase.cs" />
     <Compile Include="JavaCompatibility\LuceneTypesHelpers.cs" />
     <Compile Include="JavaCompatibility\SystemTypesHelpers.cs" />
