// Reconstructed from patch hunk: src/Lucene.Net.Tests/Search/Spans/TestSpans.cs (commit 96822396).
// Diff transport markers ('+' prefixes, gitweb headers) stripped; code tokens unchanged.
using System.Collections.Generic;
using Lucene.Net.Documents;

namespace Lucene.Net.Search.Spans
{
    using Lucene.Net.Index;
    using NUnit.Framework;
    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
    using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity;
    using Directory = Lucene.Net.Store.Directory;
    using DirectoryReader = Lucene.Net.Index.DirectoryReader;
    using Document = Documents.Document;
    using Field = Field;
    using IndexReader = Lucene.Net.Index.IndexReader;
    using IndexReaderContext = Lucene.Net.Index.IndexReaderContext;
    using IndexWriter = Lucene.Net.Index.IndexWriter;
    using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using ReaderUtil = Lucene.Net.Index.ReaderUtil;
    using Similarity = Lucene.Net.Search.Similarities.Similarity;
    using Term = Lucene.Net.Index.Term;

    /// <summary>
    /// Tests for span queries (<see cref="SpanNearQuery"/>, <see cref="SpanOrQuery"/>,
    /// <see cref="SpanNotQuery"/>) against a small fixed index built from <c>DocFields</c>.
    /// Expected doc numbers, span starts and ends in the assertions are hand-computed
    /// against that fixture — do not change the fixture without re-deriving them.
    /// </summary>
    [TestFixture]
    public class TestSpans : LuceneTestCase
    {
        private IndexSearcher Searcher;
        private IndexReader Reader;
        private Directory Directory;

        // All fixture documents index their tokens into this single field.
        public const string field = "field";

        /// <summary>
        /// Builds a fresh index containing one document per entry of <c>DocFields</c>.
        /// </summary>
        [SetUp]
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            for (int i = 0; i < DocFields.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, DocFields[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }

        [TearDown]
        public override void TearDown()
        {
            Reader.Dispose();
            Directory.Dispose();
            base.TearDown();
        }

        // Fixture documents; array index == Lucene doc id in the assertions below.
        // "w*" docs exercise ordered slop, "u*" docs unordered near, "t*"/"s*" docs
        // overlap and SpanNot pre/post distances.
        private string[] DocFields = new string[] { "w1 w2 w3 w4 w5", "w1 w3 w2 w3", "w1 xx w2 yy w3", "w1 w3 xx w2 yy w3", "u2 u2 u1", "u2 xx u2 u1", "u2 u2 xx u1", "u2 xx u2 yy u1", "u2 xx u1 u2", "u2 u1 xx u2", "u1 u2 xx u2", "t1 t2 t1 t3 t2 t3", "s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx" };

        /// <summary>Convenience factory for a <see cref="SpanTermQuery"/> on the fixture field.</summary>
        public virtual SpanTermQuery MakeSpanTermQuery(string text)
        {
            return new SpanTermQuery(new Term(field, text));
        }

        // Delegates to the shared CheckHits verifier with this fixture's searcher.
        private void CheckHits(Query query, int[] results)
        {
            Search.CheckHits.DoCheckHits(Random(), query, field, Searcher, results, Similarity);
        }

        // Runs an ordered three-clause SpanNearQuery with the given slop and checks hits.
        private void OrderedSlopTest3SQ(SpanQuery q1, SpanQuery q2, SpanQuery q3, int slop, int[] expectedDocs)
        {
            bool ordered = true;
            SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { q1, q2, q3 }, slop, ordered);
            CheckHits(snq, expectedDocs);
        }

        // Ordered near over three distinct terms: w1 w2 w3.
        public virtual void OrderedSlopTest3(int slop, int[] expectedDocs)
        {
            OrderedSlopTest3SQ(MakeSpanTermQuery("w1"), MakeSpanTermQuery("w2"), MakeSpanTermQuery("w3"), slop, expectedDocs);
        }

        // Ordered near with a repeated trailing clause: w1 w3 w3.
        public virtual void OrderedSlopTest3Equal(int slop, int[] expectedDocs)
        {
            OrderedSlopTest3SQ(MakeSpanTermQuery("w1"), MakeSpanTermQuery("w3"), MakeSpanTermQuery("w3"), slop, expectedDocs);
        }

        // Ordered near with a repeated leading clause: u2 u2 u1.
        public virtual void OrderedSlopTest1Equal(int slop, int[] expectedDocs)
        {
            OrderedSlopTest3SQ(MakeSpanTermQuery("u2"), MakeSpanTermQuery("u2"), MakeSpanTermQuery("u1"), slop, expectedDocs);
        }

        [Test]
        public virtual void TestSpanNearOrdered01()
        {
            OrderedSlopTest3(0, new int[] { 0 });
        }

        [Test]
        public virtual void TestSpanNearOrdered02()
        {
            OrderedSlopTest3(1, new int[] { 0, 1 });
        }

        [Test]
        public virtual void TestSpanNearOrdered03()
        {
            OrderedSlopTest3(2, new int[] { 0, 1, 2 });
        }

        [Test]
        public virtual void TestSpanNearOrdered04()
        {
            OrderedSlopTest3(3, new int[] { 0, 1, 2, 3 });
        }

        [Test]
        public virtual void TestSpanNearOrdered05()
        {
            OrderedSlopTest3(4, new int[] { 0, 1, 2, 3 });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual01()
        {
            OrderedSlopTest3Equal(0, new int[] { });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual02()
        {
            OrderedSlopTest3Equal(1, new int[] { 1 });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual03()
        {
            OrderedSlopTest3Equal(2, new int[] { 1 });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual04()
        {
            OrderedSlopTest3Equal(3, new int[] { 1, 3 });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual11()
        {
            OrderedSlopTest1Equal(0, new int[] { 4 });
        }

        // NOTE(review): identical input to TestSpanNearOrderedEqual11 — presumably a
        // copy carried over from the Java original; confirm against upstream Lucene.
        [Test]
        public virtual void TestSpanNearOrderedEqual12()
        {
            OrderedSlopTest1Equal(0, new int[] { 4 });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual13()
        {
            OrderedSlopTest1Equal(1, new int[] { 4, 5, 6 });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual14()
        {
            OrderedSlopTest1Equal(2, new int[] { 4, 5, 6, 7 });
        }

        [Test]
        public virtual void TestSpanNearOrderedEqual15()
        {
            OrderedSlopTest1Equal(3, new int[] { 4, 5, 6, 7 });
        }

        /// <summary>
        /// Ordered near over doc 11 ("t1 t2 t1 t3 t2 t3"): two overlapping matches.
        /// </summary>
        [Test]
        public virtual void TestSpanNearOrderedOverlap()
        {
            bool ordered = true;
            int slop = 1;
            SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { MakeSpanTermQuery("t1"), MakeSpanTermQuery("t2"), MakeSpanTermQuery("t3") }, slop, ordered);
            Spans spans = MultiSpansWrapper.Wrap(Searcher.TopReaderContext, snq);

            Assert.IsTrue(spans.Next(), "first range");
            Assert.AreEqual(11, spans.Doc, "first doc");
            Assert.AreEqual(0, spans.Start, "first start");
            Assert.AreEqual(4, spans.End, "first end");

            Assert.IsTrue(spans.Next(), "second range");
            Assert.AreEqual(11, spans.Doc, "second doc");
            Assert.AreEqual(2, spans.Start, "second start");
            Assert.AreEqual(6, spans.End, "second end");

            Assert.IsFalse(spans.Next(), "third range");
        }

        [Test]
        public virtual void TestSpanNearUnOrdered()
        {
            //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test
            SpanNearQuery snq;
            snq = new SpanNearQuery(new SpanQuery[] { MakeSpanTermQuery("u1"), MakeSpanTermQuery("u2") }, 0, false);
            Spans spans = MultiSpansWrapper.Wrap(Searcher.TopReaderContext, snq);
            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(4, spans.Doc, "doc");
            Assert.AreEqual(1, spans.Start, "start");
            Assert.AreEqual(3, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(5, spans.Doc, "doc");
            Assert.AreEqual(2, spans.Start, "start");
            Assert.AreEqual(4, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(8, spans.Doc, "doc");
            Assert.AreEqual(2, spans.Start, "start");
            Assert.AreEqual(4, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(9, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(2, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(10, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(2, spans.End, "end");
            Assert.IsTrue(spans.Next() == false, "Has next and it shouldn't: " + spans.Doc);

            // Nested case: (u1 near u2, slop 0) near u2, slop 1, both unordered.
            SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[] { MakeSpanTermQuery("u1"), MakeSpanTermQuery("u2") }, 0, false);
            snq = new SpanNearQuery(new SpanQuery[] { u1u2, MakeSpanTermQuery("u2") }, 1, false);
            spans = MultiSpansWrapper.Wrap(Searcher.TopReaderContext, snq);
            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(4, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(3, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            //unordered spans can be subsets
            Assert.AreEqual(4, spans.Doc, "doc");
            Assert.AreEqual(1, spans.Start, "start");
            Assert.AreEqual(3, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(5, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(4, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(5, spans.Doc, "doc");
            Assert.AreEqual(2, spans.Start, "start");
            Assert.AreEqual(4, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(8, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(4, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(8, spans.Doc, "doc");
            Assert.AreEqual(2, spans.Start, "start");
            Assert.AreEqual(4, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(9, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(2, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(9, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(4, spans.End, "end");

            Assert.IsTrue(spans.Next(), "Does not have next and it should");
            Assert.AreEqual(10, spans.Doc, "doc");
            Assert.AreEqual(0, spans.Start, "start");
            Assert.AreEqual(2, spans.End, "end");

            Assert.IsTrue(spans.Next() == false, "Has next and it shouldn't");
        }

        // Builds a SpanOrQuery over the given terms and returns its Spans enumerator.
        private Spans OrSpans(string[] terms)
        {
            SpanQuery[] sqa = new SpanQuery[terms.Length];
            for (int i = 0; i < terms.Length; i++)
            {
                sqa[i] = MakeSpanTermQuery(terms[i]);
            }
            return MultiSpansWrapper.Wrap(Searcher.TopReaderContext, new SpanOrQuery(sqa));
        }

        // Advances the enumerator one step and verifies doc id, start and end positions.
        private void TstNextSpans(Spans spans, int doc, int start, int end)
        {
            Assert.IsTrue(spans.Next(), "next");
            Assert.AreEqual(doc, spans.Doc, "doc");
            Assert.AreEqual(start, spans.Start, "start");
            Assert.AreEqual(end, spans.End, "end");
        }

        [Test]
        public virtual void TestSpanOrEmpty()
        {
            Spans spans = OrSpans(new string[0]);
            Assert.IsFalse(spans.Next(), "empty next");

            SpanOrQuery a = new SpanOrQuery();
            SpanOrQuery b = new SpanOrQuery();
            Assert.IsTrue(a.Equals(b), "empty should equal");
        }

        [Test]
        public virtual void TestSpanOrSingle()
        {
            Spans spans = OrSpans(new string[] { "w5" });
            TstNextSpans(spans, 0, 4, 5);
            Assert.IsFalse(spans.Next(), "final next");
        }

        [Test]
        public virtual void TestSpanOrMovesForward()
        {
            Spans spans = OrSpans(new string[] { "w1", "xx" });

            spans.Next();
            int doc = spans.Doc;
            Assert.AreEqual(0, doc);

            spans.SkipTo(0);
            doc = spans.Doc;

            // LUCENE-1583:
            // according to Spans, a skipTo to the same doc or less
            // should still call next() on the underlying Spans
            Assert.AreEqual(1, doc);
        }

        [Test]
        public virtual void TestSpanOrDouble()
        {
            Spans spans = OrSpans(new string[] { "w5", "yy" });
            TstNextSpans(spans, 0, 4, 5);
            TstNextSpans(spans, 2, 3, 4);
            TstNextSpans(spans, 3, 4, 5);
            TstNextSpans(spans, 7, 3, 4);
            Assert.IsFalse(spans.Next(), "final next");
        }

        [Test]
        public virtual void TestSpanOrDoubleSkip()
        {
            Spans spans = OrSpans(new string[] { "w5", "yy" });
            Assert.IsTrue(spans.SkipTo(3), "initial skipTo");
            Assert.AreEqual(3, spans.Doc, "doc");
            Assert.AreEqual(4, spans.Start, "start");
            Assert.AreEqual(5, spans.End, "end");
            TstNextSpans(spans, 7, 3, 4);
            Assert.IsFalse(spans.Next(), "final next");
        }

        // An unmatched term in the OR must not disturb the other clauses' spans.
        [Test]
        public virtual void TestSpanOrUnused()
        {
            Spans spans = OrSpans(new string[] { "w5", "unusedTerm", "yy" });
            TstNextSpans(spans, 0, 4, 5);
            TstNextSpans(spans, 2, 3, 4);
            TstNextSpans(spans, 3, 4, 5);
            TstNextSpans(spans, 7, 3, 4);
            Assert.IsFalse(spans.Next(), "final next");
        }

        // Doc 11 contains all three terms; OR must walk its six positions in order.
        [Test]
        public virtual void TestSpanOrTripleSameDoc()
        {
            Spans spans = OrSpans(new string[] { "t1", "t2", "t3" });
            TstNextSpans(spans, 11, 0, 1);
            TstNextSpans(spans, 11, 1, 2);
            TstNextSpans(spans, 11, 2, 3);
            TstNextSpans(spans, 11, 3, 4);
            TstNextSpans(spans, 11, 4, 5);
            TstNextSpans(spans, 11, 5, 6);
            Assert.IsFalse(spans.Next(), "final next");
        }

        /// <summary>
        /// With a similarity whose SloppyFreq is always 0, a matching span scorer must
        /// still match (doc 11) but score exactly 0. The original similarity is restored
        /// in a finally block so other tests are unaffected.
        /// </summary>
        [Test]
        public virtual void TestSpanScorerZeroSloppyFreq()
        {
            bool ordered = true;
            int slop = 1;
            IndexReaderContext topReaderContext = Searcher.TopReaderContext;
            IList<AtomicReaderContext> leaves = topReaderContext.Leaves;
            int subIndex = ReaderUtil.SubIndex(11, leaves);
            for (int i = 0, c = leaves.Count; i < c; i++)
            {
                AtomicReaderContext ctx = leaves[i];

                Similarity sim = new DefaultSimilarityAnonymousInnerClassHelper(this);

                Similarity oldSim = Searcher.Similarity;
                Scorer spanScorer;
                try
                {
                    Searcher.Similarity = sim;
                    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { MakeSpanTermQuery("t1"), MakeSpanTermQuery("t2") }, slop, ordered);

                    spanScorer = Searcher.CreateNormalizedWeight(snq).GetScorer(ctx, ((AtomicReader)ctx.Reader).LiveDocs);
                }
                finally
                {
                    Searcher.Similarity = oldSim;
                }
                if (i == subIndex)
                {
                    Assert.IsTrue(spanScorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS, "first doc");
                    Assert.AreEqual(spanScorer.DocID + ctx.DocBase, 11, "first doc number");
                    float score = spanScorer.GetScore();
                    Assert.IsTrue(score == 0.0f, "first doc score should be zero, " + score);
                }
                else
                {
                    Assert.IsTrue(spanScorer.NextDoc() == DocIdSetIterator.NO_MORE_DOCS, "no second doc");
                }
            }
        }

        // Port of the Java anonymous DefaultSimilarity subclass: forces sloppy
        // frequency to zero regardless of edit distance.
        private class DefaultSimilarityAnonymousInnerClassHelper : DefaultSimilarity
        {
            private readonly TestSpans OuterInstance;

            public DefaultSimilarityAnonymousInnerClassHelper(TestSpans outerInstance)
            {
                this.OuterInstance = outerInstance;
            }

            public override float SloppyFreq(int distance)
            {
                return 0.0f;
            }
        }

        // LUCENE-1404
        private void AddDoc(IndexWriter writer, string id, string text)
        {
            Document doc = new Document();
            doc.Add(NewStringField("id", id, Field.Store.YES));
            doc.Add(NewTextField("text", text, Field.Store.YES));
            writer.AddDocument(doc);
        }

        // LUCENE-1404
        private int HitCount(IndexSearcher searcher, string word)
        {
            return searcher.Search(new TermQuery(new Term("text", word)), 10).TotalHits;
        }

        // LUCENE-1404
        private SpanQuery CreateSpan(string value)
        {
            return new SpanTermQuery(new Term("text", value));
        }

        // LUCENE-1404
        private SpanQuery CreateSpan(int slop, bool ordered, SpanQuery[] clauses)
        {
            return new SpanNearQuery(clauses, slop, ordered);
        }

        // LUCENE-1404
        private SpanQuery CreateSpan(int slop, bool ordered, string term1, string term2)
        {
            return CreateSpan(slop, ordered, new SpanQuery[] { CreateSpan(term1), CreateSpan(term2) });
        }

        // LUCENE-1404: regression test for an NPE thrown by nested span queries.
        [Test]
        public virtual void TestNPESpanQuery()
        {
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // Add documents
            AddDoc(writer, "1", "the big dogs went running to the market");
            AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

            // Commit
            writer.Dispose();

            // Get searcher
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            // Control (make sure docs indexed)
            Assert.AreEqual(2, HitCount(searcher, "the"));
            Assert.AreEqual(1, HitCount(searcher, "cat"));
            Assert.AreEqual(1, HitCount(searcher, "dogs"));
            Assert.AreEqual(0, HitCount(searcher, "rabbit"));

            // this throws exception (it shouldn't)
            Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10).TotalHits);
            reader.Dispose();
            dir.Dispose();
        }

        /// <summary>
        /// SpanNotQuery pre/post distance behavior against doc 12
        /// ("s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx").
        /// </summary>
        [Test]
        public virtual void TestSpanNots()
        {
            // NOTE(review): these first two calls pass an extra 0 before the message,
            // selecting the (expected, actual, delta, message) overload — presumably a
            // mechanical port artifact; verify against the Java original.
            Assert.AreEqual(0, SpanCount("s2", "s2", 0, 0), 0, "SpanNotIncludeExcludeSame1");
            Assert.AreEqual(0, SpanCount("s2", "s2", 10, 10), 0, "SpanNotIncludeExcludeSame2");

            //focus on behind
            Assert.AreEqual(1, SpanCount("s2", "s1", 6, 0), "SpanNotS2NotS1_6_0");
            Assert.AreEqual(2, SpanCount("s2", "s1", 5, 0), "SpanNotS2NotS1_5_0");
            Assert.AreEqual(3, SpanCount("s2", "s1", 3, 0), "SpanNotS2NotS1_3_0");
            Assert.AreEqual(4, SpanCount("s2", "s1", 2, 0), "SpanNotS2NotS1_2_0");
            Assert.AreEqual(4, SpanCount("s2", "s1", 0, 0), "SpanNotS2NotS1_0_0");

            //focus on both
            Assert.AreEqual(2, SpanCount("s2", "s1", 3, 1), "SpanNotS2NotS1_3_1");
            Assert.AreEqual(3, SpanCount("s2", "s1", 2, 1), "SpanNotS2NotS1_2_1");
            Assert.AreEqual(3, SpanCount("s2", "s1", 1, 1), "SpanNotS2NotS1_1_1");
            Assert.AreEqual(0, SpanCount("s2", "s1", 10, 10), "SpanNotS2NotS1_10_10");

            //focus on ahead
            Assert.AreEqual(0, SpanCount("s1", "s2", 10, 10), "SpanNotS1NotS2_10_10");
            Assert.AreEqual(3, SpanCount("s1", "s2", 0, 1), "SpanNotS1NotS2_0_1");
            Assert.AreEqual(3, SpanCount("s1", "s2", 0, 2), "SpanNotS1NotS2_0_2");
            Assert.AreEqual(2, SpanCount("s1", "s2", 0, 3), "SpanNotS1NotS2_0_3");
            Assert.AreEqual(1, SpanCount("s1", "s2", 0, 4), "SpanNotS1NotS2_0_4");
            Assert.AreEqual(0, SpanCount("s1", "s2", 0, 8), "SpanNotS1NotS2_0_8");

            //exclude doesn't exist
            Assert.AreEqual(3, SpanCount("s1", "s3", 8, 8), "SpanNotS1NotS3_8_8");

            //include doesn't exist
            Assert.AreEqual(0, SpanCount("s3", "s1", 8, 8), "SpanNotS3NotS1_8_8");
        }

        // Counts the spans produced by a SpanNotQuery(include, exclude, pre, post).
        private int SpanCount(string include, string exclude, int pre, int post)
        {
            SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
            SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
            SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
            Spans spans = MultiSpansWrapper.Wrap(Searcher.TopReaderContext, snq);

            int i = 0;
            while (spans.Next())
            {
                i++;
            }
            return i;
        }
    }
}
// Reconstructed from patch hunks (commit 96822396): TestSpansAdvanced.cs,
// TestSpansAdvanced2.cs, and the TRUNCATED head of TestAutomatonQuery.cs.
// Diff transport markers stripped; code tokens unchanged.

// ===== src/Lucene.Net.Tests/Search/Spans/TestSpansAdvanced.cs =====
using System;
using Lucene.Net.Documents;

namespace Lucene.Net.Search.Spans
{
    using Lucene.Net.Search;
    using NUnit.Framework;
    using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using Field = Field;
    using IndexReader = Lucene.Net.Index.IndexReader;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockTokenFilter = Lucene.Net.Analysis.MockTokenFilter;
    using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using Term = Lucene.Net.Index.Term;

    /// <summary>
    ///*****************************************************************************
    /// Tests the span query bug in Lucene. It demonstrates that SpanTermQuerys don't
    /// work correctly in a BooleanQuery.
    ///
    /// </summary>
    [TestFixture]
    public class TestSpansAdvanced : LuceneTestCase
    {
        // location to the index; shared with (and reused in APPEND mode by) TestSpansAdvanced2
        protected internal Directory MDirectory;

        protected internal IndexReader Reader;
        protected internal IndexSearcher Searcher;

        // field names in the index
        private const string FIELD_ID = "ID";

        protected internal const string FIELD_TEXT = "TEXT";

        /// <summary>
        /// Initializes the tests by adding 4 identical documents to the index.
        /// </summary>
        [SetUp]
        public override void SetUp()
        {
            base.SetUp();
            // create test index; DefaultSimilarity is pinned so the expected scores below are stable
            MDirectory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), MDirectory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity()));
            AddDocument(writer, "1", "I think it should work.");
            AddDocument(writer, "2", "I think it should work.");
            AddDocument(writer, "3", "I think it should work.");
            AddDocument(writer, "4", "I think it should work.");
            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
            Searcher.Similarity = new DefaultSimilarity();
        }

        [TearDown]
        public override void TearDown()
        {
            if (Reader != null)
            {
                Reader.Dispose();
            }

            if (MDirectory != null)
            {
                MDirectory.Dispose();
                MDirectory = null;
            }
            base.TearDown();
        }

        /// <summary>
        /// Adds the document to the index.
        /// </summary>
        /// <param name="writer"> the Lucene index writer </param>
        /// <param name="id"> the unique id of the document </param>
        /// <param name="text"> the text of the document </param>
        protected internal virtual void AddDocument(RandomIndexWriter writer, string id, string text)
        {
            Document document = new Document();
            document.Add(NewStringField(FIELD_ID, id, Field.Store.YES));
            document.Add(NewTextField(FIELD_TEXT, text, Field.Store.YES));
            writer.AddDocument(document);
        }

        /// <summary>
        /// Tests two span queries.
        /// </summary>
        [Test]
        public virtual void TestBooleanQueryWithSpanQueries()
        {
            DoTestBooleanQueryWithSpanQueries(Searcher, 0.3884282f);
        }

        /// <summary>
        /// Tests two span queries: the same SpanTermQuery added twice as MUST clauses;
        /// all four identical documents must score identically.
        /// </summary>
        protected internal virtual void DoTestBooleanQueryWithSpanQueries(IndexSearcher s, float expectedScore)
        {
            Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work"));
            BooleanQuery query = new BooleanQuery();
            query.Add(spanQuery, Occur.MUST);
            query.Add(spanQuery, Occur.MUST);
            string[] expectedIds = new string[] { "1", "2", "3", "4" };
            float[] expectedScores = new float[] { expectedScore, expectedScore, expectedScore, expectedScore };
            AssertHits(s, query, "two span queries", expectedIds, expectedScores);
        }

        /// <summary>
        /// Checks to see if the hits are what we expected.
        ///
        /// LUCENENET specific
        /// Is non-static because it depends on the non-static variable, <see cref="LuceneTestCase.Similarity"/>
        /// </summary>
        /// <param name="query"> the query to execute </param>
        /// <param name="description"> the description of the search </param>
        /// <param name="expectedIds"> the expected document ids of the hits </param>
        /// <param name="expectedScores"> the expected scores of the hits </param>
        protected internal void AssertHits(IndexSearcher s, Query query, string description, string[] expectedIds, float[] expectedScores)
        {
            QueryUtils.Check(Random(), query, s, Similarity);

            const float tolerance = 1e-5f;

            // Hits hits = searcher.Search(query);
            // hits normalizes and throws things off if one score is greater than 1.0
            TopDocs topdocs = s.Search(query, null, 10000);

            /*
            /// // display the hits System.out.println(hits.Length() +
            /// " hits for search: \"" + description + '\"'); for (int i = 0; i <
            /// hits.Length(); i++) { System.out.println(" " + FIELD_ID + ':' +
            /// hits.Doc(i).Get(FIELD_ID) + " (score:" + hits.Score(i) + ')'); } ****
            */

            // did we get the hits we expected
            Assert.AreEqual(expectedIds.Length, topdocs.TotalHits);
            for (int i = 0; i < topdocs.TotalHits; i++)
            {
                // System.out.println(i + " exp: " + expectedIds[i]);
                // System.out.println(i + " field: " + hits.Doc(i).Get(FIELD_ID));

                int id = topdocs.ScoreDocs[i].Doc;
                float score = topdocs.ScoreDocs[i].Score;
                Document doc = s.Doc(id);
                Assert.AreEqual(expectedIds[i], doc.Get(FIELD_ID));
                bool scoreEq = Math.Abs(expectedScores[i] - score) < tolerance;
                if (!scoreEq)
                {
                    Console.WriteLine(i + " warning, expected score: " + expectedScores[i] + ", actual " + score);
                    Console.WriteLine(s.Explain(query, id));
                }
                Assert.AreEqual(expectedScores[i], score, tolerance);
                // the explanation's value must agree with the actual score
                Assert.AreEqual(s.Explain(query, id).Value, score, tolerance);
            }
        }
    }
}

// ===== src/Lucene.Net.Tests/Search/Spans/TestSpansAdvanced2.cs =====
namespace Lucene.Net.Search.Spans
{
    using Lucene.Net.Search;
    using NUnit.Framework;
    using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity;
    using DirectoryReader = Lucene.Net.Index.DirectoryReader;
    using IndexReader = Lucene.Net.Index.IndexReader;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockTokenFilter = Lucene.Net.Analysis.MockTokenFilter;
    using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
    using OpenMode = Lucene.Net.Index.OpenMode;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using Term = Lucene.Net.Index.Term;

    /// <summary>
    ///*****************************************************************************
    /// Some expanded tests to make sure my patch doesn't break other SpanTermQuery
    /// functionality.
    ///
    /// </summary>
    [TestFixture]
    public class TestSpansAdvanced2 : TestSpansAdvanced
    {
        internal IndexSearcher Searcher2;
        internal IndexReader Reader2;

        /// <summary>
        /// Initializes the tests by adding documents to the index.
        /// Appends 4 more documents (A-D) to the base class's index of 4 (1-4).
        /// </summary>
        [SetUp]
        public override void SetUp()
        {
            base.SetUp();

            // create test index
            RandomIndexWriter writer = new RandomIndexWriter(Random(), MDirectory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity()));
            AddDocument(writer, "A", "Should we, could we, would we?");
            AddDocument(writer, "B", "It should.  Should it?");
            AddDocument(writer, "C", "It shouldn't.");
            AddDocument(writer, "D", "Should we, should we, should we.");
            Reader2 = writer.Reader;
            writer.Dispose();

            // re-open the searcher since we added more docs
            Searcher2 = NewSearcher(Reader2);
            Searcher2.Similarity = new DefaultSimilarity();
        }

        [TearDown]
        public override void TearDown()
        {
            Reader2.Dispose();
            base.TearDown();
        }

        /// <summary>
        /// Verifies that the index has the correct number of documents.
        /// </summary>
        [Test]
        public virtual void TestVerifyIndex()
        {
            IndexReader reader = DirectoryReader.Open(MDirectory);
            Assert.AreEqual(8, reader.NumDocs);
            reader.Dispose();
        }

        /// <summary>
        /// Tests a single span query that matches multiple documents.
        /// </summary>
        [Test]
        public virtual void TestSingleSpanQuery()
        {
            Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
            string[] expectedIds = new string[] { "B", "D", "1", "2", "3", "4", "A" };
            float[] expectedScores = new float[] { 0.625f, 0.45927936f, 0.35355338f, 0.35355338f, 0.35355338f, 0.35355338f, 0.26516503f };
            AssertHits(Searcher2, spanQuery, "single span query", expectedIds, expectedScores);
        }

        /// <summary>
        /// Tests a single span query that matches multiple documents.
        /// </summary>
        [Test]
        public virtual void TestMultipleDifferentSpanQueries()
        {
            Query spanQuery1 = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
            Query spanQuery2 = new SpanTermQuery(new Term(FIELD_TEXT, "we"));
            BooleanQuery query = new BooleanQuery();
            query.Add(spanQuery1, Occur.MUST);
            query.Add(spanQuery2, Occur.MUST);
            string[] expectedIds = new string[] { "D", "A" };
            // these values were pre LUCENE-413
            // final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
            float[] expectedScores = new float[] { 1.0191123f, 0.93163157f };
            AssertHits(Searcher2, query, "multiple different span queries", expectedIds, expectedScores);
        }

        /// <summary>
        /// Tests two span queries.
        /// </summary>
        [Test]
        public override void TestBooleanQueryWithSpanQueries()
        {
            DoTestBooleanQueryWithSpanQueries(Searcher2, 0.73500174f);
        }
    }
}

// ===== src/Lucene.Net.Tests/Search/TestAutomatonQuery.cs =====
// NOTE(review): this chunk of the patch archive is TRUNCATED mid-file below;
// the remainder of TestAutomatonQuery.cs is outside the visible source.
using Lucene.Net.Documents;
using Lucene.Net.Support;
using System;
using System.Threading;

namespace Lucene.Net.Search
{
    using Attributes;
    using NUnit.Framework;
    using Automaton = Lucene.Net.Util.Automaton.Automaton;
    using AutomatonTestUtil = Lucene.Net.Util.Automaton.AutomatonTestUtil;
    using BasicAutomata = Lucene.Net.Util.Automaton.BasicAutomata;
    using BasicOperations = Lucene.Net.Util.Automaton.BasicOperations;
    using Directory = Lucene.Net.Store.Directory;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using Document = Documents.Document;
    using Field = Field;
    using IndexReader = Lucene.Net.Index.IndexReader;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using MultiFields = Lucene.Net.Index.MultiFields;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using SingleTermsEnum = Lucene.Net.Index.SingleTermsEnum;
    using Term = Lucene.Net.Index.Term;
    using Terms = Lucene.Net.Index.Terms;
    using TermsEnum = Lucene.Net.Index.TermsEnum;
    using TestUtil = Lucene.Net.Util.TestUtil;

    /// <summary>
    /// Tests <see cref="AutomatonQuery"/> against a three-document index; each rewrite
    /// method must return the same hit counts.
    /// </summary>
    [TestFixture]
    public class TestAutomatonQuery : LuceneTestCase
    {
        private Directory Directory;
        private IndexReader Reader;
        private IndexSearcher Searcher;

        private readonly string FN = "field";

        [SetUp]
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
            Document doc = new Document();
            Field titleField = NewTextField("title", "some title", Field.Store.NO);
            Field field = NewTextField(FN, "this is document one 2345", Field.Store.NO);
            Field footerField = NewTextField("footer", "a footer", Field.Store.NO);
            doc.Add(titleField);
            doc.Add(field);
            doc.Add(footerField);
            writer.AddDocument(doc);
            // the same Document instance is re-added after mutating the field value
            field.SetStringValue("some text from doc two a short piece 5678.91");
            writer.AddDocument(doc);
            field.SetStringValue("doc three has some different stuff" + " with numbers 1234 5678.9 and letter b");
            writer.AddDocument(doc);
            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }

        [TearDown]
        public override void TearDown()
        {
            Reader.Dispose();
            Directory.Dispose();
            base.TearDown();
        }

        private Term NewTerm(string value)
        {
            return new Term(FN, value);
        }

        // Runs the query and returns its total hit count (capped search of 5).
        private int AutomatonQueryNrHits(AutomatonQuery query)
        {
            if (VERBOSE)
            {
                Console.WriteLine("TEST: run aq=" + query);
            }
            return Searcher.Search(query, 5).TotalHits;
        }

        // Asserts the automaton yields the same hit count under every rewrite method.
        private void AssertAutomatonHits(int expected, Automaton automaton)
        {
            AutomatonQuery query = new AutomatonQuery(NewTerm("bogus"), automaton);

            query.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            Assert.AreEqual(expected, AutomatonQueryNrHits(query));

            query.MultiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
            Assert.AreEqual(expected, AutomatonQueryNrHits(query));

            query.MultiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
            Assert.AreEqual(expected, AutomatonQueryNrHits(query));

            query.MultiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
            Assert.AreEqual(expected, AutomatonQueryNrHits(query));
        }

        /// <summary>
        /// Test some very simple automata.
        /// </summary>
        [Test]
        public virtual void TestBasicAutomata()
        {
            AssertAutomatonHits(0, BasicAutomata.MakeEmpty());
            AssertAutomatonHits(0, BasicAutomata.MakeEmptyString());
            AssertAutomatonHits(2, BasicAutomata.MakeAnyChar());
            AssertAutomatonHits(3, BasicAutomata.MakeAnyString());
            AssertAutomatonHits(2, BasicAutomata.MakeString("doc"));
            AssertAutomatonHits(1, BasicAutomata.MakeChar('a'));
            AssertAutomatonHits(2, BasicAutomata.MakeCharRange('a', 'b'));
            AssertAutomatonHits(2, BasicAutomata.MakeInterval(1233, 2346, 0));
            AssertAutomatonHits(1, BasicAutomata.MakeInterval(0, 2000, 0));
            AssertAutomatonHits(2, BasicOperations.Union(BasicAutomata.MakeChar('a'), BasicAutomata.MakeChar('b')));
            AssertAutomatonHits(0, BasicOperations.Intersection(BasicAutomata.MakeChar('a'), BasicAutomata.MakeChar('b')));
            AssertAutomatonHits(1, BasicOperations.Minus(BasicAutomata.MakeCharRange('a', 'b'), BasicAutomata.MakeChar('a')));
        }

        /// <summary>
        /// Test that a nondeterministic automaton works correctly.
(It should will be + /// determinized) + /// </summary> + [Test] + public virtual void TestNFA() + { + // accept this or three, the union is an NFA (two transitions for 't' from + // initial state) + Automaton nfa = BasicOperations.Union(BasicAutomata.MakeString("this"), BasicAutomata.MakeString("three")); + AssertAutomatonHits(2, nfa); + } + + [Test] + public virtual void TestEquals() + { + AutomatonQuery a1 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("foobar")); + // reference to a1 + AutomatonQuery a2 = a1; + // same as a1 (accepts the same language, same term) + AutomatonQuery a3 = new AutomatonQuery(NewTerm("foobar"), BasicOperations.Concatenate(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar"))); + // different than a1 (same term, but different language) + AutomatonQuery a4 = new AutomatonQuery(NewTerm("foobar"), BasicAutomata.MakeString("different")); + // different than a1 (different term, same language) + AutomatonQuery a5 = new AutomatonQuery(NewTerm("blah"), BasicAutomata.MakeString("foobar")); + + Assert.AreEqual(a1.GetHashCode(), a2.GetHashCode()); + Assert.AreEqual(a1, a2); + + Assert.AreEqual(a1.GetHashCode(), a3.GetHashCode()); + Assert.AreEqual(a1, a3); + + // different class + AutomatonQuery w1 = new WildcardQuery(NewTerm("foobar")); + // different class + AutomatonQuery w2 = new RegexpQuery(NewTerm("foobar")); + + Assert.IsFalse(a1.Equals(w1)); + Assert.IsFalse(a1.Equals(w2)); + Assert.IsFalse(w1.Equals(w2)); + Assert.IsFalse(a1.Equals(a4)); + Assert.IsFalse(a1.Equals(a5)); + Assert.IsFalse(a1.Equals(null)); + } + + /// <summary> + /// Test that rewriting to a single term works as expected, preserves + /// MultiTermQuery semantics. 
+ /// </summary> + [Test] + public virtual void TestRewriteSingleTerm() + { + AutomatonQuery aq = new AutomatonQuery(NewTerm("bogus"), BasicAutomata.MakeString("piece")); + Terms terms = MultiFields.GetTerms(Searcher.IndexReader, FN); + Assert.IsTrue(aq.GetTermsEnum(terms) is SingleTermsEnum); + Assert.AreEqual(1, AutomatonQueryNrHits(aq)); + } + + /// <summary> + /// Test that rewriting to a prefix query works as expected, preserves + /// MultiTermQuery semantics. + /// </summary> + [Test] + public virtual void TestRewritePrefix() + { + Automaton pfx = BasicAutomata.MakeString("do"); + pfx.ExpandSingleton(); // expand singleton representation for testing + Automaton prefixAutomaton = BasicOperations.Concatenate(pfx, BasicAutomata.MakeAnyString()); + AutomatonQuery aq = new AutomatonQuery(NewTerm("bogus"), prefixAutomaton); + Terms terms = MultiFields.GetTerms(Searcher.IndexReader, FN); + + var en = aq.GetTermsEnum(terms); + Assert.IsTrue(en is PrefixTermsEnum, "Expected type PrefixTermEnum but was {0}", en.GetType().Name); + Assert.AreEqual(3, AutomatonQueryNrHits(aq)); + } + + /// <summary> + /// Test handling of the empty language + /// </summary> + [Test] + public virtual void TestEmptyOptimization() + { + AutomatonQuery aq = new AutomatonQuery(NewTerm("bogus"), BasicAutomata.MakeEmpty()); + // not yet available: Assert.IsTrue(aq.getEnum(searcher.getIndexReader()) + // instanceof EmptyTermEnum); + Terms terms = MultiFields.GetTerms(Searcher.IndexReader, FN); + Assert.AreSame(TermsEnum.EMPTY, aq.GetTermsEnum(terms)); + Assert.AreEqual(0, AutomatonQueryNrHits(aq)); + } + +#if !NETSTANDARD + // LUCENENET: There is no Timeout on NUnit for .NET Core. 
+ [Timeout(40000)] +#endif + [Test, LongRunningTest, HasTimeout] + public virtual void TestHashCodeWithThreads() + { + AutomatonQuery[] queries = new AutomatonQuery[1000]; + for (int i = 0; i < queries.Length; i++) + { + queries[i] = new AutomatonQuery(new Term("bogus", "bogus"), AutomatonTestUtil.RandomAutomaton(Random())); + } + CountdownEvent startingGun = new CountdownEvent(1); + int numThreads = TestUtil.NextInt(Random(), 2, 5); + ThreadClass[] threads = new ThreadClass[numThreads]; + for (int threadID = 0; threadID < numThreads; threadID++) + { + ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, queries, startingGun); + threads[threadID] = thread; + thread.Start(); + } + startingGun.Signal(); + foreach (ThreadClass thread in threads) + { + thread.Join(); + } + } + + private class ThreadAnonymousInnerClassHelper : ThreadClass + { + private readonly TestAutomatonQuery OuterInstance; + + private AutomatonQuery[] Queries; + private CountdownEvent StartingGun; + + public ThreadAnonymousInnerClassHelper(TestAutomatonQuery outerInstance, AutomatonQuery[] queries, CountdownEvent startingGun) + { + this.OuterInstance = outerInstance; + this.Queries = queries; + this.StartingGun = startingGun; + } + + public override void Run() + { + StartingGun.Wait(); + for (int i = 0; i < Queries.Length; i++) + { + Queries[i].GetHashCode(); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestAutomatonQueryUnicode.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/TestAutomatonQueryUnicode.cs b/src/Lucene.Net.Tests/Search/TestAutomatonQueryUnicode.cs new file mode 100644 index 0000000..207f243 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/TestAutomatonQueryUnicode.cs @@ -0,0 +1,139 @@ +using Lucene.Net.Documents; + +namespace Lucene.Net.Search +{ + using NUnit.Framework; + using Automaton = 
Lucene.Net.Util.Automaton.Automaton; + using Directory = Lucene.Net.Store.Directory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Document = Documents.Document; + using Field = Field; + using IndexReader = Lucene.Net.Index.IndexReader; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using RegExp = Lucene.Net.Util.Automaton.RegExp; + using Term = Lucene.Net.Index.Term; + + /// <summary> + /// Test the automaton query for several unicode corner cases, + /// specifically enumerating strings/indexes containing supplementary characters, + /// and the differences between UTF-8/UTF-32 and UTF-16 binary sort order. 
+ /// </summary> + [TestFixture] + public class TestAutomatonQueryUnicode : LuceneTestCase + { + private IndexReader Reader; + private IndexSearcher Searcher; + private Directory Directory; + + private readonly string FN = "field"; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + Directory = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); + Document doc = new Document(); + Field titleField = NewTextField("title", "some title", Field.Store.NO); + Field field = NewTextField(FN, "", Field.Store.NO); + Field footerField = NewTextField("footer", "a footer", Field.Store.NO); + doc.Add(titleField); + doc.Add(field); + doc.Add(footerField); + field.SetStringValue("\uD866\uDF05abcdef"); + writer.AddDocument(doc); + field.SetStringValue("\uD866\uDF06ghijkl"); + writer.AddDocument(doc); + // this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!! + field.SetStringValue("\uFB94mnopqr"); + writer.AddDocument(doc); + field.SetStringValue("\uFB95stuvwx"); // this one too. 
+ writer.AddDocument(doc); + field.SetStringValue("a\uFFFCbc"); + writer.AddDocument(doc); + field.SetStringValue("a\uFFFDbc"); + writer.AddDocument(doc); + field.SetStringValue("a\uFFFEbc"); + writer.AddDocument(doc); + field.SetStringValue("a\uFB94bc"); + writer.AddDocument(doc); + field.SetStringValue("bacadaba"); + writer.AddDocument(doc); + field.SetStringValue("\uFFFD"); + writer.AddDocument(doc); + field.SetStringValue("\uFFFD\uD866\uDF05"); + writer.AddDocument(doc); + field.SetStringValue("\uFFFD\uFFFD"); + writer.AddDocument(doc); + Reader = writer.Reader; + Searcher = NewSearcher(Reader); + writer.Dispose(); + } + + [TearDown] + public override void TearDown() + { + Reader.Dispose(); + Directory.Dispose(); + base.TearDown(); + } + + private Term NewTerm(string value) + { + return new Term(FN, value); + } + + private int AutomatonQueryNrHits(AutomatonQuery query) + { + return Searcher.Search(query, 5).TotalHits; + } + + private void AssertAutomatonHits(int expected, Automaton automaton) + { + AutomatonQuery query = new AutomatonQuery(NewTerm("bogus"), automaton); + + query.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + Assert.AreEqual(expected, AutomatonQueryNrHits(query)); + + query.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + Assert.AreEqual(expected, AutomatonQueryNrHits(query)); + + query.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + Assert.AreEqual(expected, AutomatonQueryNrHits(query)); + + query.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + Assert.AreEqual(expected, AutomatonQueryNrHits(query)); + } + + /// <summary> + /// Test that AutomatonQuery interacts with lucene's sort order correctly. + /// + /// this expression matches something either starting with the arabic + /// presentation forms block, or a supplementary character. 
+ /// </summary> + [Test] + public virtual void TestSortOrder() + { + Automaton a = (new RegExp("((\uD866\uDF05)|\uFB94).*")).ToAutomaton(); + AssertAutomatonHits(2, a); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestBoolean2.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/TestBoolean2.cs b/src/Lucene.Net.Tests/Search/TestBoolean2.cs new file mode 100644 index 0000000..514f560 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/TestBoolean2.cs @@ -0,0 +1,423 @@ +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Search +{ + using Lucene.Net.Randomized.Generators; + using NUnit.Framework; + using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity; + using Directory = Lucene.Net.Store.Directory; + using DirectoryReader = Lucene.Net.Index.DirectoryReader; + using Document = Documents.Document; + using Field = Field; + using IndexReader = Lucene.Net.Index.IndexReader; + using IOContext = Lucene.Net.Store.IOContext; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper; + using RAMDirectory = Lucene.Net.Store.RAMDirectory; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using Similarity = Lucene.Net.Search.Similarities.Similarity; + using Term = Lucene.Net.Index.Term; + using TestUtil = Lucene.Net.Util.TestUtil; + + /// <summary> + /// Test BooleanQuery2 against BooleanQuery by overriding the standard query parser. + /// this also tests the scoring order of BooleanQuery. + /// </summary> + [TestFixture] + public class TestBoolean2 : LuceneTestCase + { + private static IndexSearcher Searcher; + private static IndexSearcher BigSearcher; + private static IndexReader Reader; + private static IndexReader LittleReader; + private static int NUM_EXTRA_DOCS = 6000; + + public const string field = "field"; + private static Directory Directory; + private static Directory Dir2; + private static int MulFactor; + + /// <summary> + /// LUCENENET specific + /// Is non-static because NewIndexWriterConfig is no longer static. 
+ /// </summary> + [OneTimeSetUp] + public void BeforeClass() + { + Directory = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); + for (int i = 0; i < DocFields.Length; i++) + { + Document doc = new Document(); + doc.Add(NewTextField(field, DocFields[i], Field.Store.NO)); + writer.AddDocument(doc); + } + writer.Dispose(); + LittleReader = DirectoryReader.Open(Directory); + Searcher = NewSearcher(LittleReader); + // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one) + Searcher.Similarity = new DefaultSimilarity(); + + // Make big index + Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT)); + + // First multiply small test index: + MulFactor = 1; + int docCount = 0; + if (VERBOSE) + { + Console.WriteLine("\nTEST: now copy index..."); + } + do + { + if (VERBOSE) + { + Console.WriteLine("\nTEST: cycle..."); + } + Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT)); + RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2, Similarity, TimeZone); + w.AddIndexes(copy); + docCount = w.MaxDoc; + w.Dispose(); + MulFactor *= 2; + } while (docCount < 3000); + + RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); + Document doc_ = new Document(); + doc_.Add(NewTextField("field2", "xxx", Field.Store.NO)); + for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) + { + riw.AddDocument(doc_); + } + doc_ = new Document(); + doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO)); + for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) + { + riw.AddDocument(doc_); + } + Reader = riw.Reader; + BigSearcher = NewSearcher(Reader); + riw.Dispose(); + } + + [OneTimeTearDown] 
+ public static void AfterClass() + { + Reader.Dispose(); + LittleReader.Dispose(); + Dir2.Dispose(); + Directory.Dispose(); + Searcher = null; + Reader = null; + LittleReader = null; + Dir2 = null; + Directory = null; + BigSearcher = null; + } + + private static string[] DocFields = new string[] { "w1 w2 w3 w4 w5", "w1 w3 w2 w3", "w1 xx w2 yy w3", "w1 w3 xx w2 yy w3" }; + + public virtual void QueriesTest(Query query, int[] expDocNrs) + { + TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, false); + Searcher.Search(query, null, collector); + ScoreDoc[] hits1 = collector.GetTopDocs().ScoreDocs; + + collector = TopScoreDocCollector.Create(1000, true); + Searcher.Search(query, null, collector); + ScoreDoc[] hits2 = collector.GetTopDocs().ScoreDocs; + + Assert.AreEqual(MulFactor * collector.TotalHits, BigSearcher.Search(query, 1).TotalHits); + + CheckHits.CheckHitsQuery(query, hits1, hits2, expDocNrs); + } + + [Test] + public virtual void TestQueries01() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "xx")), Occur.MUST); + int[] expDocNrs = new int[] { 2, 3 }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries02() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "xx")), Occur.SHOULD); + int[] expDocNrs = new int[] { 2, 3, 1, 0 }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries03() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.SHOULD); + query.Add(new TermQuery(new Term(field, "xx")), Occur.SHOULD); + int[] expDocNrs = new int[] { 2, 3, 1, 0 }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries04() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), 
Occur.SHOULD); + query.Add(new TermQuery(new Term(field, "xx")), Occur.MUST_NOT); + int[] expDocNrs = new int[] { 1, 0 }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries05() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "xx")), Occur.MUST_NOT); + int[] expDocNrs = new int[] { 1, 0 }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries06() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "xx")), Occur.MUST_NOT); + query.Add(new TermQuery(new Term(field, "w5")), Occur.MUST_NOT); + int[] expDocNrs = new int[] { 1 }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries07() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST_NOT); + query.Add(new TermQuery(new Term(field, "xx")), Occur.MUST_NOT); + query.Add(new TermQuery(new Term(field, "w5")), Occur.MUST_NOT); + int[] expDocNrs = new int[] { }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries08() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "xx")), Occur.SHOULD); + query.Add(new TermQuery(new Term(field, "w5")), Occur.MUST_NOT); + int[] expDocNrs = new int[] { 2, 3, 1 }; + QueriesTest(query, expDocNrs); + } + + [Test] + public virtual void TestQueries09() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "xx")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "w2")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "zz")), Occur.SHOULD); + int[] expDocNrs = new int[] { 2, 3 }; + QueriesTest(query, expDocNrs); + } + + 
[Test] + public virtual void TestQueries10() + { + BooleanQuery query = new BooleanQuery(); + query.Add(new TermQuery(new Term(field, "w3")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "xx")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "w2")), Occur.MUST); + query.Add(new TermQuery(new Term(field, "zz")), Occur.SHOULD); + + int[] expDocNrs = new int[] { 2, 3 }; + Similarity oldSimilarity = Searcher.Similarity; + try + { + Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(this); + QueriesTest(query, expDocNrs); + } + finally + { + Searcher.Similarity = oldSimilarity; + } + } + + private class DefaultSimilarityAnonymousInnerClassHelper : DefaultSimilarity + { + private readonly TestBoolean2 OuterInstance; + + public DefaultSimilarityAnonymousInnerClassHelper(TestBoolean2 outerInstance) + { + this.OuterInstance = outerInstance; + } + + public override float Coord(int overlap, int maxOverlap) + { + return overlap / ((float)maxOverlap - 1); + } + } + + [Test] + public virtual void TestRandomQueries() + { + string[] vals = new string[] { "w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz" }; + + int tot = 0; + + BooleanQuery q1 = null; + try + { + // increase number of iterations for more complete testing + int num = AtLeast(20); + for (int i = 0; i < num; i++) + { + int level = Random().Next(3); + q1 = RandBoolQuery(new Random(Random().Next()), Random().NextBoolean(), level, field, vals, null); + + // Can't sort by relevance since floating point numbers may not quite + // match up. + Sort sort = Sort.INDEXORDER; + + QueryUtils.Check(Random(), q1, Searcher, Similarity); // baseline sim + try + { + // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop. 
+ Searcher.Similarity = BigSearcher.Similarity; // random sim + QueryUtils.Check(Random(), q1, Searcher, Similarity); + } + finally + { + Searcher.Similarity = new DefaultSimilarity(); // restore + } + + TopFieldCollector collector = TopFieldCollector.Create(sort, 1000, false, true, true, true); + + Searcher.Search(q1, null, collector); + ScoreDoc[] hits1 = collector.GetTopDocs().ScoreDocs; + + collector = TopFieldCollector.Create(sort, 1000, false, true, true, false); + + Searcher.Search(q1, null, collector); + ScoreDoc[] hits2 = collector.GetTopDocs().ScoreDocs; + tot += hits2.Length; + CheckHits.CheckEqual(q1, hits1, hits2); + + BooleanQuery q3 = new BooleanQuery(); + q3.Add(q1, Occur.SHOULD); + q3.Add(new PrefixQuery(new Term("field2", "b")), Occur.SHOULD); + TopDocs hits4 = BigSearcher.Search(q3, 1); + Assert.AreEqual(MulFactor * collector.TotalHits + NUM_EXTRA_DOCS / 2, hits4.TotalHits); + } + } + catch (Exception) + { + // For easier debugging + Console.WriteLine("failed query: " + q1); + throw; + } + + // System.out.println("Total hits:"+tot); + } + + // used to set properties or change every BooleanQuery + // generated from randBoolQuery. + public interface Callback + { + void PostCreate(BooleanQuery q); + } + + // Random rnd is passed in so that the exact same random query may be created + // more than once. 
+ public static BooleanQuery RandBoolQuery(Random rnd, bool allowMust, int level, string field, string[] vals, Callback cb) + { + BooleanQuery current = new BooleanQuery(rnd.Next() < 0); + for (int i = 0; i < rnd.Next(vals.Length) + 1; i++) + { + int qType = 0; // term query + if (level > 0) + { + qType = rnd.Next(10); + } + Query q; + if (qType < 3) + { + q = new TermQuery(new Term(field, vals[rnd.Next(vals.Length)])); + } + else if (qType < 4) + { + Term t1 = new Term(field, vals[rnd.Next(vals.Length)]); + Term t2 = new Term(field, vals[rnd.Next(vals.Length)]); + PhraseQuery pq = new PhraseQuery(); + pq.Add(t1); + pq.Add(t2); + pq.Slop = 10; // increase possibility of matching + q = pq; + } + else if (qType < 7) + { + q = new WildcardQuery(new Term(field, "w*")); + } + else + { + q = RandBoolQuery(rnd, allowMust, level - 1, field, vals, cb); + } + + int r = rnd.Next(10); + Occur occur; + if (r < 2) + { + occur = Occur.MUST_NOT; + } + else if (r < 5) + { + if (allowMust) + { + occur = Occur.MUST; + } + else + { + occur = Occur.SHOULD; + } + } + else + { + occur = Occur.SHOULD; + } + + current.Add(q, occur); + } + if (cb != null) + { + cb.PostCreate(current); + } + return current; + } + } +} \ No newline at end of file
