// http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestBooleanMinShouldMatch.cs
// ----------------------------------------------------------------------
// diff --git a/src/Lucene.Net.Tests/Search/TestBooleanMinShouldMatch.cs b/src/Lucene.Net.Tests/Search/TestBooleanMinShouldMatch.cs
// new file mode 100644
// index 0000000..29471a0
// --- /dev/null
// +++ b/src/Lucene.Net.Tests/Search/TestBooleanMinShouldMatch.cs
// @@ -0,0 +1,527 @@
using System;
using System.Globalization;
using Lucene.Net.Documents;

namespace Lucene.Net.Search
{
    using Lucene.Net.Randomized.Generators;
    using NUnit.Framework;
    using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using Field = Field;
    using IndexReader = Lucene.Net.Index.IndexReader;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using Similarity = Lucene.Net.Search.Similarities.Similarity;
    using Term = Lucene.Net.Index.Term;

    /// <summary>
    /// Test that BooleanQuery.setMinimumNumberShouldMatch works.
    /// </summary>
    [TestFixture]
    public class TestBooleanMinShouldMatch : LuceneTestCase
    {
        // Shared fixture state, created once in BeforeClass and released in AfterClass.
        private static Directory Index;
        private static IndexReader r;
        private static IndexSearcher s;

        /// <summary>
        /// Builds the shared index: one document per entry of the data array, each with
        /// an "id" field, an "all" field, and (when the entry is non-null) a "data" field.
        /// <para/>
        /// LUCENENET specific
        /// Is non-static because NewStringField is no longer static.
        /// </summary>
        [OneTimeSetUp]
        public void BeforeClass()
        {
            string[] data = new string[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" };

            Index = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), Index, Similarity, TimeZone);

            for (int i = 0; i < data.Length; i++)
            {
                Document doc = new Document();
                // Format the id with the invariant culture so the indexed value does not
                // depend on the culture the test happens to run under.
                doc.Add(NewStringField("id", i.ToString(CultureInfo.InvariantCulture), Field.Store.YES)); //Field.Keyword("id",String.valueOf(i)));
                doc.Add(NewStringField("all", "all", Field.Store.YES)); //Field.Keyword("all","all"));
                if (null != data[i])
                {
                    doc.Add(NewTextField("data", data[i], Field.Store.YES)); //Field.Text("data",data[i]));
                }
                w.AddDocument(doc);
            }

            r = w.Reader;
            s = NewSearcher(r);
            w.Dispose();
            //System.out.println("Set up " + getName());
        }

        /// <summary>
        /// Releases the shared reader and directory and clears the static references.
        /// </summary>
        [OneTimeTearDown]
        public static void AfterClass()
        {
            s = null;
            r.Dispose();
            r = null;
            Index.Dispose();
            Index = null;
        }

        /// <summary>
        /// Runs <paramref name="q"/> through both search entry points (the TopDocs overload
        /// and an explicit <see cref="TopScoreDocCollector"/>) and asserts that each returns
        /// exactly <paramref name="expected"/> hits. On a mismatch the hits are printed
        /// before the assertion fires. Finishes with the generic QueryUtils checks.
        /// </summary>
        /// <param name="q">Query to execute against the shared searcher.</param>
        /// <param name="expected">Expected number of hits.</param>
        public virtual void VerifyNrHits(Query q, int expected)
        {
            // bs1
            ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
            if (expected != h.Length)
            {
                PrintHits(TestName, h, s);
            }
            Assert.AreEqual(expected, h.Length, "result count");
            //System.out.println("TEST: now check");
            // bs2
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            s.Search(q, collector);
            ScoreDoc[] h2 = collector.GetTopDocs().ScoreDocs;
            if (expected != h2.Length)
            {
                PrintHits(TestName, h2, s);
            }
            Assert.AreEqual(expected, h2.Length, "result count (bs2)");

            QueryUtils.Check(Random(), q, s, Similarity);
        }

        [Test]
        public virtual void TestAllOptional()
        {
            BooleanQuery q = new BooleanQuery();
            for (int i = 1; i <= 4; i++)
            {
                q.Add(new TermQuery(new Term("data", "" + i)), Occur.SHOULD); //false, false);
            }
            q.MinimumNumberShouldMatch = 2; // match at least two of 4
            VerifyNrHits(q, 2);
        }

        [Test]
        public virtual void TestOneReqAndSomeOptional()
        {
            /* one required, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "5")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.SHOULD); //false, false);

            q.MinimumNumberShouldMatch = 2; // 2 of 3 optional

            VerifyNrHits(q, 5);
        }

        [Test]
        public virtual void TestSomeReqAndSomeOptional()
        {
            /* two required, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "6")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "5")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.SHOULD); //false, false);

            q.MinimumNumberShouldMatch = 2; // 2 of 3 optional

            VerifyNrHits(q, 5);
        }

        [Test]
        public virtual void TestOneProhibAndSomeOptional()
        {
            /* one prohibited, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST_NOT); //false, true );
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);

            q.MinimumNumberShouldMatch = 2; // 2 of 3 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestSomeProhibAndSomeOptional()
        {
            /* two prohibited, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST_NOT); //false, true );
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "C")), Occur.MUST_NOT); //false, true );

            q.MinimumNumberShouldMatch = 2; // 2 of 3 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestOneReqOneProhibAndSomeOptional()
        {
            /* one required, one prohibited, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("data", "6")), Occur.MUST); // true, false);
            q.Add(new TermQuery(new Term("data", "5")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST_NOT); //false, true );
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD); //false, false);

            q.MinimumNumberShouldMatch = 3; // 3 of 4 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestSomeReqOneProhibAndSomeOptional()
        {
            /* two required, one prohibited, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "6")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "5")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST_NOT); //false, true );
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD); //false, false);

            q.MinimumNumberShouldMatch = 3; // 3 of 4 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestOneReqSomeProhibAndSomeOptional()
        {
            /* one required, two prohibited, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("data", "6")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "5")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST_NOT); //false, true );
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "C")), Occur.MUST_NOT); //false, true );

            q.MinimumNumberShouldMatch = 3; // 3 of 4 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestSomeReqSomeProhibAndSomeOptional()
        {
            /* two required, two prohibited, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "6")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "5")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST_NOT); //false, true );
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "C")), Occur.MUST_NOT); //false, true );

            q.MinimumNumberShouldMatch = 3; // 3 of 4 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestMinHigherThenNumOptional()
        {
            /* two required, two prohibited, some optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "6")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "5")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "4")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST_NOT); //false, true );
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "C")), Occur.MUST_NOT); //false, true );

            q.MinimumNumberShouldMatch = 90; // 90 of 4 optional ?!?!?!

            VerifyNrHits(q, 0);
        }

        [Test]
        public virtual void TestMinEqualToNumOptional()
        {
            /* two required, two optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "6")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "2")), Occur.SHOULD); //false, false);

            q.MinimumNumberShouldMatch = 2; // 2 of 2 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestOneOptionalEqualToMin()
        {
            /* two required, one optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "3")), Occur.SHOULD); //false, false);
            q.Add(new TermQuery(new Term("data", "2")), Occur.MUST); //true, false);

            q.MinimumNumberShouldMatch = 1; // 1 of 1 optional

            VerifyNrHits(q, 1);
        }

        [Test]
        public virtual void TestNoOptionalButMin()
        {
            /* two required, no optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);
            q.Add(new TermQuery(new Term("data", "2")), Occur.MUST); //true, false);

            q.MinimumNumberShouldMatch = 1; // 1 of 0 optional

            VerifyNrHits(q, 0);
        }

        [Test]
        public virtual void TestNoOptionalButMin2()
        {
            /* one required, no optional */
            BooleanQuery q = new BooleanQuery();
            q.Add(new TermQuery(new Term("all", "all")), Occur.MUST); //true, false);

            q.MinimumNumberShouldMatch = 1; // 1 of 0 optional

            VerifyNrHits(q, 0);
        }

        /// <summary>
        /// Builds pairs of identical random boolean queries from the same seed, applies a
        /// random minimum-should-match (and possibly a random negation) to the second one,
        /// and checks that its hits are a subset of the first query's hits with equal scores.
        /// </summary>
        [Test]
        public virtual void TestRandomQueries()
        {
            const string field = "data";
            string[] vals = new string[] { "1", "2", "3", "4", "5", "6", "A", "Z", "B", "Y", "Z", "X", "foo" };
            int maxLev = 4;

            // callback object to set a random setMinimumNumberShouldMatch
            TestBoolean2.Callback minNrCB = new CallbackAnonymousInnerClassHelper(this, field, vals);

            // increase number of iterations for more complete testing
            int num = AtLeast(20);
            for (int i = 0; i < num; i++)
            {
                int lev = Random().Next(maxLev);
                int seed = Random().Next();
                BooleanQuery q1 = TestBoolean2.RandBoolQuery(new Random(seed), true, lev, field, vals, null);
                // BooleanQuery q2 = TestBoolean2.randBoolQuery(new Random(seed), lev, field, vals, minNrCB);
                BooleanQuery q2 = TestBoolean2.RandBoolQuery(new Random(seed), true, lev, field, vals, null);
                // only set minimumNumberShouldMatch on the top level query since setting
                // at a lower level can change the score.
                minNrCB.PostCreate(q2);

                // Can't use Hits because normalized scores will mess things
                // up. The non-sorting version of search() that returns TopDocs
                // will not normalize scores.
                TopDocs top1 = s.Search(q1, null, 100);
                TopDocs top2 = s.Search(q2, null, 100);
                if (i < 100)
                {
                    QueryUtils.Check(Random(), q1, s, Similarity);
                    QueryUtils.Check(Random(), q2, s, Similarity);
                }
                AssertSubsetOfSameScores(q2, top1, top2);
            }
            // System.out.println("Total hits:"+tot);
        }

        /// <summary>
        /// Callback used by <see cref="TestRandomQueries"/>: sets a random
        /// MinimumNumberShouldMatch on the query and, half of the time, adds a random
        /// MUST_NOT term clause.
        /// </summary>
        private class CallbackAnonymousInnerClassHelper : TestBoolean2.Callback
        {
            private readonly TestBooleanMinShouldMatch OuterInstance;

            private string Field;
            private string[] Vals;

            public CallbackAnonymousInnerClassHelper(TestBooleanMinShouldMatch outerInstance, string field, string[] vals)
            {
                this.OuterInstance = outerInstance;
                this.Field = field;
                this.Vals = vals;
            }

            public virtual void PostCreate(BooleanQuery q)
            {
                // Count the optional (SHOULD) clauses of the top-level query.
                BooleanClause[] c = q.GetClauses();
                int opt = 0;
                for (int i = 0; i < c.Length; i++)
                {
                    if (c[i].Occur == Occur.SHOULD)
                    {
                        opt++;
                    }
                }
                // Random value in [0, opt + 1], so it can exceed the number of optional clauses.
                q.MinimumNumberShouldMatch = Random().Next(opt + 2);
                if (Random().NextBoolean())
                {
                    // also add a random negation
                    Term randomTerm = new Term(Field, Vals[Random().Next(Vals.Length)]);
                    q.Add(new TermQuery(randomTerm), Occur.MUST_NOT);
                }
            }
        }

        /// <summary>
        /// Asserts that every hit in <paramref name="top2"/> (the constrained query) also
        /// appears in <paramref name="top1"/> (the unconstrained query) with a matching score.
        /// </summary>
        /// <param name="q">The constrained query; used only in failure messages.</param>
        /// <param name="top1">Results of the unconstrained query.</param>
        /// <param name="top2">Results of the constrained query.</param>
        private void AssertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2)
        {
            // The constrained query
            // should be a subset to the unconstrained query.
            if (top2.TotalHits > top1.TotalHits)
            {
                Assert.Fail("Constrained results not a subset:\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q.ToString());
            }

            // Iterate over the hits that were actually returned: ScoreDocs is capped by the
            // requested result size, so TotalHits may be larger than ScoreDocs.Length and
            // must not be used as an index bound.
            ScoreDoc[] constrained = top2.ScoreDocs;
            ScoreDoc[] unconstrained = top1.ScoreDocs;

            for (int hit = 0; hit < constrained.Length; hit++)
            {
                int id = constrained[hit].Doc;
                float score = constrained[hit].Score;
                bool found = false;
                // find this doc in other hits
                for (int other = 0; other < unconstrained.Length; other++)
                {
                    if (unconstrained[other].Doc == id)
                    {
                        found = true;
                        float otherScore = unconstrained[other].Score;
                        // check if scores match
                        Assert.AreEqual(score, otherScore, CheckHits.ExplainToleranceDelta(score, otherScore), "Doc " + id + " scores don't match\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q.ToString());
                        // a doc id occurs at most once per result list
                        break;
                    }
                }

                // check if subset
                if (!found)
                {
                    Assert.Fail("Doc " + id + " not found\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q.ToString());
                }
            }
        }

        /// <summary>
        /// With a custom coord factor installed, a single-SHOULD-clause query with
        /// MinimumNumberShouldMatch = 1 must score its hits the same as the plain query.
        /// The searcher's original similarity is restored afterwards.
        /// </summary>
        [Test]
        public virtual void TestRewriteCoord1()
        {
            Similarity oldSimilarity = s.Similarity;
            try
            {
                s.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(this);
                BooleanQuery q1 = new BooleanQuery();
                q1.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD);
                BooleanQuery q2 = new BooleanQuery();
                q2.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD);
                q2.MinimumNumberShouldMatch = 1;
                TopDocs top1 = s.Search(q1, null, 100);
                TopDocs top2 = s.Search(q2, null, 100);
                AssertSubsetOfSameScores(q2, top1, top2);
            }
            finally
            {
                s.Similarity = oldSimilarity;
            }
        }

        /// <summary>
        /// DefaultSimilarity whose coord factor is overlap / (maxOverlap + 1).
        /// </summary>
        private class DefaultSimilarityAnonymousInnerClassHelper : DefaultSimilarity
        {
            private readonly TestBooleanMinShouldMatch OuterInstance;

            public DefaultSimilarityAnonymousInnerClassHelper(TestBooleanMinShouldMatch outerInstance)
            {
                this.OuterInstance = outerInstance;
            }

            public override float Coord(int overlap, int maxOverlap)
            {
                return overlap / ((float)maxOverlap + 1);
            }
        }

        /// <summary>
        /// With a custom coord factor installed, adding a MUST_NOT clause to a
        /// single-SHOULD-clause query must leave the scores of the remaining hits unchanged.
        /// The searcher's original similarity is restored afterwards.
        /// </summary>
        [Test]
        public virtual void TestRewriteNegate()
        {
            Similarity oldSimilarity = s.Similarity;
            try
            {
                s.Similarity = new DefaultSimilarityAnonymousInnerClassHelper2(this);
                BooleanQuery q1 = new BooleanQuery();
                q1.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD);
                BooleanQuery q2 = new BooleanQuery();
                q2.Add(new TermQuery(new Term("data", "1")), Occur.SHOULD);
                q2.Add(new TermQuery(new Term("data", "Z")), Occur.MUST_NOT);
                TopDocs top1 = s.Search(q1, null, 100);
                TopDocs top2 = s.Search(q2, null, 100);
                AssertSubsetOfSameScores(q2, top1, top2);
            }
            finally
            {
                s.Similarity = oldSimilarity;
            }
        }

        /// <summary>
        /// DefaultSimilarity whose coord factor is overlap / (maxOverlap + 1).
        /// </summary>
        private class DefaultSimilarityAnonymousInnerClassHelper2 : DefaultSimilarity
        {
            private readonly TestBooleanMinShouldMatch OuterInstance;

            public DefaultSimilarityAnonymousInnerClassHelper2(TestBooleanMinShouldMatch outerInstance)
            {
                this.OuterInstance = outerInstance;
            }

            public override float Coord(int overlap, int maxOverlap)
            {
                return overlap / ((float)maxOverlap + 1);
            }
        }

        /// <summary>
        /// Writes a diagnostic listing of <paramref name="h"/> to standard error: one line
        /// per hit with its rank, score, and the stored "id" and "data" fields.
        /// </summary>
        /// <param name="test">Label printed in the banner line.</param>
        /// <param name="h">Hits to print.</param>
        /// <param name="searcher">Searcher used to load each hit's stored document.</param>
        protected internal virtual void PrintHits(string test, ScoreDoc[] h, IndexSearcher searcher)
        {
            Console.Error.WriteLine("------- " + test + " -------");

            //DecimalFormat f = new DecimalFormat("0.000000", DecimalFormatSymbols.getInstance(Locale.ROOT));

            for (int i = 0; i < h.Length; i++)
            {
                Document d = searcher.Doc(h[i].Doc);
                // Format the float directly: a cast to decimal throws OverflowException for
                // NaN/Infinity, which would hide the assertion failure this output explains.
                string score = h[i].Score.ToString("0.000000", CultureInfo.InvariantCulture);
                Console.Error.WriteLine("#" + i + ": " + score + " - " + d.Get("id") + " - " + d.Get("data"));
            }
        }
    }
}
// \ No newline at end of file
// http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestBooleanOr.cs
// ----------------------------------------------------------------------
// diff --git a/src/Lucene.Net.Tests/Search/TestBooleanOr.cs b/src/Lucene.Net.Tests/Search/TestBooleanOr.cs
// new file mode 100644
// index 0000000..ba4f218
// --- /dev/null
// +++ b/src/Lucene.Net.Tests/Search/TestBooleanOr.cs
// @@ -0,0 +1,253 @@
using Lucene.Net.Documents;

namespace Lucene.Net.Search
{
    using Lucene.Net.Support;
    using NUnit.Framework;
    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;
    using FixedBitSet = Lucene.Net.Util.FixedBitSet;
    using IndexReader = Lucene.Net.Index.IndexReader;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using Term = Lucene.Net.Index.Term;
    using TestUtil = Lucene.Net.Util.TestUtil;
    using TextField = TextField;

    /// <summary>
    /// Tests OR-style (SHOULD) combinations of boolean clauses, flat and nested,
    /// against a single-document index built in <see cref="SetUp"/>.
    /// </summary>
    [TestFixture]
    public class TestBooleanOr : LuceneTestCase
    {
        // Field names are fixed for the lifetime of the fixture.
        private const string FIELD_T = "T";
        private const string FIELD_C = "C";

        private readonly TermQuery T1 = new TermQuery(new Term(FIELD_T, "files"));
        private readonly TermQuery T2 = new TermQuery(new Term(FIELD_T, "deleting"));
        private readonly TermQuery C1 = new TermQuery(new Term(FIELD_C, "production"));
        private readonly TermQuery C2 = new TermQuery(new Term(FIELD_C, "optimize"));

        // Per-test state, created in SetUp and released in TearDown.
        private IndexSearcher Searcher = null;
        private Directory Dir;
        private IndexReader Reader;

        /// <summary>
        /// Runs the generic QueryUtils checks on <paramref name="q"/> and returns the
        /// total number of hits it produces on the per-test searcher.
        /// </summary>
        private int Search(Query q)
        {
            QueryUtils.Check(Random(), q, Searcher, Similarity);
            return Searcher.Search(q, null, 1000).TotalHits;
        }

        /// <summary>
        /// Each of the four term queries matches the single indexed document on its own.
        /// </summary>
        [Test]
        public virtual void TestElements()
        {
            Assert.AreEqual(1, Search(T1));
            Assert.AreEqual(1, Search(T2));
            Assert.AreEqual(1, Search(C1));
            Assert.AreEqual(1, Search(C2));
        }

        /// <summary>
        /// <code>T:files T:deleting C:production C:optimize </code>
        /// it works.
        /// </summary>
        [Test]
        public virtual void TestFlat()
        {
            BooleanQuery q = new BooleanQuery();
            q.Add(new BooleanClause(T1, Occur.SHOULD));
            q.Add(new BooleanClause(T2, Occur.SHOULD));
            q.Add(new BooleanClause(C1, Occur.SHOULD));
            q.Add(new BooleanClause(C2, Occur.SHOULD));
            Assert.AreEqual(1, Search(q));
        }

        /// <summary>
        /// <code>(T:files T:deleting) (+C:production +C:optimize)</code>
        /// it works.
        /// </summary>
        [Test]
        public virtual void TestParenthesisMust()
        {
            BooleanQuery q3 = new BooleanQuery();
            q3.Add(new BooleanClause(T1, Occur.SHOULD));
            q3.Add(new BooleanClause(T2, Occur.SHOULD));
            BooleanQuery q4 = new BooleanQuery();
            q4.Add(new BooleanClause(C1, Occur.MUST));
            q4.Add(new BooleanClause(C2, Occur.MUST));
            BooleanQuery q2 = new BooleanQuery();
            q2.Add(q3, Occur.SHOULD);
            q2.Add(q4, Occur.SHOULD);
            Assert.AreEqual(1, Search(q2));
        }

        /// <summary>
        /// <code>(T:files T:deleting) +(C:production C:optimize)</code>
        /// The original comment described this form as "not working. results NO HIT.";
        /// the assertion below expects exactly one hit.
        /// </summary>
        [Test]
        public virtual void TestParenthesisMust2()
        {
            BooleanQuery q3 = new BooleanQuery();
            q3.Add(new BooleanClause(T1, Occur.SHOULD));
            q3.Add(new BooleanClause(T2, Occur.SHOULD));
            BooleanQuery q4 = new BooleanQuery();
            q4.Add(new BooleanClause(C1, Occur.SHOULD));
            q4.Add(new BooleanClause(C2, Occur.SHOULD));
            BooleanQuery q2 = new BooleanQuery();
            q2.Add(q3, Occur.SHOULD);
            q2.Add(q4, Occur.MUST);
            Assert.AreEqual(1, Search(q2));
        }

        /// <summary>
        /// <code>(T:files T:deleting) (C:production C:optimize)</code>
        /// The original comment described this form as "not working. results NO HIT.";
        /// the assertion below expects exactly one hit.
        /// </summary>
        [Test]
        public virtual void TestParenthesisShould()
        {
            BooleanQuery q3 = new BooleanQuery();
            q3.Add(new BooleanClause(T1, Occur.SHOULD));
            q3.Add(new BooleanClause(T2, Occur.SHOULD));
            BooleanQuery q4 = new BooleanQuery();
            q4.Add(new BooleanClause(C1, Occur.SHOULD));
            q4.Add(new BooleanClause(C2, Occur.SHOULD));
            BooleanQuery q2 = new BooleanQuery();
            q2.Add(q3, Occur.SHOULD);
            q2.Add(q4, Occur.SHOULD);
            Assert.AreEqual(1, Search(q2));
        }

        /// <summary>
        /// Creates a fresh directory holding one document with the "T" and "C" text
        /// fields, and opens the reader and searcher used by the tests.
        /// </summary>
        [SetUp]
        public override void SetUp()
        {
            base.SetUp();

            Dir = NewDirectory();

            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, Similarity, TimeZone);

            Document d = new Document();
            d.Add(NewField(FIELD_T, "Optimize not deleting all files", TextField.TYPE_STORED));
            d.Add(NewField(FIELD_C, "Deleted When I run an optimize in our production environment.", TextField.TYPE_STORED));

            writer.AddDocument(d);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }

        /// <summary>
        /// Disposes the per-test reader and directory before the base teardown runs.
        /// </summary>
        [TearDown]
        public override void TearDown()
        {
            Reader.Dispose();
            Dir.Dispose();
            base.TearDown();
        }

        /// <summary>
        /// Scores a two-clause SHOULD query over a single-segment index in random
        /// increments via the bulk scorer, asserting that no collected doc lies at or
        /// beyond the current upper bound and that every document is collected once
        /// the bound has passed the document count.
        /// </summary>
        [Test]
        public virtual void TestBooleanScorerMax()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            int docCount = AtLeast(10000);

            for (int i = 0; i < docCount; i++)
            {
                Document doc = new Document();
                doc.Add(NewField("field", "a", TextField.TYPE_NOT_STORED));
                riw.AddDocument(doc);
            }

            riw.ForceMerge(1);
            IndexReader r = riw.Reader;
            riw.Dispose();

            // Release the reader and directory even when an assertion below fails.
            try
            {
                IndexSearcher s = NewSearcher(r);
                BooleanQuery bq = new BooleanQuery();
                bq.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
                bq.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);

                Weight w = s.CreateNormalizedWeight(bq);

                Assert.AreEqual(1, s.IndexReader.Leaves.Count);
                BulkScorer scorer = w.GetBulkScorer(s.IndexReader.Leaves[0], false, null);

                FixedBitSet hits = new FixedBitSet(docCount);
                AtomicInt32 end = new AtomicInt32();
                ICollector c = new CollectorAnonymousInnerClassHelper(this, scorer, hits, end);

                // Advance the upper bound by a random step and score up to it each round.
                while (end.Get() < docCount)
                {
                    int inc = TestUtil.NextInt(Random(), 1, 1000);
                    end.AddAndGet(inc);
                    scorer.Score(c, end.Get());
                }

                Assert.AreEqual(docCount, hits.Cardinality());
            }
            finally
            {
                r.Dispose();
                dir.Dispose();
            }
        }

        /// <summary>
        /// Collector that records each collected doc in a bit set and asserts that the
        /// doc id is below the current value of the shared upper bound.
        /// </summary>
        private class CollectorAnonymousInnerClassHelper : ICollector
        {
            private readonly TestBooleanOr OuterInstance;

            private BulkScorer scorer;
            private FixedBitSet Hits;
            private AtomicInt32 End;

            public CollectorAnonymousInnerClassHelper(TestBooleanOr outerInstance, BulkScorer scorer, FixedBitSet hits, AtomicInt32 end)
            {
                this.OuterInstance = outerInstance;
                this.scorer = scorer;
                this.Hits = hits;
                this.End = end;
            }

            public virtual void SetNextReader(AtomicReaderContext context)
            {
            }

            public virtual void Collect(int doc)
            {
                // Read the bound once so the check and the message report the same value.
                int max = End.Get();
                Assert.IsTrue(doc < max, "collected doc=" + doc + " beyond max=" + max);
                Hits.Set(doc);
            }

            public virtual void SetScorer(Scorer scorer)
            {
            }

            public virtual bool AcceptsDocsOutOfOrder
            {
                get { return true; }
            }
        }
    }
}
// \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestBooleanQuery.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/TestBooleanQuery.cs b/src/Lucene.Net.Tests/Search/TestBooleanQuery.cs new file mode 100644 index 0000000..4e8d377 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/TestBooleanQuery.cs @@ -0,0 +1,412 @@ +using System; +using System.Collections.Generic; +using Lucene.Net.Documents; + +namespace Lucene.Net.Search +{ + using Index; + using NUnit.Framework; + using Support; + using System.Threading.Tasks; + using Util; + + /* + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Analyzer = Lucene.Net.Analysis.Analyzer; + using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext; + using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity; + using Directory = Lucene.Net.Store.Directory; + using DirectoryReader = Lucene.Net.Index.DirectoryReader; + using Document = Documents.Document; + using Field = Field; + using IndexReader = Lucene.Net.Index.IndexReader; + using IndexWriter = Lucene.Net.Index.IndexWriter; + using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; + using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; + using Term = Lucene.Net.Index.Term; + using TextField = TextField; + + [TestFixture] + public class TestBooleanQuery : LuceneTestCase + { + [Test] + public virtual void TestEquality() + { + BooleanQuery bq1 = new BooleanQuery(); + bq1.Add(new TermQuery(new Term("field", "value1")), Occur.SHOULD); + bq1.Add(new TermQuery(new Term("field", "value2")), Occur.SHOULD); + BooleanQuery nested1 = new BooleanQuery(); + 
nested1.Add(new TermQuery(new Term("field", "nestedvalue1")), Occur.SHOULD); + nested1.Add(new TermQuery(new Term("field", "nestedvalue2")), Occur.SHOULD); + bq1.Add(nested1, Occur.SHOULD); + + BooleanQuery bq2 = new BooleanQuery(); + bq2.Add(new TermQuery(new Term("field", "value1")), Occur.SHOULD); + bq2.Add(new TermQuery(new Term("field", "value2")), Occur.SHOULD); + BooleanQuery nested2 = new BooleanQuery(); + nested2.Add(new TermQuery(new Term("field", "nestedvalue1")), Occur.SHOULD); + nested2.Add(new TermQuery(new Term("field", "nestedvalue2")), Occur.SHOULD); + bq2.Add(nested2, Occur.SHOULD); + + Assert.IsTrue(bq1.Equals(bq2)); + //Assert.AreEqual(bq1, bq2); + } + + [Test] + public virtual void TestException() + { + try + { + BooleanQuery.MaxClauseCount = 0; + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException e) +#pragma warning restore 168 + { + // okay + } + } + + // LUCENE-1630 + [Test] + public virtual void TestNullOrSubScorer() + { + Directory dir = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + doc.Add(NewTextField("field", "a b c d", Field.Store.NO)); + w.AddDocument(doc); + + IndexReader r = w.Reader; + IndexSearcher s = NewSearcher(r); + // this test relies upon coord being the default implementation, + // otherwise scores are different! 
+ s.Similarity = new DefaultSimilarity(); + + BooleanQuery q = new BooleanQuery(); + q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); + + // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor + float score = s.Search(q, 10).MaxScore; + Query subQuery = new TermQuery(new Term("field", "not_in_index")); + subQuery.Boost = 0; + q.Add(subQuery, Occur.SHOULD); + float score2 = s.Search(q, 10).MaxScore; + Assert.AreEqual(score * .5F, score2, 1e-6); + + // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor + BooleanQuery qq = (BooleanQuery)q.Clone(); + PhraseQuery phrase = new PhraseQuery(); + phrase.Add(new Term("field", "not_in_index")); + phrase.Add(new Term("field", "another_not_in_index")); + phrase.Boost = 0; + qq.Add(phrase, Occur.SHOULD); + score2 = s.Search(qq, 10).MaxScore; + Assert.AreEqual(score * (1 / 3F), score2, 1e-6); + + // now test BooleanScorer2 + subQuery = new TermQuery(new Term("field", "b")); + subQuery.Boost = 0; + q.Add(subQuery, Occur.MUST); + score2 = s.Search(q, 10).MaxScore; + Assert.AreEqual(score * (2 / 3F), score2, 1e-6); + + // PhraseQuery w/ no terms added returns a null scorer + PhraseQuery pq = new PhraseQuery(); + q.Add(pq, Occur.SHOULD); + Assert.AreEqual(1, s.Search(q, 10).TotalHits); + + // A required clause which returns null scorer should return null scorer to + // IndexSearcher. 
+ q = new BooleanQuery(); + pq = new PhraseQuery(); + q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); + q.Add(pq, Occur.MUST); + Assert.AreEqual(0, s.Search(q, 10).TotalHits); + + DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); + dmq.Add(new TermQuery(new Term("field", "a"))); + dmq.Add(pq); + Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); + + r.Dispose(); + w.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestDeMorgan() + { + Directory dir1 = NewDirectory(); + RandomIndexWriter iw1 = new RandomIndexWriter(Random(), dir1, Similarity, TimeZone); + Document doc1 = new Document(); + doc1.Add(NewTextField("field", "foo bar", Field.Store.NO)); + iw1.AddDocument(doc1); + IndexReader reader1 = iw1.Reader; + iw1.Dispose(); + + Directory dir2 = NewDirectory(); + RandomIndexWriter iw2 = new RandomIndexWriter(Random(), dir2, Similarity, TimeZone); + Document doc2 = new Document(); + doc2.Add(NewTextField("field", "foo baz", Field.Store.NO)); + iw2.AddDocument(doc2); + IndexReader reader2 = iw2.Reader; + iw2.Dispose(); + + BooleanQuery query = new BooleanQuery(); // Query: +foo -ba* + query.Add(new TermQuery(new Term("field", "foo")), Occur.MUST); + WildcardQuery wildcardQuery = new WildcardQuery(new Term("field", "ba*")); + wildcardQuery.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + query.Add(wildcardQuery, Occur.MUST_NOT); + + MultiReader multireader = new MultiReader(reader1, reader2); + IndexSearcher searcher = NewSearcher(multireader); + Assert.AreEqual(0, searcher.Search(query, 10).TotalHits); + + + Task foo = new Task(TestDeMorgan); + + TaskScheduler es = TaskScheduler.Default; + searcher = new IndexSearcher(multireader, es); + if (VERBOSE) + { + Console.WriteLine("rewritten form: " + searcher.Rewrite(query)); + } + Assert.AreEqual(0, searcher.Search(query, 10).TotalHits); + + multireader.Dispose(); + reader1.Dispose(); + reader2.Dispose(); + dir1.Dispose(); + dir2.Dispose(); + } + + [Test] + public 
virtual void TestBS2DisjunctionNextVsAdvance()
        {
            // Indexes at least 300 random documents over the terms a..f, then for random
            // SHOULD-only disjunctions checks that stepping the scorer with a random mix
            // of NextDoc()/Advance() yields exactly the docs and scores that a plain
            // NextDoc() pass produced.
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            int numDocs = AtLeast(300);
            for (int docUpto = 0; docUpto < numDocs; docUpto++)
            {
                string contents = "a";
                if (Random().Next(20) <= 16)
                {
                    contents += " b";
                }
                if (Random().Next(20) <= 8)
                {
                    contents += " c";
                }
                if (Random().Next(20) <= 4)
                {
                    contents += " d";
                }
                if (Random().Next(20) <= 2)
                {
                    contents += " e";
                }
                if (Random().Next(20) <= 1)
                {
                    contents += " f";
                }
                Document doc = new Document();
                doc.Add(new TextField("field", contents, Field.Store.NO));
                w.AddDocument(doc);
            }
            // A single segment, so that m_leafContexts[0] below covers every document.
            w.ForceMerge(1);
            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);
            w.Dispose();

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("iter=" + iter);
                }
                // Start from all six terms and randomly drop some until numTerms remain.
                IList<string> terms = new List<string>(Arrays.AsList("a", "b", "c", "d", "e", "f"));
                int numTerms = TestUtil.NextInt(Random(), 1, terms.Count);
                while (terms.Count > numTerms)
                {
                    terms.RemoveAt(Random().Next(terms.Count));
                }

                if (VERBOSE)
                {
                    // A .NET list's ToString() is its type name, so join the elements explicitly.
                    Console.WriteLine(" terms=[" + string.Join(", ", terms) + "]");
                }

                BooleanQuery q = new BooleanQuery();
                foreach (string term in terms)
                {
                    q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD));
                }

                Weight weight = s.CreateNormalizedWeight(q);

                Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null);

                // First pass: just use .NextDoc() to gather all hits
                IList<ScoreDoc> hits = new List<ScoreDoc>();
                while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore()));
                }

                if (VERBOSE)
                {
                    Console.WriteLine(" " + hits.Count + " hits");
                }

                // Now, randomly next/advance through the list and
                // verify exact match:
                for (int iter2 = 0; iter2 < 10; iter2++)
                {
                    weight = s.CreateNormalizedWeight(q);
                    scorer = weight.GetScorer(s.m_leafContexts[0], null);

                    if (VERBOSE)
                    {
                        Console.WriteLine(" iter2=" + iter2);
                    }

                    int upto = -1;
                    while (upto < hits.Count)
                    {
                        int nextUpto;
                        int nextDoc;
                        int left = hits.Count - upto;
                        if (left == 1 || Random().NextBoolean())
                        {
                            // next
                            nextUpto = 1 + upto;
                            nextDoc = scorer.NextDoc();
                        }
                        else
                        {
                            // advance
                            int inc = TestUtil.NextInt(Random(), 1, left - 1);
                            nextUpto = inc + upto;
                            nextDoc = scorer.Advance(hits[nextUpto].Doc);
                        }

                        if (nextUpto == hits.Count)
                        {
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc);
                        }
                        else
                        {
                            ScoreDoc hit = hits[nextUpto];
                            Assert.AreEqual(hit.Doc, nextDoc);
                            // Test for precise float equality:
                            Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore());
                        }
                        upto = nextUpto;
                    }
                }
            }

            r.Dispose();
            d.Dispose();
        }

        // LUCENE-4477 / LUCENE-4401:
        [Test]
        public virtual void TestBooleanSpanQuery()
        {
            bool failed = false;
            int hits = 0;
            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            string FIELD = "content";
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            BooleanQuery query = new BooleanQuery();
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1, Occur.SHOULD);
            query.Add(sq2, Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);
            hits = collector.GetTopDocs().ScoreDocs.Length;
foreach (ScoreDoc scoreDoc in collector.GetTopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            // NUnit's AreEqual takes (expected, actual); keep that order so a failure
            // message reports the right value as "expected".
            Assert.AreEqual(false, failed, "Bug in boolean query composed of span queries");
            Assert.AreEqual(1, hits, "Bug in boolean query composed of span queries");
            directory.Dispose();
        }

        // LUCENE-5487
        /// <summary>
        /// Searches a three-clause SHOULD query with MinimumNumberShouldMatch = 2 through
        /// <see cref="IndexSearcherAnonymousInnerClassHelper"/>, which asserts on the type
        /// name of the collector it receives.
        /// </summary>
        [Test]
        public virtual void TestInOrderWithMinShouldMatch()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(NewTextField("field", "some text here", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.Reader;
            w.Dispose();
            IndexSearcher s = new IndexSearcherAnonymousInnerClassHelper(this, r);
            BooleanQuery bq = new BooleanQuery();
            bq.Add(new TermQuery(new Term("field", "some")), Occur.SHOULD);
            bq.Add(new TermQuery(new Term("field", "text")), Occur.SHOULD);
            bq.Add(new TermQuery(new Term("field", "here")), Occur.SHOULD);
            bq.MinimumNumberShouldMatch = 2;
            s.Search(bq, 10);
            r.Dispose();
            dir.Dispose();
        }

        /// <summary>
        /// <see cref="IndexSearcher"/> that, before delegating the leaf-level search to the
        /// base class, asserts that the collector's type name does not contain "OutOfOrder".
        /// </summary>
        private class IndexSearcherAnonymousInnerClassHelper : IndexSearcher
        {
            private readonly TestBooleanQuery OuterInstance;

            public IndexSearcherAnonymousInnerClassHelper(TestBooleanQuery outerInstance, IndexReader r)
                : base(r)
            {
                this.OuterInstance = outerInstance;
            }

            protected override void Search(IList<AtomicReaderContext> leaves, Weight weight, ICollector collector)
            {
                // Type names are identifiers, so match them ordinally rather than
                // with the current culture's comparison rules.
                Assert.AreEqual(-1, collector.GetType().Name.IndexOf("OutOfOrder", StringComparison.Ordinal));
                base.Search(leaves, weight, collector);
            }
        }
    }
}

\ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestBooleanQueryVisitSubscorers.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/TestBooleanQueryVisitSubscorers.cs b/src/Lucene.Net.Tests/Search/TestBooleanQueryVisitSubscorers.cs new file mode
100644 index 0000000..6a83426 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/TestBooleanQueryVisitSubscorers.cs @@ -0,0 +1,206 @@ +using System.Collections.Generic; +using Lucene.Net.Documents; + +namespace Lucene.Net.Search +{ + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

    using Analyzer = Lucene.Net.Analysis.Analyzer;
    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
    using ChildScorer = Lucene.Net.Search.Scorer.ChildScorer;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;
    using IndexReader = Lucene.Net.Index.IndexReader;
    using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using Store = Field.Store;
    using Term = Lucene.Net.Index.Term;
    using TextField = TextField;

    // TODO: refactor to a base class, that collects freqs from the scorer tree
    // and test all queries with it
    /// <summary>
    /// Checks, for several boolean query shapes, the per-document sum of
    /// <c>Freq</c> over the TermQuery scorers found in the scorer tree.
    /// </summary>
    [TestFixture]
    public class TestBooleanQueryVisitSubscorers : LuceneTestCase
    {
        internal Analyzer Analyzer;
        internal IndexReader Reader;
        internal IndexSearcher Searcher;
        internal Directory Dir;

        internal const string F1 = "title";
        internal const string F2 = "body";

        /// <summary>
        /// Builds a three-document index (lucene, solr, nutch) and opens a searcher on it.
        /// </summary>
        [SetUp]
        public override void SetUp()
        {
            base.SetUp();
            Analyzer = new MockAnalyzer(Random());
            Dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, Analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy()); // we will use docids to validate

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), Dir, iwc);
            indexWriter.AddDocument(Doc("lucene", "lucene is a very popular search engine library"));
            indexWriter.AddDocument(Doc("solr", "solr is a very popular search server and is using lucene"));
            indexWriter.AddDocument(Doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
            Reader = indexWriter.Reader;
            indexWriter.Dispose();

            Searcher = NewSearcher(Reader);
        }

        /// <summary>
        /// Releases the reader and the directory created in <see cref="SetUp"/>.
        /// </summary>
        [TearDown]
        public override void TearDown()
        {
            Reader.Dispose();
            Dir.Dispose();
            base.TearDown();
        }

        /// <summary>
        /// Three flat SHOULD clauses: title:lucene, body:lucene, body:search.
        /// </summary>
        [Test]
        public virtual void TestDisjunctions()
        {
            BooleanQuery query = new BooleanQuery();
            query.Add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
            query.Add(new TermQuery(new Term(F2, "lucene")), Occur.SHOULD);
            query.Add(new TermQuery(new Term(F2, "search")), Occur.SHOULD);

            IDictionary<int, int> counts = GetDocCounts(Searcher, query);

            Assert.AreEqual(3, counts.Count); // 3 documents
            Assert.AreEqual(3, counts[0]); // f1:lucene + f2:lucene + f2:search
            Assert.AreEqual(2, counts[1]); // f2:search + f2:lucene
            Assert.AreEqual(2, counts[2]); // f2:search + f2:lucene
        }

        /// <summary>
        /// Same clauses as <see cref="TestDisjunctions"/>, with the two body clauses
        /// wrapped in a nested BooleanQuery.
        /// </summary>
        [Test]
        public virtual void TestNestedDisjunctions()
        {
            BooleanQuery inner = new BooleanQuery();
            inner.Add(new TermQuery(new Term(F2, "lucene")), Occur.SHOULD);
            inner.Add(new TermQuery(new Term(F2, "search")), Occur.SHOULD);

            BooleanQuery outer = new BooleanQuery();
            outer.Add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
            outer.Add(inner, Occur.SHOULD);

            IDictionary<int, int> counts = GetDocCounts(Searcher, outer);

            Assert.AreEqual(3, counts.Count); // 3 documents
            Assert.AreEqual(3, counts[0]); // f1:lucene + f2:lucene + f2:search
            Assert.AreEqual(2, counts[1]); // f2:search + f2:lucene
            Assert.AreEqual(2, counts[2]); // f2:search + f2:lucene
        }

        /// <summary>
        /// Two MUST clauses on the body field: "lucene" and "is".
        /// </summary>
        [Test]
        public virtual void TestConjunctions()
        {
            BooleanQuery query = new BooleanQuery();
            query.Add(new TermQuery(new Term(F2, "lucene")), Occur.MUST);
            query.Add(new TermQuery(new Term(F2, "is")), Occur.MUST);

            IDictionary<int, int> counts = GetDocCounts(Searcher, query);

            Assert.AreEqual(3, counts.Count); // 3 documents
            Assert.AreEqual(2, counts[0]); // f2:lucene + f2:is
            Assert.AreEqual(3, counts[1]); // f2:is + f2:is + f2:lucene
            Assert.AreEqual(3, counts[2]); // f2:is + f2:is + f2:lucene
        }

        /// <summary>
        /// Creates a document with <paramref name="v1"/> stored in the title field and
        /// <paramref name="v2"/> stored in the body field.
        /// </summary>
        internal static Document Doc(string v1, string v2)
        {
            Document result = new Document();
            result.Add(new TextField(F1, v1, Store.YES));
            result.Add(new TextField(F2, v2, Store.YES));
            return result;
        }

        /// <summary>
        /// Runs <paramref name="query"/> through a <see cref="MyCollector"/> and returns
        /// the doc id to summed-frequency map it recorded.
        /// </summary>
        internal static IDictionary<int, int> GetDocCounts(IndexSearcher searcher, Query query)
        {
            MyCollector counting = new MyCollector();
            searcher.Search(query, counting);
            return counting.DocCounts;
        }

        /// <summary>
        /// Collector that forwards to a top-10 score collector and, for every collected
        /// document, records the summed <c>Freq</c> of the TermQuery scorers that are
        /// positioned on that document.
        /// </summary>
        internal class MyCollector : ICollector
        {
            internal TopDocsCollector<ScoreDoc> Collector;
            internal int DocBase;

            public readonly IDictionary<int, int> DocCounts = new Dictionary<int, int>();
            internal readonly HashSet<Scorer> TqsSet = new HashSet<Scorer>();

            internal MyCollector()
            {
                Collector = TopScoreDocCollector.Create(10, true);
            }

            public virtual bool AcceptsDocsOutOfOrder
            {
                get { return false; }
            }

            public virtual void Collect(int doc)
            {
                int total = 0;
                foreach (Scorer leaf in TqsSet)
                {
                    // Only leaves currently positioned on this document contribute.
                    if (leaf.DocID != doc)
                    {
                        continue;
                    }
                    total += leaf.Freq;
                }

                // Keyed by the index-wide doc id, not the segment-local one.
                DocCounts[DocBase + doc] = total;
                Collector.Collect(doc);
            }

            public virtual void SetNextReader(AtomicReaderContext context)
            {
                DocBase = context.DocBase;
                Collector.SetNextReader(context);
            }

            public virtual void SetScorer(Scorer scorer)
            {
                Collector.SetScorer(scorer);

                // Rebuild the leaf set for the new scorer tree.
                TqsSet.Clear();
                FillLeaves(scorer, TqsSet);
            }

            /// <summary>
            /// Adds to <paramref name="set"/> every scorer in the tree rooted at
            /// <paramref name="scorer"/> whose weight's query is a TermQuery.
            /// </summary>
            internal virtual void FillLeaves(Scorer scorer, ISet<Scorer> set)
            {
                if (!(scorer.Weight.Query is TermQuery))
                {
                    foreach (ChildScorer sub in scorer.GetChildren())
                    {
                        FillLeaves(sub.Child, set);
                    }
                    return;
                }

                set.Add(scorer);
            }

            public virtual TopDocs GetTopDocs()
            {
                return Collector.GetTopDocs();
            }

            public virtual int Freq(int doc)
            {
                return DocCounts[doc];
            }
        }
    }
}
\ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestBooleanScorer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/TestBooleanScorer.cs b/src/Lucene.Net.Tests/Search/TestBooleanScorer.cs new file mode 100644 index 0000000..8131ae3 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/TestBooleanScorer.cs @@ -0,0 +1,330 @@ +using System; +using System.Collections.Generic; +using Lucene.Net.Documents; +
+namespace Lucene.Net.Search +{ + using Lucene.Net.Support; + using NUnit.Framework; + using System.Diagnostics; + using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext; + using IBits = Lucene.Net.Util.IBits; + using BooleanWeight = Lucene.Net.Search.BooleanQuery.BooleanWeight; + using Directory = Lucene.Net.Store.Directory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

    using Document = Documents.Document;
    using Field = Field;
    using IndexReader = Lucene.Net.Index.IndexReader;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
    using Term = Lucene.Net.Index.Term;
    using TextField = TextField;

    /// <summary>
    /// Tests for <c>BooleanScorer</c>: nested optional clauses, a bucket beyond the first
    /// window, more than 32 prohibited clauses, and embedding one bulk scorer in another.
    /// </summary>
    [TestFixture]
    public class TestBooleanScorer : LuceneTestCase
    {
        private const string FIELD = "category";

        /// <summary>
        /// A required nested disjunction ("1" OR "2") plus a prohibited term that no
        /// document contains must match exactly the two documents "1" and "2".
        /// </summary>
        [Test]
        public virtual void TestMethod()
        {
            Directory directory = NewDirectory();

            string[] values = new string[] { "1", "2", "3", "4" };

            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            for (int i = 0; i < values.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(FIELD, values[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader ir = writer.Reader;
            writer.Dispose();

            BooleanQuery booleanQuery1 = new BooleanQuery();
            booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), Occur.SHOULD);
            booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), Occur.SHOULD);

            BooleanQuery query = new BooleanQuery();
            query.Add(booleanQuery1, Occur.MUST);
            query.Add(new TermQuery(new Term(FIELD, "9")), Occur.MUST_NOT);

            IndexSearcher indexSearcher = NewSearcher(ir);
            ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "Number of matched documents");
            ir.Dispose();
            directory.Dispose();
        }

        [Test]
        public virtual void TestEmptyBucketWithMoreDocs()
        {
            // this test checks the logic of nextDoc() when all sub scorers have docs
            // beyond the first bucket (for example). Currently, the code relies on the
            // 'more' variable to work properly, and this test ensures that if the logic
            // changes, we have a test to back it up.

            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            writer.Commit();
            IndexReader ir = writer.Reader;
            writer.Dispose();
            IndexSearcher searcher = NewSearcher(ir);
            BooleanWeight weight = (BooleanWeight)(new BooleanQuery()).CreateWeight(searcher);

            BulkScorer[] scorers = new BulkScorer[] {
                new BulkScorerAnonymousInnerClassHelper()
            };

            BooleanScorer bs = new BooleanScorer(weight, false, 1, Arrays.AsList(scorers), new List<BulkScorer>(), scorers.Length);

            IList<int> hits = new List<int>();
            bs.Score(new CollectorAnonymousInnerClassHelper(this, hits));

            Assert.AreEqual(1, hits.Count, "should have only 1 hit");
            Assert.AreEqual(3000, (int)hits[0], "hit should have been docID=3000");
            ir.Dispose();
            directory.Dispose();
        }

        /// <summary>
        /// Bulk scorer that reports a single hit, docID 3000, on its first (and only
        /// expected) call to <see cref="Score"/>.
        /// </summary>
        private class BulkScorerAnonymousInnerClassHelper : BulkScorer
        {
            private int doc = -1;

            public override bool Score(ICollector c, int maxDoc)
            {
                Debug.Assert(doc == -1);
                doc = 3000;
                FakeScorer fs = new FakeScorer();
                fs.doc = doc;
                fs.score = 1.0f;
                c.SetScorer(fs);
                // Collect the same doc the fake scorer was positioned on.
                c.Collect(doc);
                return false;
            }
        }

        /// <summary>
        /// Collector that appends every collected doc id (rebased by the current
        /// reader's DocBase) to the list supplied by the test.
        /// </summary>
        private class CollectorAnonymousInnerClassHelper : ICollector
        {
            private readonly TestBooleanScorer OuterInstance;

            private IList<int> Hits;

            public CollectorAnonymousInnerClassHelper(TestBooleanScorer outerInstance, IList<int> hits)
            {
                this.OuterInstance = outerInstance;
                this.Hits = hits;
            }

            internal int docBase;

            public virtual void SetScorer(Scorer scorer)
            {
            }

            public virtual void Collect(int doc)
            {
                Hits.Add(docBase + doc);
            }

            public virtual void SetNextReader(AtomicReaderContext context)
            {
                docBase = context.DocBase;
            }

            public virtual bool AcceptsDocsOutOfOrder
            {
                get { return true; }
            }
        }

        /// <summary>
        /// 33 MUST_NOT clauses (terms "0".."32") plus one SHOULD clause ("33") must
        /// match only the document that contains nothing but "33".
        /// </summary>
        [Test]
        public virtual void TestMoreThan32ProhibitedClauses()
        {
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(new TextField("field", "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33", Field.Store.NO));
            w.AddDocument(doc);
            doc = new Document();
            doc.Add(new TextField("field", "33", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.Reader;
            w.Dispose();
            // we don't wrap with AssertingIndexSearcher in order to have the original scorer in setScorer.
            IndexSearcher s = NewSearcher(r, true, false);

            BooleanQuery q = new BooleanQuery();
            for (int term = 0; term < 33; term++)
            {
                q.Add(new BooleanClause(new TermQuery(new Term("field", "" + term)), Occur.MUST_NOT));
            }
            q.Add(new BooleanClause(new TermQuery(new Term("field", "33")), Occur.SHOULD));

            int[] count = new int[1];
            s.Search(q, new CollectorAnonymousInnerClassHelper2(this, doc, count));

            Assert.AreEqual(1, count[0]);

            r.Dispose();
            d.Dispose();
        }

        /// <summary>
        /// Collector that counts hits into <c>Count[0]</c> and asserts on the runtime
        /// type of the scorer it is handed.
        /// </summary>
        private class CollectorAnonymousInnerClassHelper2 : ICollector
        {
            private readonly TestBooleanScorer OuterInstance;

            private Document Doc;
            private int[] Count;

            public CollectorAnonymousInnerClassHelper2(TestBooleanScorer outerInstance, Document doc, int[] count)
            {
                this.OuterInstance = outerInstance;
                this.Doc = doc;
                this.Count = count;
            }

            public virtual void SetScorer(Scorer scorer)
            {
                // Make sure we got BooleanScorer: the assertion expects the scorer
                // passed to the collector to be a FakeScorer.
                // Compare namespace-qualified names (Java's Class.getName() is also
                // fully qualified) so an unrelated type that merely shares the simple
                // name cannot satisfy the check.
                Type clazz = scorer.GetType();
                Assert.AreEqual(typeof(FakeScorer).FullName, clazz.FullName, "Scorer is implemented by wrong class");
            }

            public virtual void Collect(int doc)
            {
                Count[0]++;
            }

            public virtual void SetNextReader(AtomicReaderContext context)
            {
            }

            public virtual bool AcceptsDocsOutOfOrder
            {
                get { return true; }
            }
        }

        /// <summary>
        /// Throws UOE if Weight.scorer is called </summary>
        private class CrazyMustUseBulkScorerQuery : Query
        {
            public override string ToString(string field)
            {
                return "MustUseBulkScorerQuery";
            }

            public override Weight CreateWeight(IndexSearcher searcher)
            {
                return new WeightAnonymousInnerClassHelper(this);
            }

            /// <summary>
            /// Weight whose GetScorer and Explain throw NotSupportedException; only
            /// GetBulkScorer is usable.
            /// </summary>
            private class WeightAnonymousInnerClassHelper : Weight
            {
                private readonly CrazyMustUseBulkScorerQuery OuterInstance;

                public WeightAnonymousInnerClassHelper(CrazyMustUseBulkScorerQuery outerInstance)
                {
                    this.OuterInstance = outerInstance;
                }

                public override Explanation Explain(AtomicReaderContext context, int doc)
                {
                    throw new System.NotSupportedException();
                }

                public override Query Query
                {
                    get
                    {
                        return OuterInstance;
                    }
                }

                public override float GetValueForNormalization()
                {
                    return 1.0f;
                }

                public override void Normalize(float norm, float topLevelBoost)
                {
                }

                public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
                {
                    throw new System.NotSupportedException();
                }

                public override BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
                {
                    return new BulkScorerAnonymousInnerClassHelper(this);
                }

                /// <summary>
                /// Bulk scorer that collects exactly one hit, doc 0.
                /// </summary>
                private class BulkScorerAnonymousInnerClassHelper : BulkScorer
                {
                    private readonly WeightAnonymousInnerClassHelper OuterInstance;

                    public BulkScorerAnonymousInnerClassHelper(WeightAnonymousInnerClassHelper outerInstance)
                    {
                        this.OuterInstance = outerInstance;
                    }

                    public override bool Score(ICollector collector, int max)
                    {
                        collector.SetScorer(new FakeScorer());
                        collector.Collect(0);
                        return false;
                    }
                }
            }
        }

        /// <summary>
        /// Make sure BooleanScorer can embed another
        /// BooleanScorer.
        /// </summary>
        [Test]
        public virtual void TestEmbeddedBooleanScorer()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(NewTextField("field", "doctors are people who prescribe medicines of which they know little, to cure diseases of which they know less, in human beings of whom they know nothing", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.Reader;
            w.Dispose();

            IndexSearcher s = NewSearcher(r);
            BooleanQuery q1 = new BooleanQuery();
            q1.Add(new TermQuery(new Term("field", "little")), Occur.SHOULD);
            q1.Add(new TermQuery(new Term("field", "diseases")), Occur.SHOULD);

            BooleanQuery q2 = new BooleanQuery();
            q2.Add(q1, Occur.SHOULD);
            q2.Add(new CrazyMustUseBulkScorerQuery(), Occur.SHOULD);

            Assert.AreEqual(1, s.Search(q2, 10).TotalHits);
            r.Dispose();
            dir.Dispose();
        }
    }
}
\ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/TestCachingCollector.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/TestCachingCollector.cs b/src/Lucene.Net.Tests/Search/TestCachingCollector.cs new file mode 100644 index 0000000..a0ed92a --- /dev/null +++ b/src/Lucene.Net.Tests/Search/TestCachingCollector.cs @@ -0,0 +1,252 @@ +namespace Lucene.Net.Search +{ + using NUnit.Framework; + using System; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /// <summary>
    /// Tests for <c>CachingCollector</c>: caching and replay, the RAM limit, and the
    /// in-order / out-of-order compatibility check performed on replay.
    /// </summary>
    [TestFixture]
    public class TestCachingCollector : LuceneTestCase
    {
        private const double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB

        /// <summary>
        /// Scorer stub: every member returns 0, except <see cref="GetCost"/>, which returns 1.
        /// </summary>
        private class MockScorer : Scorer
        {
            internal MockScorer()
                : base((Weight)null)
            {
            }

            public override float GetScore()
            {
                return 0;
            }

            public override int Freq
            {
                get { return 0; }
            }

            public override int DocID
            {
                get { return 0; }
            }

            public override int NextDoc()
            {
                return 0;
            }

            public override int Advance(int target)
            {
                return 0;
            }

            public override long GetCost()
            {
                return 1;
            }
        }

        /// <summary>
        /// Collector that ignores everything it is given and reports the
        /// out-of-order capability chosen at construction.
        /// </summary>
        private class NoOpCollector : ICollector
        {
            internal readonly bool AcceptDocsOutOfOrder;

            public NoOpCollector(bool acceptDocsOutOfOrder)
            {
                this.AcceptDocsOutOfOrder = acceptDocsOutOfOrder;
            }

            public virtual void SetScorer(Scorer scorer)
            {
            }

            public virtual void Collect(int doc)
            {
            }

            public virtual void SetNextReader(AtomicReaderContext context)
            {
            }

            public virtual bool AcceptsDocsOutOfOrder
            {
                get { return AcceptDocsOutOfOrder; }
            }
        }

        /// <summary>
        /// Collects docs 0..999 and checks that replay delivers them consecutively.
        /// </summary>
        [Test]
        public virtual void TestBasic()
        {
            foreach (bool cacheScores in new bool[] { false, true })
            {
                CachingCollector cc = CachingCollector.Create(new NoOpCollector(false), cacheScores, 1.0);
                cc.SetScorer(new MockScorer());

                // collect 1000 docs
                for (int i = 0; i < 1000; i++)
                {
                    cc.Collect(i);
                }

                // now replay them
                cc.Replay(new CollectorAnonymousInnerClassHelper(this));
            }
        }

        /// <summary>
        /// Collector asserting that each collected doc id is exactly one greater than
        /// the previous one, starting from 0.
        /// </summary>
        private class CollectorAnonymousInnerClassHelper : ICollector
        {
            private readonly TestCachingCollector OuterInstance;

            public CollectorAnonymousInnerClassHelper(TestCachingCollector outerInstance)
            {
                this.OuterInstance = outerInstance;
                prevDocID = -1;
            }

            internal int prevDocID;

            public virtual void SetScorer(Scorer scorer)
            {
            }

            public virtual void SetNextReader(AtomicReaderContext context)
            {
            }

            public virtual void Collect(int doc)
            {
                Assert.AreEqual(prevDocID + 1, doc);
                prevDocID = doc;
            }

            public virtual bool AcceptsDocsOutOfOrder
            {
                get { return false; }
            }
        }

        /// <summary>
        /// Replay must throw <see cref="InvalidOperationException"/> once the RAM limit
        /// has been exceeded and the collector is no longer cached.
        /// </summary>
        [Test]
        public virtual void TestIllegalStateOnReplay()
        {
            CachingCollector cc = CachingCollector.Create(new NoOpCollector(false), true, 50 * ONE_BYTE);
            cc.SetScorer(new MockScorer());

            // collect 130 docs, this should be enough for triggering cache abort.
            for (int i = 0; i < 130; i++)
            {
                cc.Collect(i);
            }

            Assert.IsFalse(cc.IsCached, "CachingCollector should not be cached due to low memory limit");

            try
            {
                cc.Replay(new NoOpCollector(false));
                Assert.Fail("replay should fail if CachingCollector is not cached");
            }
            catch (InvalidOperationException)
            {
                // expected
            }
        }

        [Test]
        public virtual void TestIllegalCollectorOnReplay()
        {
            // tests that the Collector passed to replay() has an out-of-order mode that
            // is valid with the Collector passed to the ctor

            // 'src' Collector does not support out-of-order
            CachingCollector cc = CachingCollector.Create(new NoOpCollector(false), true, 50 * ONE_BYTE);
            cc.SetScorer(new MockScorer());
            for (int i = 0; i < 10; i++)
            {
                cc.Collect(i);
            }
            cc.Replay(new NoOpCollector(true)); // this call should not fail
            cc.Replay(new NoOpCollector(false)); // this call should not fail

            // 'src' Collector supports out-of-order
            cc = CachingCollector.Create(new NoOpCollector(true), true, 50 * ONE_BYTE);
            cc.SetScorer(new MockScorer());
            for (int i = 0; i < 10; i++)
            {
                cc.Collect(i);
            }
            cc.Replay(new NoOpCollector(true)); // this call should not fail
            try
            {
                cc.Replay(new NoOpCollector(false)); // this call should fail
                Assert.Fail("should have failed if an in-order Collector was given to replay(), " + "while CachingCollector was initialized with out-of-order collection");
            }
            catch (System.ArgumentException)
            {
                // ok
            }
        }

        [Test]
        public virtual void TestCachedArraysAllocation()
        {
            // tests the cached arrays allocation -- if the 'nextLength' was too high,
            // caching would terminate even if a smaller length would suffice.

            // set RAM limit enough for 150 docs + random(10000)
            int numDocs = Random().Next(10000) + 150;
            foreach (bool cacheScores in new bool[] { false, true })
            {
                // Per-doc budget used for the RAM limit: 8 when scores are cached, 4 otherwise.
                int bytesPerDoc = cacheScores ? 8 : 4;
                CachingCollector cc = CachingCollector.Create(new NoOpCollector(false), cacheScores, bytesPerDoc * ONE_BYTE * numDocs);
                cc.SetScorer(new MockScorer());
                for (int i = 0; i < numDocs; i++)
                {
                    cc.Collect(i);
                }
                Assert.IsTrue(cc.IsCached);

                // One document past the limit (the (numDocs + 1)-th) should terminate caching
                cc.Collect(numDocs);
                Assert.IsFalse(cc.IsCached);
            }
        }

        [Test]
        public virtual void TestNoWrappedCollector()
        {
            foreach (bool cacheScores in new bool[] { false, true })
            {
                // create w/ null wrapped collector, and test that the methods work
                CachingCollector cc = CachingCollector.Create(true, cacheScores, 50 * ONE_BYTE);
                cc.SetNextReader(null);
                cc.SetScorer(new MockScorer());
                cc.Collect(0);

                Assert.IsTrue(cc.IsCached);
                cc.Replay(new NoOpCollector(true));
            }
        }
    }
}
\ No newline at end of file
