http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/JustCompileSearch.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/JustCompileSearch.cs b/src/Lucene.Net.Tests/Search/JustCompileSearch.cs new file mode 100644 index 0000000..272d338 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/JustCompileSearch.cs @@ -0,0 +1,358 @@ +using System; + +namespace Lucene.Net.Search +{ + using Lucene.Net.Util; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext; + using IBits = Lucene.Net.Util.IBits; + using BytesRef = Lucene.Net.Util.BytesRef; + using FieldInvertState = Lucene.Net.Index.FieldInvertState; + using Similarity = Lucene.Net.Search.Similarities.Similarity; + using Terms = Lucene.Net.Index.Terms; + using TermsEnum = Lucene.Net.Index.TermsEnum; + + /// <summary> + /// Holds all implementations of classes in the o.a.l.search package as a + /// back-compatibility test. It does not run any tests per-se, however if + /// someone adds a method to an interface or abstract method to an abstract + /// class, one of the implementations here will fail to compile and so we know + /// back-compat policy was violated. 
+ /// </summary> + internal sealed class JustCompileSearch + { + private const string UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !"; + + internal sealed class JustCompileCollector : ICollector + { + public void Collect(int doc) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public void SetNextReader(AtomicReaderContext context) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public void SetScorer(Scorer scorer) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public bool AcceptsDocsOutOfOrder + { + get { throw new System.NotSupportedException(UNSUPPORTED_MSG); } + } + } + + internal sealed class JustCompileDocIdSet : DocIdSet + { + public override DocIdSetIterator GetIterator() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileDocIdSetIterator : DocIdSetIterator + { + public override int DocID + { + get { throw new System.NotSupportedException(UNSUPPORTED_MSG); } + } + + public override int NextDoc() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override int Advance(int target) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override long GetCost() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileExtendedFieldCacheLongParser : FieldCache.IInt64Parser + { + /// <summary> + /// NOTE: This was parseLong() in Lucene + /// </summary> + public long ParseInt64(BytesRef @string) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public TermsEnum TermsEnum(Terms terms) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileExtendedFieldCacheDoubleParser : FieldCache.IDoubleParser + { + public double ParseDouble(BytesRef term) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public TermsEnum TermsEnum(Terms terms) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileFieldComparer : FieldComparer<object> + { + public override int Compare(int slot1, int slot2) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override int CompareBottom(int doc) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override void Copy(int slot, int doc) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override void SetBottom(int slot) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override void SetTopValue(object value) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override FieldComparer SetNextReader(AtomicReaderContext context) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + // LUCENENET NOTE: This was value(int) in Lucene. + public override IComparable this[int slot] + { + get { throw new System.NotSupportedException(UNSUPPORTED_MSG); } + } + + public override int CompareTop(int doc) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileFieldComparerSource : FieldComparerSource + { + public override FieldComparer NewComparer(string fieldname, int numHits, int sortPos, bool reversed) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileFilter : Filter + { + // Filter is just an abstract class with no abstract methods. 
However it is + // still added here in case someone will add abstract methods in the future. + + public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) + { + return null; + } + } + + internal sealed class JustCompileFilteredDocIdSet : FilteredDocIdSet + { + public JustCompileFilteredDocIdSet(DocIdSet innerSet) + : base(innerSet) + { + } + + protected override bool Match(int docid) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileFilteredDocIdSetIterator : FilteredDocIdSetIterator + { + public JustCompileFilteredDocIdSetIterator(DocIdSetIterator innerIter) + : base(innerIter) + { + } + + protected override bool Match(int doc) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override long GetCost() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileQuery : Query + { + public override string ToString(string field) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileScorer : Scorer + { + internal JustCompileScorer(Weight weight) + : base(weight) + { + } + + public override float GetScore() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override int Freq + { + get { throw new System.NotSupportedException(UNSUPPORTED_MSG); } + } + + public override int DocID + { + get { throw new System.NotSupportedException(UNSUPPORTED_MSG); } + } + + public override int NextDoc() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override int Advance(int target) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override long GetCost() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileSimilarity : Similarity + { + public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override SimScorer GetSimScorer(SimWeight stats, AtomicReaderContext context) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override long ComputeNorm(FieldInvertState state) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileTopDocsCollector : TopDocsCollector<ScoreDoc> + { + internal JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) + : base(pq) + { + } + + public override void Collect(int doc) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override void SetNextReader(AtomicReaderContext context) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override void SetScorer(Scorer scorer) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override bool AcceptsDocsOutOfOrder + { + get { throw new System.NotSupportedException(UNSUPPORTED_MSG); } + } + + public override TopDocs GetTopDocs() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override TopDocs GetTopDocs(int start) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override TopDocs GetTopDocs(int start, int end) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + internal sealed class JustCompileWeight : Weight + { + public override Explanation Explain(AtomicReaderContext context, int doc) + { + throw new 
System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override Query Query + { + get + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + + public override void Normalize(float norm, float topLevelBoost) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override float GetValueForNormalization() + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + + public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs) + { + throw new System.NotSupportedException(UNSUPPORTED_MSG); + } + } + } +} \ No newline at end of file
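The file above relies on the compiler rather than the test runner: every stub overrides each abstract member of a public base type and simply throws, so adding a new abstract member anywhere in Lucene.Net.Search turns into a build failure here. A minimal sketch of the same idea, using hypothetical types that are not part of this commit:

using System;

// Hypothetical base type standing in for any abstract class in Lucene.Net.Search.
internal abstract class SomeAbstractBase
{
    // If a new abstract member is added here, JustCompileStub below stops
    // compiling, which is how the back-compat break is detected.
    public abstract int DoWork(int input);
}

internal sealed class JustCompileStub : SomeAbstractBase
{
    private const string UNSUPPORTED_MSG = "unsupported: used for back-compat testing only!";

    public override int DoWork(int input)
    {
        // The body is never executed; only compilation matters.
        throw new NotSupportedException(UNSUPPORTED_MSG);
    }
}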
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/MockFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/MockFilter.cs b/src/Lucene.Net.Tests/Search/MockFilter.cs new file mode 100644 index 0000000..19daade --- /dev/null +++ b/src/Lucene.Net.Tests/Search/MockFilter.cs @@ -0,0 +1,44 @@ +namespace Lucene.Net.Search +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext; + using IBits = Lucene.Net.Util.IBits; + using FixedBitSet = Lucene.Net.Util.FixedBitSet; + + public class MockFilter : Filter + { + private bool WasCalled_Renamed; + + public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) + { + WasCalled_Renamed = true; + return new FixedBitSet(context.Reader.MaxDoc); + } + + public virtual void Clear() + { + WasCalled_Renamed = false; + } + + public virtual bool WasCalled() + { + return WasCalled_Renamed; + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/MultiCollectorTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/MultiCollectorTest.cs b/src/Lucene.Net.Tests/Search/MultiCollectorTest.cs new file mode 100644 index 0000000..5ed25ad --- /dev/null +++ b/src/Lucene.Net.Tests/Search/MultiCollectorTest.cs @@ -0,0 +1,118 @@ +namespace Lucene.Net.Search +{ + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + [TestFixture] + public class MultiCollectorTest : LuceneTestCase + { + private class DummyCollector : ICollector + { + internal bool AcceptsDocsOutOfOrderCalled = false; + internal bool CollectCalled = false; + internal bool SetNextReaderCalled = false; + internal bool SetScorerCalled = false; + + public virtual bool AcceptsDocsOutOfOrder + { + get + { + AcceptsDocsOutOfOrderCalled = true; + return true; + } + } + + public virtual void Collect(int doc) + { + CollectCalled = true; + } + + public virtual void SetNextReader(AtomicReaderContext context) + { + SetNextReaderCalled = true; + } + + public virtual void SetScorer(Scorer scorer) + { + SetScorerCalled = true; + } + } + + [Test] + public virtual void TestNullCollectors() + { + // Tests that the collector rejects all null collectors. + try + { + MultiCollector.Wrap(null, null); + Assert.Fail("only null collectors should not be supported"); + } +#pragma warning disable 168 + catch (System.ArgumentException e) +#pragma warning restore 168 + { + // expected + } + + // Tests that the collector handles some null collectors well. If it + // doesn't, an NPE would be thrown. + ICollector c = MultiCollector.Wrap(new DummyCollector(), null, new DummyCollector()); + Assert.IsTrue(c is MultiCollector); + Assert.IsTrue(c.AcceptsDocsOutOfOrder); + c.Collect(1); + c.SetNextReader(null); + c.SetScorer(null); + } + + [Test] + public virtual void TestSingleCollector() + { + // Tests that if a single Collector is input, it is returned (and not MultiCollector). + DummyCollector dc = new DummyCollector(); + Assert.AreSame(dc, MultiCollector.Wrap(dc)); + Assert.AreSame(dc, MultiCollector.Wrap(dc, null)); + } + + [Test] + public virtual void TestCollector() + { + // Tests that the collector delegates calls to input collectors properly. + + // Tests that the collector handles some null collectors well. If it + // doesn't, an NPE would be thrown. + DummyCollector[] dcs = new DummyCollector[] { new DummyCollector(), new DummyCollector() }; + ICollector c = MultiCollector.Wrap(dcs); + Assert.IsTrue(c.AcceptsDocsOutOfOrder); + c.Collect(1); + c.SetNextReader(null); + c.SetScorer(null); + + foreach (DummyCollector dc in dcs) + { + Assert.IsTrue(dc.AcceptsDocsOutOfOrderCalled); + Assert.IsTrue(dc.CollectCalled); + Assert.IsTrue(dc.SetNextReaderCalled); + Assert.IsTrue(dc.SetScorerCalled); + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/Payloads/PayloadHelper.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/Payloads/PayloadHelper.cs b/src/Lucene.Net.Tests/Search/Payloads/PayloadHelper.cs new file mode 100644 index 0000000..d0bfdfe --- /dev/null +++ b/src/Lucene.Net.Tests/Search/Payloads/PayloadHelper.cs @@ -0,0 +1,158 @@ +using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Documents; +using NUnit.Framework; +using System; + +namespace Lucene.Net.Search.Payloads +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Lucene.Net.Analysis; + using System.IO; + using BytesRef = Lucene.Net.Util.BytesRef; + using Directory = Lucene.Net.Store.Directory; + using DirectoryReader = Lucene.Net.Index.DirectoryReader; + using Document = Documents.Document; + using English = Lucene.Net.Util.English; + using Field = Field; + using IndexReader = Lucene.Net.Index.IndexReader; + using IndexWriter = Lucene.Net.Index.IndexWriter; + using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper; + using RAMDirectory = Lucene.Net.Store.RAMDirectory; + using Similarity = Lucene.Net.Search.Similarities.Similarity; + using TextField = TextField; + + /// + /// + /// + public class PayloadHelper + { + private byte[] PayloadField = new byte[] { 1 }; + private byte[] PayloadMultiField1 = new byte[] { 2 }; + private byte[] PayloadMultiField2 = new byte[] { 4 }; + public const string NO_PAYLOAD_FIELD = "noPayloadField"; + public const string MULTI_FIELD = "multiField"; + public const string FIELD = "field"; + + public IndexReader Reader; + + public sealed class PayloadAnalyzer : Analyzer + { + private readonly PayloadHelper OuterInstance; + + public PayloadAnalyzer(PayloadHelper outerInstance) + : base(PER_FIELD_REUSE_STRATEGY) + { + this.OuterInstance = outerInstance; + } + + protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) + { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(result, new PayloadFilter(OuterInstance, result, fieldName)); + } + } + + public sealed class PayloadFilter : TokenFilter + { + private readonly PayloadHelper OuterInstance; + + internal readonly string FieldName; + internal int NumSeen = 0; + internal readonly IPayloadAttribute PayloadAtt; + + public PayloadFilter(PayloadHelper outerInstance, TokenStream input, string fieldName) + : base(input) + { + this.OuterInstance = outerInstance; + this.FieldName = fieldName; + PayloadAtt = AddAttribute<IPayloadAttribute>(); + } + + public override bool IncrementToken() + { + if (m_input.IncrementToken()) + { + if (FieldName.Equals(FIELD)) + { + PayloadAtt.Payload = new BytesRef(OuterInstance.PayloadField); + } + else if (FieldName.Equals(MULTI_FIELD)) + { + if (NumSeen % 2 == 0) + { + PayloadAtt.Payload = new BytesRef(OuterInstance.PayloadMultiField1); + } + else + { + PayloadAtt.Payload = new BytesRef(OuterInstance.PayloadMultiField2); + } + NumSeen++; + } + return true; + } + return false; + } + + public override void Reset() + { + base.Reset(); + this.NumSeen = 0; + } + } + + /// <summary> + /// Sets up a RAMDirectory, and adds documents (using English.IntToEnglish()) with two fields: field and multiField + /// and analyzes them using the PayloadAnalyzer </summary> + /// <param name="similarity"> The Similarity class to use in the Searcher </param> + /// <param name="numDocs"> The num docs to add </param> + /// <returns> An IndexSearcher </returns> + // TODO: randomize + public virtual IndexSearcher 
SetUp(Random random, Similarity similarity, int numDocs) + { + Directory directory = new MockDirectoryWrapper(random, new RAMDirectory()); + PayloadAnalyzer analyzer = new PayloadAnalyzer(this); + + // TODO randomize this + IndexWriter writer = new IndexWriter(directory, (new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, analyzer)).SetSimilarity(similarity)); + // writer.infoStream = System.out; + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + doc.Add(new TextField(FIELD, English.IntToEnglish(i), Field.Store.YES)); + doc.Add(new TextField(MULTI_FIELD, English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES)); + doc.Add(new TextField(NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES)); + writer.AddDocument(doc); + } + Reader = DirectoryReader.Open(writer, true); + writer.Dispose(); + + IndexSearcher searcher = LuceneTestCase.NewSearcher(Reader, similarity); + searcher.Similarity = similarity; + return searcher; + } + + [TearDown] + public virtual void TearDown() + { + Reader.Dispose(); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/Payloads/TestPayloadExplanations.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/Payloads/TestPayloadExplanations.cs b/src/Lucene.Net.Tests/Search/Payloads/TestPayloadExplanations.cs new file mode 100644 index 0000000..e205871 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/Payloads/TestPayloadExplanations.cs @@ -0,0 +1,117 @@ +using NUnit.Framework; + +namespace Lucene.Net.Search.Payloads +{ + using BytesRef = Lucene.Net.Util.BytesRef; + using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity; + using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using Term = Lucene.Net.Index.Term; + + /// <summary> + /// TestExplanations subclass focusing on payload queries + /// </summary> + [TestFixture] + public class TestPayloadExplanations : TestExplanations + { + private PayloadFunction[] Functions = new PayloadFunction[] { new AveragePayloadFunction(), new MinPayloadFunction(), new MaxPayloadFunction() }; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(this); + } + + private class DefaultSimilarityAnonymousInnerClassHelper : DefaultSimilarity + { + private readonly TestPayloadExplanations OuterInstance; + + public DefaultSimilarityAnonymousInnerClassHelper(TestPayloadExplanations outerInstance) + { + this.OuterInstance = outerInstance; + } + + public override float ScorePayload(int doc, int start, int end, BytesRef payload) + { + return 1 + (payload.GetHashCode() % 10); + } + } + + /// <summary> + /// macro for payloadtermquery </summary> + private SpanQuery Pt(string s, PayloadFunction fn, bool includeSpanScore) + { + return new PayloadTermQuery(new Term(FIELD, s), fn, includeSpanScore); + } + + /* simple PayloadTermQueries */ + + [Test] + public virtual void TestPT1() + { + foreach (PayloadFunction fn in Functions) + { + Qtest(Pt("w1", fn, false), new int[] { 0, 1, 2, 3 }); + Qtest(Pt("w1", fn, true), new int[] { 0, 1, 2, 3 }); + } + } + + [Test] + public virtual void TestPT2() + { + foreach (PayloadFunction fn in Functions) + { + SpanQuery q = Pt("w1", fn, false); + q.Boost = 1000; + Qtest(q, new int[] { 0, 1, 2, 3 }); + q = Pt("w1", fn, true); + q.Boost = 1000; + Qtest(q, new int[] { 0, 1, 2, 3 }); + } + } + + [Test] + public virtual void TestPT4() + { + foreach (PayloadFunction fn in Functions) + { + Qtest(Pt("xx", fn, false), new int[] { 2, 3 }); + Qtest(Pt("xx", fn, true), new int[] { 2, 3 }); + } + } + + [Test] + public virtual void TestPT5() + { + foreach (PayloadFunction fn in Functions) + { + SpanQuery q = Pt("xx", fn, false); + q.Boost = 1000; + Qtest(q, new int[] { 2, 3 }); + q = Pt("xx", fn, true); + q.Boost = 1000; + Qtest(q, new int[] { 2, 3 }); + } + } + + // TODO: test the payloadnear query too! + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/Payloads/TestPayloadNearQuery.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/Payloads/TestPayloadNearQuery.cs b/src/Lucene.Net.Tests/Search/Payloads/TestPayloadNearQuery.cs new file mode 100644 index 0000000..d621574 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/Payloads/TestPayloadNearQuery.cs @@ -0,0 +1,392 @@ +using System.Text.RegularExpressions; +using Lucene.Net.Analysis.TokenAttributes; +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Search.Payloads +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Lucene.Net.Analysis; + using NUnit.Framework; + using System.IO; + using BytesRef = Lucene.Net.Util.BytesRef; + using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using English = Lucene.Net.Util.English; + using Field = Field; + using FieldInvertState = Lucene.Net.Index.FieldInvertState; + using IndexReader = Lucene.Net.Index.IndexReader; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery; + using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; + using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; + using Term = Lucene.Net.Index.Term; + + [TestFixture] + public class TestPayloadNearQuery : LuceneTestCase + { + private static IndexSearcher Searcher; + private static IndexReader Reader; + private static Directory Directory; + private static BoostingSimilarity similarity = new BoostingSimilarity(); + private static byte[] Payload2 = { 2 }; + private static byte[] Payload4 = { 4 }; + private static readonly Regex _whiteSpaceRegex = new Regex("[\\s]+", RegexOptions.Compiled); + + private class PayloadAnalyzer : Analyzer + { + protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) + { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(result, new PayloadFilter(result, fieldName)); + } + } + + private class PayloadFilter : TokenFilter + { + internal readonly string FieldName; + internal int NumSeen = 0; + internal readonly IPayloadAttribute PayAtt; + + public PayloadFilter(TokenStream input, string fieldName) + : base(input) + { + this.FieldName = fieldName; + PayAtt = AddAttribute<IPayloadAttribute>(); + } + + public sealed override bool IncrementToken() + { + bool result = false; + if (m_input.IncrementToken()) + { + if (NumSeen % 2 == 0) + { + PayAtt.Payload = new BytesRef(Payload2); + } + else + { + PayAtt.Payload = new BytesRef(Payload4); + } + NumSeen++; + result = true; + } + return result; + } + + public override void Reset() + { + base.Reset(); + this.NumSeen = 0; + } + } + + private PayloadNearQuery NewPhraseQuery(string fieldName, string phrase, bool inOrder, PayloadFunction function) + { + var words = _whiteSpaceRegex.Split(phrase); + var clauses = new SpanQuery[words.Length]; + for (var i = 0; i < clauses.Length; i++) + { + clauses[i] = new SpanTermQuery(new Term(fieldName, words[i])); + } + return new PayloadNearQuery(clauses, 0, inOrder, function); + } + + /// <summary> + /// LUCENENET specific + /// Is non-static because NewIndexWriterConfig is no longer static. 
+ /// </summary> + [OneTimeSetUp] + public void BeforeClass() + { + Directory = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetSimilarity(similarity)); + //writer.infoStream = System.out; + for (int i = 0; i < 1000; i++) + { + Document doc = new Document(); + doc.Add(NewTextField("field", English.IntToEnglish(i), Field.Store.YES)); + string txt = English.IntToEnglish(i) + ' ' + English.IntToEnglish(i + 1); + doc.Add(NewTextField("field2", txt, Field.Store.YES)); + writer.AddDocument(doc); + } + Reader = writer.Reader; + writer.Dispose(); + + Searcher = NewSearcher(Reader); + Searcher.Similarity = similarity; + } + + [OneTimeTearDown] + public static void AfterClass() + { + Searcher = null; + Reader.Dispose(); + Reader = null; + Directory.Dispose(); + Directory = null; + } + + [Test] + public virtual void Test() + { + PayloadNearQuery query; + TopDocs hits; + + query = NewPhraseQuery("field", "twenty two", true, new AveragePayloadFunction()); + QueryUtils.Check(query); + + // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4 + // and all the similarity factors are set to 1 + hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + // 10 documents were added with the tokens "twenty two", each has 3 instances + Assert.AreEqual(10, hits.TotalHits, "should be 10 hits"); + for (int j = 0; j < hits.ScoreDocs.Length; j++) + { + ScoreDoc doc = hits.ScoreDocs[j]; + Assert.AreEqual(3, doc.Score, doc.Score + " does not equal: " + 3); + } + for (int i = 1; i < 10; i++) + { + query = NewPhraseQuery("field", English.IntToEnglish(i) + " hundred", true, new AveragePayloadFunction()); + if (VERBOSE) + { + Console.WriteLine("TEST: run query=" + query); + } + // all should have score = 3 because adjacent terms have payloads of 2,4 + // and all the similarity factors are set to 1 + hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.AreEqual(100, hits.TotalHits, "should be 100 hits"); + for (int j = 0; j < hits.ScoreDocs.Length; j++) + { + ScoreDoc doc = hits.ScoreDocs[j]; + // System.out.println("Doc: " + doc.toString()); + // System.out.println("Explain: " + searcher.Explain(query, doc.Doc)); + Assert.AreEqual(3, doc.Score, doc.Score + " does not equal: " + 3); + } + } + } + + [Test] + public virtual void TestPayloadNear() + { + SpanNearQuery q1, q2; + PayloadNearQuery query; + //SpanNearQuery(clauses, 10000, false) + q1 = SpanNearQuery("field2", "twenty two"); + q2 = SpanNearQuery("field2", "twenty three"); + SpanQuery[] clauses = new SpanQuery[2]; + clauses[0] = q1; + clauses[1] = q2; + query = new PayloadNearQuery(clauses, 10, false); + //System.out.println(query.toString()); + Assert.AreEqual(12, Searcher.Search(query, null, 100).TotalHits); + /* + System.out.println(hits.TotalHits); + for (int j = 0; j < hits.ScoreDocs.Length; j++) { + ScoreDoc doc = hits.ScoreDocs[j]; + System.out.println("doc: "+doc.Doc+", score: "+doc.Score); + } + */ + } + + [Test] + public virtual void TestAverageFunction() + { + PayloadNearQuery query; + TopDocs hits; + + query = NewPhraseQuery("field", "twenty two", true, new AveragePayloadFunction()); + QueryUtils.Check(query); + // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4 + // and all the similarity factors are set to 1 + hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != 
null, "hits is null and it shouldn't be"); + Assert.AreEqual(10, hits.TotalHits, "should be 10 hits"); + for (int j = 0; j < hits.ScoreDocs.Length; j++) + { + ScoreDoc doc = hits.ScoreDocs[j]; + Assert.AreEqual(3, doc.Score, doc.Score + " does not equal: " + 3); + Explanation explain = Searcher.Explain(query, hits.ScoreDocs[j].Doc); + string exp = explain.ToString(); + Assert.IsTrue(exp.IndexOf("AveragePayloadFunction") > -1, exp); + Assert.AreEqual(3f, explain.Value, hits.ScoreDocs[j].Score + " explain value does not equal: " + 3); + } + } + + [Test] + public virtual void TestMaxFunction() + { + PayloadNearQuery query; + TopDocs hits; + + query = NewPhraseQuery("field", "twenty two", true, new MaxPayloadFunction()); + QueryUtils.Check(query); + // all 10 hits should have score = 4 (max payload value) + hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.AreEqual(10, hits.TotalHits, "should be 10 hits"); + for (int j = 0; j < hits.ScoreDocs.Length; j++) + { + ScoreDoc doc = hits.ScoreDocs[j]; + Assert.AreEqual(4, doc.Score, doc.Score + " does not equal: " + 4); + Explanation explain = Searcher.Explain(query, hits.ScoreDocs[j].Doc); + string exp = explain.ToString(); + Assert.IsTrue(exp.IndexOf("MaxPayloadFunction") > -1, exp); + Assert.AreEqual(4f, explain.Value, hits.ScoreDocs[j].Score + " explain value does not equal: " + 4); + } + } + + [Test] + public virtual void TestMinFunction() + { + PayloadNearQuery query; + TopDocs hits; + + query = NewPhraseQuery("field", "twenty two", true, new MinPayloadFunction()); + QueryUtils.Check(query); + // all 10 hits should have score = 2 (min payload value) + hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.AreEqual(10, hits.TotalHits, "should be 10 hits"); + for (int j = 0; j < hits.ScoreDocs.Length; j++) + { + ScoreDoc doc = hits.ScoreDocs[j]; + Assert.AreEqual(2, doc.Score, doc.Score + " does not equal: " + 2); + Explanation explain = Searcher.Explain(query, hits.ScoreDocs[j].Doc); + string exp = explain.ToString(); + Assert.IsTrue(exp.IndexOf("MinPayloadFunction") > -1, exp); + Assert.AreEqual(2f, explain.Value, hits.ScoreDocs[j].Score + " explain value does not equal: " + 2); + } + } + + private SpanQuery[] Clauses + { + get + { + SpanNearQuery q1, q2; + q1 = SpanNearQuery("field2", "twenty two"); + q2 = SpanNearQuery("field2", "twenty three"); + SpanQuery[] clauses = new SpanQuery[2]; + clauses[0] = q1; + clauses[1] = q2; + return clauses; + } + } + + private SpanNearQuery SpanNearQuery(string fieldName, string words) + { + var wordList = _whiteSpaceRegex.Split(words); + var clauses = new SpanQuery[wordList.Length]; + for (var i = 0; i < clauses.Length; i++) + { + clauses[i] = new PayloadTermQuery(new Term(fieldName, wordList[i]), new AveragePayloadFunction()); + } + return new SpanNearQuery(clauses, 10000, false); + } + + [Test] + public virtual void TestLongerSpan() + { + PayloadNearQuery query; + TopDocs hits; + query = NewPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction()); + hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + ScoreDoc doc = hits.ScoreDocs[0]; + // System.out.println("Doc: " + doc.toString()); + // System.out.println("Explain: " + searcher.Explain(query, doc.Doc)); + Assert.IsTrue(hits.TotalHits == 1, "there should only be one hit"); + // should have score = 3 because adjacent terms have payloads of 
2,4 + Assert.AreEqual(3, doc.Score, doc.Score + " does not equal: " + 3); + } + + [Test] + public virtual void TestComplexNested() + { + PayloadNearQuery query; + TopDocs hits; + + // combine ordered and unordered spans with some nesting to make sure all payloads are counted + + SpanQuery q1 = NewPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction()); + SpanQuery q2 = NewPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction()); + SpanQuery q3 = NewPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction()); + SpanQuery q4 = NewPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction()); + SpanQuery[] clauses = new SpanQuery[] { new PayloadNearQuery(new SpanQuery[] { q1, q2 }, 0, true), new PayloadNearQuery(new SpanQuery[] { q3, q4 }, 0, false) }; + query = new PayloadNearQuery(clauses, 0, false); + hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + // should be only 1 hit - doc 999 + Assert.IsTrue(hits.ScoreDocs.Length == 1, "should only be one hit"); + // the score should be 3 - the average of all the underlying payloads + ScoreDoc doc = hits.ScoreDocs[0]; + // System.out.println("Doc: " + doc.toString()); + // System.out.println("Explain: " + searcher.Explain(query, doc.Doc)); + Assert.IsTrue(doc.Score == 3, doc.Score + " does not equal: " + 3); + } + + internal class BoostingSimilarity : DefaultSimilarity + { + public override float QueryNorm(float sumOfSquaredWeights) + { + return 1.0f; + } + + public override float Coord(int overlap, int maxOverlap) + { + return 1.0f; + } + + public override float ScorePayload(int docId, int start, int end, BytesRef payload) + { + //we know it is size 4 here, so ignore the offset/length + return payload.Bytes[payload.Offset]; + } + + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + //Make everything else 1 so we see the effect of the payload + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + public override float LengthNorm(FieldInvertState state) + { + return state.Boost; + } + + public override float SloppyFreq(int distance) + { + return 1.0f; + } + + public override float Tf(float freq) + { + return 1.0f; + } + + // idf used for phrase queries + public override Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) + { + return new Explanation(1.0f, "Inexplicable"); + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/Payloads/TestPayloadTermQuery.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/Payloads/TestPayloadTermQuery.cs b/src/Lucene.Net.Tests/Search/Payloads/TestPayloadTermQuery.cs new file mode 100644 index 0000000..a68867e --- /dev/null +++ b/src/Lucene.Net.Tests/Search/Payloads/TestPayloadTermQuery.cs @@ -0,0 +1,367 @@ +using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Documents; + +namespace Lucene.Net.Search.Payloads +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Lucene.Net.Analysis; + using NUnit.Framework; + using System.IO; + using BytesRef = Lucene.Net.Util.BytesRef; + using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity; + using Directory = Lucene.Net.Store.Directory; + using DirectoryReader = Lucene.Net.Index.DirectoryReader; + using Document = Documents.Document; + using English = Lucene.Net.Util.English; + using Field = Field; + using FieldInvertState = Lucene.Net.Index.FieldInvertState; + using IndexReader = Lucene.Net.Index.IndexReader; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MultiSpansWrapper = Lucene.Net.Search.Spans.MultiSpansWrapper; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using Similarity = Lucene.Net.Search.Similarities.Similarity; + using Spans = Lucene.Net.Search.Spans.Spans; + using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; + using Term = Lucene.Net.Index.Term; + + [TestFixture] + public class TestPayloadTermQuery : LuceneTestCase + { + private static IndexSearcher Searcher; + private static IndexReader Reader; + private static readonly Similarity similarity = new BoostingSimilarity(); + private static readonly byte[] PayloadField = { 1 }; + private static readonly byte[] PayloadMultiField1 = { 2 }; + private static readonly byte[] PayloadMultiField2 = { 4 }; + protected internal static Directory Directory; + + private class PayloadAnalyzer : Analyzer + { + internal PayloadAnalyzer() + : base(PER_FIELD_REUSE_STRATEGY) + { + } + + protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) + { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(result, new PayloadFilter(result, fieldName)); + } + } + + private class PayloadFilter : TokenFilter + { + internal readonly string FieldName; + internal int NumSeen = 0; + + internal readonly IPayloadAttribute PayloadAtt; + + public PayloadFilter(TokenStream input, string fieldName) + : base(input) + { + this.FieldName = fieldName; + PayloadAtt = AddAttribute<IPayloadAttribute>(); + } + + public sealed override bool IncrementToken() + { + bool hasNext = m_input.IncrementToken(); + if (hasNext) + { + if (FieldName.Equals("field")) + { + PayloadAtt.Payload = new BytesRef(PayloadField); + } + else if (FieldName.Equals("multiField")) + { + if (NumSeen % 2 == 0) + { + PayloadAtt.Payload = new BytesRef(PayloadMultiField1); + } + else + { + PayloadAtt.Payload = new BytesRef(PayloadMultiField2); + } + NumSeen++; + } + return true; + } + else + { + return false; + } + } + + public override void Reset() + { + base.Reset(); + this.NumSeen = 0; + } + } + + /// <summary> + /// LUCENENET specific + /// Is non-static because NewIndexWriterConfig is no longer static. 
+ /// </summary> + [OneTimeSetUp] + public void BeforeClass() + { + Directory = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetSimilarity(similarity).SetMergePolicy(NewLogMergePolicy())); + //writer.infoStream = System.out; + for (int i = 0; i < 1000; i++) + { + Document doc = new Document(); + Field noPayloadField = NewTextField(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES); + //noPayloadField.setBoost(0); + doc.Add(noPayloadField); + doc.Add(NewTextField("field", English.IntToEnglish(i), Field.Store.YES)); + doc.Add(NewTextField("multiField", English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES)); + writer.AddDocument(doc); + } + Reader = writer.Reader; + writer.Dispose(); + + Searcher = NewSearcher(Reader); + Searcher.Similarity = similarity; + } + + [OneTimeTearDown] + public static void AfterClass() + { + Searcher = null; + Reader.Dispose(); + Reader = null; + Directory.Dispose(); + Directory = null; + } + + [Test] + public virtual void Test() + { + PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"), new MaxPayloadFunction()); + TopDocs hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.IsTrue(hits.TotalHits == 100, "hits Size: " + hits.TotalHits + " is not: " + 100); + + //they should all have the exact same score, because they all contain seventy once, and we set + //all the other similarity factors to be 1 + + Assert.IsTrue(hits.MaxScore == 1, hits.MaxScore + " does not equal: " + 1); + for (int i = 0; i < hits.ScoreDocs.Length; i++) + { + ScoreDoc doc = hits.ScoreDocs[i]; + Assert.IsTrue(doc.Score == 1, doc.Score + " does not equal: " + 1); + } + CheckHits.CheckExplanations(query, PayloadHelper.FIELD, Searcher, true); + Spans spans = MultiSpansWrapper.Wrap(Searcher.TopReaderContext, query); + Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); + /*float score = hits.Score(0); + for (int i =1; i < hits.Length(); i++) + { + Assert.IsTrue(score == hits.Score(i), "scores are not equal and they should be"); + }*/ + } + + [Test] + public virtual void TestQuery() + { + PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction()); + QueryUtils.Check(boostingFuncTermQuery); + + SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")); + + Assert.IsTrue(boostingFuncTermQuery.Equals(spanTermQuery) == spanTermQuery.Equals(boostingFuncTermQuery)); + + PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new AveragePayloadFunction()); + + QueryUtils.CheckUnequal(boostingFuncTermQuery, boostingFuncTermQuery2); + } + + [Test] + public virtual void TestMultipleMatchesPerDoc() + { + PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction()); + TopDocs hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.IsTrue(hits.TotalHits == 100, "hits Size: " + hits.TotalHits + " is not: " + 100); + + //they should all have the exact same score, because they all contain seventy once, and we set + //all the other similarity factors to be 1 + + //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash); + Assert.IsTrue(hits.MaxScore == 4.0, 
hits.MaxScore + " does not equal: " + 4.0); + //there should be exactly 10 items that score a 4, all the rest should score a 2 + //The 10 items are: 70 + i*100 where i in [0-9] + int numTens = 0; + for (int i = 0; i < hits.ScoreDocs.Length; i++) + { + ScoreDoc doc = hits.ScoreDocs[i]; + if (doc.Doc % 10 == 0) + { + numTens++; + Assert.IsTrue(doc.Score == 4.0, doc.Score + " does not equal: " + 4.0); + } + else + { + Assert.IsTrue(doc.Score == 2, doc.Score + " does not equal: " + 2); + } + } + Assert.IsTrue(numTens == 10, numTens + " does not equal: " + 10); + CheckHits.CheckExplanations(query, "field", Searcher, true); + Spans spans = MultiSpansWrapper.Wrap(Searcher.TopReaderContext, query); + Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); + //should be two matches per document + int count = 0; + //100 hits times 2 matches per hit, we should have 200 in count + while (spans.Next()) + { + count++; + } + Assert.IsTrue(count == 200, count + " does not equal: " + 200); + } + + //Set includeSpanScore to false, in which case just the payload score comes through. + [Test] + public virtual void TestIgnoreSpanScorer() + { + PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), new MaxPayloadFunction(), false); + + IndexReader reader = DirectoryReader.Open(Directory); + IndexSearcher theSearcher = NewSearcher(reader); + theSearcher.Similarity = new FullSimilarity(); + TopDocs hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.IsTrue(hits.TotalHits == 100, "hits Size: " + hits.TotalHits + " is not: " + 100); + + //they should all have the exact same score, because they all contain seventy once, and we set + //all the other similarity factors to be 1 + + //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash); + Assert.IsTrue(hits.MaxScore == 4.0, hits.MaxScore + " does not equal: " + 4.0); + //there should be exactly 10 items that score a 4, all the rest should score a 2 + //The 10 items are: 70 + i*100 where i in [0-9] + int numTens = 0; + for (int i = 0; i < hits.ScoreDocs.Length; i++) + { + ScoreDoc doc = hits.ScoreDocs[i]; + if (doc.Doc % 10 == 0) + { + numTens++; + Assert.IsTrue(doc.Score == 4.0, doc.Score + " does not equal: " + 4.0); + } + else + { + Assert.IsTrue(doc.Score == 2, doc.Score + " does not equal: " + 2); + } + } + Assert.IsTrue(numTens == 10, numTens + " does not equal: " + 10); + CheckHits.CheckExplanations(query, "field", Searcher, true); + Spans spans = MultiSpansWrapper.Wrap(Searcher.TopReaderContext, query); + Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); + //should be two matches per document + int count = 0; + //100 hits times 2 matches per hit, we should have 200 in count + while (spans.Next()) + { + count++; + } + reader.Dispose(); + } + + [Test] + public virtual void TestNoMatch() + { + PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"), new MaxPayloadFunction()); + TopDocs hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.IsTrue(hits.TotalHits == 0, "hits Size: " + hits.TotalHits + " is not: " + 0); + } + + [Test] + public virtual void TestNoPayload() + { + PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"), new MaxPayloadFunction()); + PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"), new MaxPayloadFunction()); + 
BooleanClause c1 = new BooleanClause(q1, Occur.MUST); + BooleanClause c2 = new BooleanClause(q2, Occur.MUST_NOT); + BooleanQuery query = new BooleanQuery(); + query.Add(c1); + query.Add(c2); + TopDocs hits = Searcher.Search(query, null, 100); + Assert.IsTrue(hits != null, "hits is null and it shouldn't be"); + Assert.IsTrue(hits.TotalHits == 1, "hits Size: " + hits.TotalHits + " is not: " + 1); + int[] results = new int[1]; + results[0] = 0; //hits.ScoreDocs[0].Doc; + CheckHits.CheckHitCollector(Random(), query, PayloadHelper.NO_PAYLOAD_FIELD, Searcher, results, Similarity); + } + + internal class BoostingSimilarity : DefaultSimilarity + { + public override float QueryNorm(float sumOfSquaredWeights) + { + return 1; + } + + public override float Coord(int overlap, int maxOverlap) + { + return 1; + } + + // TODO: Remove warning after API has been finalized + public override float ScorePayload(int docId, int start, int end, BytesRef payload) + { + //we know it is size 4 here, so ignore the offset/length + return payload.Bytes[payload.Offset]; + } + + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + //Make everything else 1 so we see the effect of the payload + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + public override float LengthNorm(FieldInvertState state) + { + return state.Boost; + } + + public override float SloppyFreq(int distance) + { + return 1; + } + + public override float Idf(long docFreq, long numDocs) + { + return 1; + } + + public override float Tf(float freq) + { + return freq == 0 ? 0 : 1; + } + } + + internal class FullSimilarity : DefaultSimilarity + { + public virtual float ScorePayload(int docId, string fieldName, sbyte[] payload, int offset, int length) + { + //we know it is size 4 here, so ignore the offset/length + return payload[offset]; + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Search/Similarities/TestSimilarity2.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Search/Similarities/TestSimilarity2.cs b/src/Lucene.Net.Tests/Search/Similarities/TestSimilarity2.cs new file mode 100644 index 0000000..5f92b87 --- /dev/null +++ b/src/Lucene.Net.Tests/Search/Similarities/TestSimilarity2.cs @@ -0,0 +1,275 @@ +using System.Collections.Generic; +using Lucene.Net.Documents; +using Lucene.Net.Index; + +namespace Lucene.Net.Search.Similarities +{ + using NUnit.Framework; + using Directory = Lucene.Net.Store.Directory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using Document = Documents.Document; + using Field = Field; + using FieldType = FieldType; + using IndexReader = Lucene.Net.Index.IndexReader; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using SpanOrQuery = Lucene.Net.Search.Spans.SpanOrQuery; + using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; + using Term = Lucene.Net.Index.Term; + using TextField = TextField; + + /// <summary> + /// Tests against all the similarities we have + /// </summary> + [TestFixture] + public class TestSimilarity2 : LuceneTestCase + { + internal IList<Similarity> Sims; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + Sims = new List<Similarity>(); + Sims.Add(new DefaultSimilarity()); + Sims.Add(new BM25Similarity()); + // TODO: not great that we dup this all with TestSimilarityBase + foreach (BasicModel basicModel in TestSimilarityBase.BASIC_MODELS) + { + foreach (AfterEffect afterEffect in TestSimilarityBase.AFTER_EFFECTS) + { + foreach (Normalization normalization in TestSimilarityBase.NORMALIZATIONS) + { + Sims.Add(new DFRSimilarity(basicModel, afterEffect, normalization)); + } + } + } + foreach (Distribution distribution in TestSimilarityBase.DISTRIBUTIONS) + { + foreach (Lambda lambda in TestSimilarityBase.LAMBDAS) + { + foreach (Normalization normalization in TestSimilarityBase.NORMALIZATIONS) + { + Sims.Add(new IBSimilarity(distribution, lambda, normalization)); + } + } + } + Sims.Add(new LMDirichletSimilarity()); + Sims.Add(new LMJelinekMercerSimilarity(0.1f)); + Sims.Add(new LMJelinekMercerSimilarity(0.7f)); + } + + /// <summary> + /// because of stupid things like querynorm, its possible we computeStats on a field that doesnt exist at all + /// test this against a totally empty index, to make sure sims handle it + /// </summary> + [Test] + public virtual void TestEmptyIndex() + { + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + IndexReader ir = iw.Reader; + iw.Dispose(); + IndexSearcher @is = NewSearcher(ir); + + foreach (Similarity sim in Sims) + { + @is.Similarity = sim; + Assert.AreEqual(0, @is.Search(new TermQuery(new Term("foo", "bar")), 10).TotalHits); + } + ir.Dispose(); + dir.Dispose(); + } + + /// <summary> + /// similar to the above, but ORs the query with a real field </summary> + [Test] + public virtual void TestEmptyField() + { + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + doc.Add(NewTextField("foo", "bar", Field.Store.NO)); + iw.AddDocument(doc); + IndexReader ir = iw.Reader; + iw.Dispose(); + IndexSearcher @is = NewSearcher(ir); + + foreach (Similarity sim in Sims) + { + @is.Similarity = sim; + BooleanQuery query = new BooleanQuery(true); + query.Add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD); + query.Add(new TermQuery(new Term("bar", "baz")), Occur.SHOULD); + Assert.AreEqual(1, @is.Search(query, 10).TotalHits); + } + ir.Dispose(); + dir.Dispose(); + } + + /// <summary> + /// similar to the above, however the field exists, but we query with a term that doesnt exist too </summary> + [Test] + public virtual void TestEmptyTerm() + { + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + doc.Add(NewTextField("foo", "bar", Field.Store.NO)); + iw.AddDocument(doc); + IndexReader ir = iw.Reader; + 
iw.Dispose(); + IndexSearcher @is = NewSearcher(ir); + + foreach (Similarity sim in Sims) + { + @is.Similarity = sim; + BooleanQuery query = new BooleanQuery(true); + query.Add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD); + query.Add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD); + Assert.AreEqual(1, @is.Search(query, 10).TotalHits); + } + ir.Dispose(); + dir.Dispose(); + } + + /// <summary> + /// make sure we can retrieve when norms are disabled </summary> + [Test] + public virtual void TestNoNorms() + { + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.OmitNorms = true; + ft.Freeze(); + doc.Add(NewField("foo", "bar", ft)); + iw.AddDocument(doc); + IndexReader ir = iw.Reader; + iw.Dispose(); + IndexSearcher @is = NewSearcher(ir); + + foreach (Similarity sim in Sims) + { + @is.Similarity = sim; + BooleanQuery query = new BooleanQuery(true); + query.Add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD); + Assert.AreEqual(1, @is.Search(query, 10).TotalHits); + } + ir.Dispose(); + dir.Dispose(); + } + + /// <summary> + /// make sure all sims work if TF is omitted </summary> + [Test] + public virtual void TestOmitTF() + { + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.IndexOptions = IndexOptions.DOCS_ONLY; + ft.Freeze(); + Field f = NewField("foo", "bar", ft); + doc.Add(f); + iw.AddDocument(doc); + IndexReader ir = iw.Reader; + iw.Dispose(); + IndexSearcher @is = NewSearcher(ir); + + foreach (Similarity sim in Sims) + { + @is.Similarity = sim; + BooleanQuery query = new BooleanQuery(true); + query.Add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD); + Assert.AreEqual(1, @is.Search(query, 10).TotalHits); + } + ir.Dispose(); + dir.Dispose(); + } + + /// <summary> + /// make sure all sims work if TF and norms is omitted </summary> + [Test] + public virtual void TestOmitTFAndNorms() + { + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.IndexOptions = IndexOptions.DOCS_ONLY; + ft.OmitNorms = true; + ft.Freeze(); + Field f = NewField("foo", "bar", ft); + doc.Add(f); + iw.AddDocument(doc); + IndexReader ir = iw.Reader; + iw.Dispose(); + IndexSearcher @is = NewSearcher(ir); + + foreach (Similarity sim in Sims) + { + @is.Similarity = sim; + BooleanQuery query = new BooleanQuery(true); + query.Add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD); + Assert.AreEqual(1, @is.Search(query, 10).TotalHits); + } + ir.Dispose(); + dir.Dispose(); + } + + /// <summary> + /// make sure all sims work with spanOR(termX, termY) where termY does not exist </summary> + [Test] + public virtual void TestCrazySpans() + { + // The problem: "normal" lucene queries create scorers, returning null if terms dont exist + // this means they never score a term that does not exist. + // however with spans, there is only one scorer for the whole hierarchy: + // inner queries are not real queries, their boosts are ignored, etc. 
+ Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + doc.Add(NewField("foo", "bar", ft)); + iw.AddDocument(doc); + IndexReader ir = iw.Reader; + iw.Dispose(); + IndexSearcher @is = NewSearcher(ir); + + foreach (Similarity sim in Sims) + { + @is.Similarity = sim; + SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar")); + SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz")); + Query query = new SpanOrQuery(s1, s2); + TopDocs td = @is.Search(query, 10); + Assert.AreEqual(1, td.TotalHits); + float score = td.ScoreDocs[0].Score; + Assert.IsTrue(score >= 0.0f); + Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim); + } + ir.Dispose(); + dir.Dispose(); + } + } +} \ No newline at end of file
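For context on the scores asserted in TestPayloadNearQuery above: PayloadFilter writes payload bytes that alternate between 2 and 4, and BoostingSimilarity pins every other scoring factor to 1, so a hit's score collapses to the chosen PayloadFunction over those bytes. A minimal sketch of that arithmetic, assuming only the alternating 2/4 payloads (hypothetical helper, not part of this commit):

using System;
using System.Linq;

internal static class PayloadScoreSketch
{
    // Reproduces the expected scores asserted in TestPayloadNearQuery, where
    // adjacent terms carry payloads 2 and 4 and all other factors are 1.
    public static void Main()
    {
        int[] payloads = { 2, 4 };
        Console.WriteLine(payloads.Average()); // 3 -> AveragePayloadFunction assertions
        Console.WriteLine(payloads.Max());     // 4 -> MaxPayloadFunction assertions
        Console.WriteLine(payloads.Min());     // 2 -> MinPayloadFunction assertions
    }
}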