http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs b/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs new file mode 100644 index 0000000..706987e --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs @@ -0,0 +1,580 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using Lucene.Net.Documents; +using Lucene.Net.Search; + +namespace Lucene.Net.Index +{ + using Lucene.Net.Randomized.Generators; + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Analyzer = Lucene.Net.Analysis.Analyzer; + using BytesRef = Lucene.Net.Util.BytesRef; + using CannedTokenStream = Lucene.Net.Analysis.CannedTokenStream; + using Directory = Lucene.Net.Store.Directory; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + using Document = Documents.Document; + using English = Lucene.Net.Util.English; + using Field = Field; + using FieldType = FieldType; + using Int32Field = Int32Field; + using IOUtils = Lucene.Net.Util.IOUtils; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using MockPayloadAnalyzer = Lucene.Net.Analysis.MockPayloadAnalyzer; + using StringField = StringField; + using TestUtil = Lucene.Net.Util.TestUtil; + using TextField = TextField; + using Token = Lucene.Net.Analysis.Token; + using TokenStream = Lucene.Net.Analysis.TokenStream; + + // TODO: we really need to test indexing offsets, but then getting only docs / docs + freqs. + // not all codecs store prx separate... + // TODO: fix sep codec to index offsets so we can greatly reduce this list!
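+ // (the codecs suppressed below are excluded because, as the TODOs above suggest, their postings formats cannot index offsets.)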
+ [SuppressCodecs("Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom")] + [TestFixture] + public class TestPostingsOffsets : LuceneTestCase + { + internal IndexWriterConfig Iwc; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); + } + + [Test] + public virtual void TestBasic() + { + Directory dir = NewDirectory(); + + RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); + Document doc = new Document(); + + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + if (Random().NextBoolean()) + { + ft.StoreTermVectors = true; + ft.StoreTermVectorPositions = Random().NextBoolean(); + ft.StoreTermVectorOffsets = Random().NextBoolean(); + } + Token[] tokens = new Token[] { MakeToken("a", 1, 0, 6), MakeToken("b", 1, 8, 9), MakeToken("a", 1, 9, 17), MakeToken("c", 1, 19, 50) }; + doc.Add(new Field("content", new CannedTokenStream(tokens), ft)); + + w.AddDocument(doc); + IndexReader r = w.Reader; + w.Dispose(); + + DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("a")); + Assert.IsNotNull(dp); + Assert.AreEqual(0, dp.NextDoc()); + Assert.AreEqual(2, dp.Freq); + Assert.AreEqual(0, dp.NextPosition()); + Assert.AreEqual(0, dp.StartOffset); + Assert.AreEqual(6, dp.EndOffset); + Assert.AreEqual(2, dp.NextPosition()); + Assert.AreEqual(9, dp.StartOffset); + Assert.AreEqual(17, dp.EndOffset); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); + + dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("b")); + Assert.IsNotNull(dp); + Assert.AreEqual(0, dp.NextDoc()); + Assert.AreEqual(1, dp.Freq); + Assert.AreEqual(1, dp.NextPosition()); + Assert.AreEqual(8, dp.StartOffset); + Assert.AreEqual(9, dp.EndOffset); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); + + dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("c")); + Assert.IsNotNull(dp); + Assert.AreEqual(0, dp.NextDoc()); + Assert.AreEqual(1, dp.Freq); + Assert.AreEqual(3, dp.NextPosition()); + Assert.AreEqual(19, dp.StartOffset); + Assert.AreEqual(50, dp.EndOffset); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); + + r.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestSkipping() + { + DoTestNumbers(false); + } + + [Test] + public virtual void TestPayloads() + { + DoTestNumbers(true); + } + + public virtual void DoTestNumbers(bool withPayloads) + { + Directory dir = NewDirectory(); + Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random()); + Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping + RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); + + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + if (Random().NextBoolean()) + { + ft.StoreTermVectors = true; + ft.StoreTermVectorOffsets = Random().NextBoolean(); + ft.StoreTermVectorPositions = Random().NextBoolean(); + } + + int numDocs = AtLeast(500); + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + doc.Add(new Field("numbers", English.IntToEnglish(i), ft)); + doc.Add(new Field("oddeven", (i % 2) == 0 ? 
"even" : "odd", ft)); + doc.Add(new StringField("id", "" + i, Field.Store.NO)); + w.AddDocument(doc); + } + + IndexReader reader = w.Reader; + w.Dispose(); + + string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; + + foreach (string term in terms) + { + DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); + int doc; + while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + { + string storedNumbers = reader.Document(doc).Get("numbers"); + int freq = dp.Freq; + for (int i = 0; i < freq; i++) + { + dp.NextPosition(); + int start = dp.StartOffset; + Debug.Assert(start >= 0); + int end = dp.EndOffset; + Debug.Assert(end >= 0 && end >= start); + // check that the offsets correspond to the term in the src text + Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term)); + if (withPayloads) + { + // check that we have a payload and it starts with "pos" + Assert.IsNotNull(dp.GetPayload()); + BytesRef payload = dp.GetPayload(); + Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); + } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! + } + } + } + + // check we can skip correctly + int numSkippingTests = AtLeast(50); + + for (int j = 0; j < numSkippingTests; j++) + { + int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999)); + DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); + int doc = dp.Advance(num); + Assert.AreEqual(num, doc); + int freq = dp.Freq; + for (int i = 0; i < freq; i++) + { + string storedNumbers = reader.Document(doc).Get("numbers"); + dp.NextPosition(); + int start = dp.StartOffset; + Debug.Assert(start >= 0); + int end = dp.EndOffset; + Debug.Assert(end >= 0 && end >= start); + // check that the offsets correspond to the term in the src text + Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred")); + if (withPayloads) + { + // check that we have a payload and it starts with "pos" + Assert.IsNotNull(dp.GetPayload()); + BytesRef payload = dp.GetPayload(); + Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); + } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! 
+ } + } + + // check that other fields (without offsets) work correctly + + for (int i = 0; i < numDocs; i++) + { + DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0); + Assert.AreEqual(i, dp.NextDoc()); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); + } + + reader.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestRandom() + { + // token -> docID -> tokens + IDictionary<string, IDictionary<int?, IList<Token>>> actualTokens = new Dictionary<string, IDictionary<int?, IList<Token>>>(); + + Directory dir = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); + + int numDocs = AtLeast(20); + //final int numDocs = AtLeast(5); + + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + + // TODO: randomize what IndexOptions we use; also test + // changing this up in one IW buffered segment...: + ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + if (Random().NextBoolean()) + { + ft.StoreTermVectors = true; + ft.StoreTermVectorOffsets = Random().NextBoolean(); + ft.StoreTermVectorPositions = Random().NextBoolean(); + } + + for (int docCount = 0; docCount < numDocs; docCount++) + { + Document doc = new Document(); + doc.Add(new Int32Field("id", docCount, Field.Store.NO)); + IList<Token> tokens = new List<Token>(); + int numTokens = AtLeast(100); + //final int numTokens = AtLeast(20); + int pos = -1; + int offset = 0; + //System.out.println("doc id=" + docCount); + for (int tokenCount = 0; tokenCount < numTokens; tokenCount++) + { + string text; + if (Random().NextBoolean()) + { + text = "a"; + } + else if (Random().NextBoolean()) + { + text = "b"; + } + else if (Random().NextBoolean()) + { + text = "c"; + } + else + { + text = "d"; + } + + int posIncr = Random().NextBoolean() ? 1 : Random().Next(5); + if (tokenCount == 0 && posIncr == 0) + { + posIncr = 1; + } + int offIncr = Random().NextBoolean() ? 
0 : Random().Next(5); + int tokenOffset = Random().Next(5); + + Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset); + if (!actualTokens.ContainsKey(text)) + { + actualTokens[text] = new Dictionary<int?, IList<Token>>(); + } + IDictionary<int?, IList<Token>> postingsByDoc = actualTokens[text]; + if (!postingsByDoc.ContainsKey(docCount)) + { + postingsByDoc[docCount] = new List<Token>(); + } + postingsByDoc[docCount].Add(token); + tokens.Add(token); + pos += posIncr; + // stuff abs position into type: + token.Type = "" + pos; + offset += offIncr + tokenOffset; + //System.out.println(" " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")"); + } + doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft)); + w.AddDocument(doc); + } + DirectoryReader r = w.Reader; + w.Dispose(); + + string[] terms = new string[] { "a", "b", "c", "d" }; + foreach (AtomicReaderContext ctx in r.Leaves) + { + // TODO: improve this + AtomicReader sub = (AtomicReader)ctx.Reader; + //System.out.println("\nsub=" + sub); + TermsEnum termsEnum = sub.Fields.GetTerms("content").GetIterator(null); + DocsEnum docs = null; + DocsAndPositionsEnum docsAndPositions = null; + DocsAndPositionsEnum docsAndPositionsAndOffsets = null; + FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(sub, "id", false); + foreach (string term in terms) + { + //System.out.println(" term=" + term); + if (termsEnum.SeekExact(new BytesRef(term))) + { + docs = termsEnum.Docs(null, docs); + Assert.IsNotNull(docs); + int doc; + //System.out.println(" doc/freq"); + while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + { + IList<Token> expected = actualTokens[term][docIDToID.Get(doc)]; + //System.out.println(" doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq"); + Assert.IsNotNull(expected); + Assert.AreEqual(expected.Count, docs.Freq); + } + + // explicitly exclude offsets here + docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsEnum.FLAG_PAYLOADS); + Assert.IsNotNull(docsAndPositions); + //System.out.println(" doc/freq/pos"); + while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + { + IList<Token> expected = actualTokens[term][docIDToID.Get(doc)]; + //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); + Assert.IsNotNull(expected); + Assert.AreEqual(expected.Count, docsAndPositions.Freq); + foreach (Token token in expected) + { + int pos = Convert.ToInt32(token.Type); + //System.out.println(" pos=" + pos); + Assert.AreEqual(pos, docsAndPositions.NextPosition()); + } + } + + docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions); + Assert.IsNotNull(docsAndPositionsAndOffsets); + //System.out.println(" doc/freq/pos/offs"); + while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + { + IList<Token> expected = actualTokens[term][docIDToID.Get(doc)]; + //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); + Assert.IsNotNull(expected); + Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq); + foreach (Token token in expected) + { + int pos = Convert.ToInt32(token.Type); + //System.out.println(" pos=" + pos); + Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition()); + Assert.AreEqual(token.StartOffset, 
docsAndPositionsAndOffsets.StartOffset); + Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset); + } + } + } + } + // TODO: test advance: + } + r.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestWithUnindexedFields() + { + Directory dir = NewDirectory(); + RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Iwc); + for (int i = 0; i < 100; i++) + { + Document doc = new Document(); + // ensure at least one doc is indexed with offsets + if (i < 99 && Random().Next(2) == 0) + { + // stored only + FieldType ft = new FieldType(); + ft.IsIndexed = false; + ft.IsStored = true; + doc.Add(new Field("foo", "boo!", ft)); + } + else + { + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + if (Random().NextBoolean()) + { + // store some term vectors for the checkindex cross-check + ft.StoreTermVectors = true; + ft.StoreTermVectorPositions = true; + ft.StoreTermVectorOffsets = true; + } + doc.Add(new Field("foo", "bar", ft)); + } + riw.AddDocument(doc); + } + CompositeReader ir = riw.Reader; + AtomicReader slow = SlowCompositeReaderWrapper.Wrap(ir); + FieldInfos fis = slow.FieldInfos; + Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, fis.FieldInfo("foo").IndexOptions); + slow.Dispose(); + ir.Dispose(); + riw.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestAddFieldTwice() + { + Directory dir = NewDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + FieldType customType3 = new FieldType(TextField.TYPE_STORED); + customType3.StoreTermVectors = true; + customType3.StoreTermVectorPositions = true; + customType3.StoreTermVectorOffsets = true; + customType3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + doc.Add(new Field("content3", "here is more content with aaa aaa aaa", customType3)); + doc.Add(new Field("content3", "here is more content with aaa aaa aaa", customType3)); + iw.AddDocument(doc); + iw.Dispose(); + dir.Dispose(); // checkindex + } + + // NOTE: the next two tests aren't that good as we need an EvilToken... 
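+ // (in these two tests the ArgumentException is likely thrown by Token.SetOffset validating its arguments inside MakeToken, rather than by the indexing chain itself; a token implementation that bypasses that validation would be needed to exercise the writer's own offset checks.)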
+ [Test] + public virtual void TestNegativeOffsets() + { + try + { + CheckTokens(new Token[] { MakeToken("foo", 1, -1, -1) }); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + //expected + } + } + + [Test] + public virtual void TestIllegalOffsets() + { + try + { + CheckTokens(new Token[] { MakeToken("foo", 1, 1, 0) }); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + //expected + } + } + + [Test] + public virtual void TestBackwardsOffsets() + { + try + { + CheckTokens(new Token[] { MakeToken("foo", 1, 0, 3), MakeToken("foo", 1, 4, 7), MakeToken("foo", 0, 3, 6) }); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.ArgumentException expected) +#pragma warning restore 168 + { + // expected + } + } + + [Test] + public virtual void TestStackedTokens() + { + CheckTokens(new Token[] { MakeToken("foo", 1, 0, 3), MakeToken("foo", 0, 0, 3), MakeToken("foo", 0, 0, 3) }); + } + + [Test] + public virtual void TestLegalbutVeryLargeOffsets() + { + Directory dir = NewDirectory(); + IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); + Document doc = new Document(); + Token t1 = new Token("foo", 0, int.MaxValue - 500); + if (Random().NextBoolean()) + { + t1.Payload = new BytesRef("test"); + } + Token t2 = new Token("foo", int.MaxValue - 500, int.MaxValue); + TokenStream tokenStream = new CannedTokenStream(new Token[] { t1, t2 }); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + // store some term vectors for the checkindex cross-check + ft.StoreTermVectors = true; + ft.StoreTermVectorPositions = true; + ft.StoreTermVectorOffsets = true; + Field field = new Field("foo", tokenStream, ft); + doc.Add(field); + iw.AddDocument(doc); + iw.Dispose(); + dir.Dispose(); + } + + // TODO: more tests with other possibilities + + private void CheckTokens(Token[] tokens) + { + Directory dir = NewDirectory(); + RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Iwc); + bool success = false; + try + { + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + // store some term vectors for the checkindex cross-check + ft.StoreTermVectors = true; + ft.StoreTermVectorPositions = true; + ft.StoreTermVectorOffsets = true; + + Document doc = new Document(); + doc.Add(new Field("body", new CannedTokenStream(tokens), ft)); + riw.AddDocument(doc); + success = true; + } + finally + { + if (success) + { + IOUtils.Close(riw, dir); + } + else + { + IOUtils.CloseWhileHandlingException(riw, dir); + } + } + } + + private Token MakeToken(string text, int posIncr, int startOffset, int endOffset) + { + Token t = new Token(); + t.Append(text); + t.PositionIncrement = posIncr; + t.SetOffset(startOffset, endOffset); + return t; + } + } +} \ No newline at end of file
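(A minimal sketch for orientation, not part of the commit above: the read-side pattern these offset tests exercise, assuming an IndexReader named reader over an index whose "content" field was indexed with IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; all names are illustrative.)

    DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "content", new BytesRef("a"));
    if (dp != null) // null when the term does not occur in the field
    {
        int doc;
        while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            int freq = dp.Freq;              // occurrences of the term in this document
            for (int i = 0; i < freq; i++)
            {
                int pos = dp.NextPosition(); // token position within the document
                int start = dp.StartOffset;  // character offsets into the original text
                int end = dp.EndOffset;      // (both are -1 when offsets were not indexed)
            }
        }
    }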
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs new file mode 100644 index 0000000..031d5c0 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs @@ -0,0 +1,142 @@ +using Lucene.Net.Support; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + using Lucene.Net.Util; + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + //using MergedIterator = Lucene.Net.Util.MergedIterator; + using TestUtil = Lucene.Net.Util.TestUtil; + + [TestFixture] + public class TestPrefixCodedTerms : LuceneTestCase + { + [Test] + public virtual void TestEmpty() + { + PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder(); + PrefixCodedTerms pb = b.Finish(); + Assert.IsFalse(pb.GetEnumerator().MoveNext()); + } + + [Test] + public virtual void TestOne() + { + Term term = new Term("foo", "bogus"); + PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder(); + b.Add(term); + PrefixCodedTerms pb = b.Finish(); + IEnumerator<Term> iterator = pb.GetEnumerator(); + Assert.IsTrue(iterator.MoveNext()); + Assert.AreEqual(term, iterator.Current); + } + + [Test] + public virtual void TestRandom() + { + SortedSet<Term> terms = new SortedSet<Term>(); + int nterms = AtLeast(10000); + for (int i = 0; i < nterms; i++) + { + Term term = new Term(TestUtil.RandomUnicodeString(Random(), 2), TestUtil.RandomUnicodeString(Random())); + terms.Add(term); + } + + PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder(); + foreach (Term @ref in terms) + { + b.Add(@ref); + } + PrefixCodedTerms pb = b.Finish(); + + IEnumerator<Term> expected = terms.GetEnumerator(); + foreach (Term t in pb) + { + Assert.IsTrue(expected.MoveNext()); + Assert.AreEqual(expected.Current, t); + } + Assert.IsFalse(expected.MoveNext()); + } + + [Test] + public virtual void TestMergeOne() + { + Term t1 = new Term("foo", "a"); + PrefixCodedTerms.Builder b1 = new PrefixCodedTerms.Builder(); + b1.Add(t1); + PrefixCodedTerms pb1 = b1.Finish(); + + Term t2 = new Term("foo", "b"); + PrefixCodedTerms.Builder b2 = new PrefixCodedTerms.Builder(); + b2.Add(t2); + PrefixCodedTerms pb2 = b2.Finish(); + + IEnumerator<Term> merged = new MergedIterator<Term>(pb1.GetEnumerator(), pb2.GetEnumerator()); + Assert.IsTrue(merged.MoveNext()); + Assert.AreEqual(t1, merged.Current); + Assert.IsTrue(merged.MoveNext()); + Assert.AreEqual(t2, merged.Current); + } + + [Test] + public virtual void TestMergeRandom() + { + PrefixCodedTerms[] pb = new 
PrefixCodedTerms[TestUtil.NextInt(Random(), 2, 10)]; + SortedSet<Term> superSet = new SortedSet<Term>(); + + for (int i = 0; i < pb.Length; i++) + { + SortedSet<Term> terms = new SortedSet<Term>(); + int nterms = TestUtil.NextInt(Random(), 0, 10000); + for (int j = 0; j < nterms; j++) + { + Term term = new Term(TestUtil.RandomUnicodeString(Random(), 2), TestUtil.RandomUnicodeString(Random(), 4)); + terms.Add(term); + } + superSet.AddAll(terms); + + PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder(); + foreach (Term @ref in terms) + { + b.Add(@ref); + } + pb[i] = b.Finish(); + } + + List<IEnumerator<Term>> subs = new List<IEnumerator<Term>>(); + for (int i = 0; i < pb.Length; i++) + { + subs.Add(pb[i].GetEnumerator()); + } + + IEnumerator<Term> expected = superSet.GetEnumerator(); + IEnumerator<Term> actual = new MergedIterator<Term>(subs.ToArray()); + while (actual.MoveNext()) + { + Assert.IsTrue(expected.MoveNext()); + Assert.AreEqual(expected.Current, actual.Current); + } + Assert.IsFalse(expected.MoveNext()); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestReaderClosed.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestReaderClosed.cs b/src/Lucene.Net.Tests/Index/TestReaderClosed.cs new file mode 100644 index 0000000..99df942 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestReaderClosed.cs @@ -0,0 +1,118 @@ +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using Field = Field; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using MockTokenizer = Lucene.Net.Analysis.MockTokenizer; + using TermRangeQuery = Lucene.Net.Search.TermRangeQuery; + using TestUtil = Lucene.Net.Util.TestUtil; + + [TestFixture] + public class TestReaderClosed : LuceneTestCase + { + private IndexReader Reader; + private Directory Dir; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + Dir = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); + + Document doc = new Document(); + Field field = NewStringField("field", "", Field.Store.NO); + doc.Add(field); + + // we generate awful prefixes: good for testing.
+ // but for preflex codec, the test can be very slow, so use fewer iterations. + int num = AtLeast(10); + for (int i = 0; i < num; i++) + { + field.SetStringValue(TestUtil.RandomUnicodeString(Random(), 10)); + writer.AddDocument(doc); + } + Reader = writer.Reader; + writer.Dispose(); + } + + [Test] + public virtual void Test() + { + Assert.IsTrue(Reader.RefCount > 0); + IndexSearcher searcher = NewSearcher(Reader); + TermRangeQuery query = TermRangeQuery.NewStringRange("field", "a", "z", true, true); + searcher.Search(query, 5); + Reader.Dispose(); + try + { + searcher.Search(query, 5); + } +#pragma warning disable 168 + catch (AlreadyClosedException ace) +#pragma warning restore 168 + { + // expected + } + } + + // LUCENE-3800 + [Test] + public virtual void TestReaderChaining() + { + Assert.IsTrue(Reader.RefCount > 0); + IndexReader wrappedReader = SlowCompositeReaderWrapper.Wrap(Reader); + wrappedReader = new ParallelAtomicReader((AtomicReader)wrappedReader); + + IndexSearcher searcher = NewSearcher(wrappedReader); + TermRangeQuery query = TermRangeQuery.NewStringRange("field", "a", "z", true, true); + searcher.Search(query, 5); + Reader.Dispose(); // close original child reader + try + { + searcher.Search(query, 5); + } + catch (AlreadyClosedException ace) + { + Assert.AreEqual("this IndexReader cannot be used anymore as one of its child readers was closed", ace.Message); + } + finally + { + // shutdown executor: in case of wrap-wrap-wrapping + searcher.IndexReader.Dispose(); + } + } + + [TearDown] + public override void TearDown() + { + Dir.Dispose(); + base.TearDown(); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestRollback.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestRollback.cs b/src/Lucene.Net.Tests/Index/TestRollback.cs new file mode 100644 index 0000000..f613e47 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestRollback.cs @@ -0,0 +1,67 @@ +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using Field = Field; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + + [TestFixture] + public class TestRollback : LuceneTestCase + { + // LUCENE-2536 + [Test] + public virtual void TestRollbackIntegrityWithBufferFlush() + { + Directory dir = NewDirectory(); + RandomIndexWriter rw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + for (int i = 0; i < 5; i++) + { + Document doc = new Document(); + doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES)); + rw.AddDocument(doc); + } + rw.Dispose(); + + // If buffer size is small enough to cause a flush, errors ensue... + IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(OpenMode.APPEND)); + + for (int i = 0; i < 3; i++) + { + Document doc = new Document(); + string value = Convert.ToString(i); + doc.Add(NewStringField("pk", value, Field.Store.YES)); + doc.Add(NewStringField("text", "foo", Field.Store.YES)); + w.UpdateDocument(new Term("pk", value), doc); + } + w.Rollback(); + + IndexReader r = DirectoryReader.Open(dir); + Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback"); + r.Dispose(); + dir.Dispose(); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs b/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs new file mode 100644 index 0000000..8989662 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs @@ -0,0 +1,285 @@ +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + using Codecs.Memory; + //using MemoryPostingsFormat = Lucene.Net.Codecs.memory.MemoryPostingsFormat; + + using Lucene.Net.Randomized.Generators; + using Lucene.Net.Store; + using Lucene.Net.Support; + using Lucene.Net.Util; + using NUnit.Framework; + using Codec = Lucene.Net.Codecs.Codec; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using TermQuery = Lucene.Net.Search.TermQuery; + using TopDocs = Lucene.Net.Search.TopDocs; + + [TestFixture] + public class TestRollingUpdates : LuceneTestCase + { + // Just updates the same set of N docs over and over, to + // stress out deletions + + [Test] + public virtual void TestRollingUpdates_Mem() + { + Random random = new Random(Random().Next()); + BaseDirectoryWrapper dir = NewDirectory(); + LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); + + //provider.register(new MemoryCodec()); + if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean()) + { + Codec.Default = + TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(Random().nextBoolean(), random.NextFloat())); + } + + MockAnalyzer analyzer = new MockAnalyzer(Random()); + analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); + + IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + int SIZE = AtLeast(20); + int id = 0; + IndexReader r = null; + IndexSearcher s = null; + int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble()))); + if (VERBOSE) + { + Console.WriteLine("TEST: numUpdates=" + numUpdates); + } + int updateCount = 0; + // TODO: sometimes update ids not in order... + for (int docIter = 0; docIter < numUpdates; docIter++) + { + Documents.Document doc = docs.NextDoc(); + string myID = "" + id; + if (id == SIZE - 1) + { + id = 0; + } + else + { + id++; + } + if (VERBOSE) + { + Console.WriteLine(" docIter=" + docIter + " id=" + id); + } + ((Field)doc.GetField("docid")).SetStringValue(myID); + + Term idTerm = new Term("docid", myID); + + bool doUpdate; + if (s != null && updateCount < SIZE) + { + TopDocs hits = s.Search(new TermQuery(idTerm), 1); + Assert.AreEqual(1, hits.TotalHits); + doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc); + if (VERBOSE) + { + if (doUpdate) + { + Console.WriteLine(" tryDeleteDocument failed"); + } + else + { + Console.WriteLine(" tryDeleteDocument succeeded"); + } + } + } + else + { + doUpdate = true; + if (VERBOSE) + { + Console.WriteLine(" no searcher: doUpdate=true"); + } + } + + updateCount++; + + if (doUpdate) + { + w.UpdateDocument(idTerm, doc); + } + else + { + w.AddDocument(doc); + } + + if (docIter >= SIZE && Random().Next(50) == 17) + { + if (r != null) + { + r.Dispose(); + } + + bool applyDeletions = Random().NextBoolean(); + + if (VERBOSE) + { + Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions); + } + + r = w.GetReader(applyDeletions); + if (applyDeletions) + { + s = NewSearcher(r); + } + else + { + s = null; + } + Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE); + updateCount = 0; + } + } + + if (r != null) + { + r.Dispose(); + } + + w.Commit(); + Assert.AreEqual(SIZE, w.NumDocs); + + w.Dispose(); + + TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates"); + + docs.Dispose(); + + // LUCENE-4455: + SegmentInfos infos = new SegmentInfos(); + infos.Read(dir); + long totalBytes = 0; + foreach (SegmentCommitInfo sipc in infos.Segments) + { + totalBytes += sipc.SizeInBytes(); + } + long totalBytes2 = 0; + foreach (string fileName in dir.ListAll()) + { + if (!fileName.StartsWith(IndexFileNames.SEGMENTS)) + { + totalBytes2 += dir.FileLength(fileName); + } + } + Assert.AreEqual(totalBytes2, totalBytes); + 
dir.Dispose(); + } + + [Test] + public virtual void TestUpdateSameDoc() + { + Directory dir = NewDirectory(); + + LineFileDocs docs = new LineFileDocs(Random()); + for (int r = 0; r < 3; r++) + { + IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2)); + int numUpdates = AtLeast(20); + int numThreads = TestUtil.NextInt(Random(), 2, 6); + IndexingThread[] threads = new IndexingThread[numThreads]; + for (int i = 0; i < numThreads; i++) + { + threads[i] = new IndexingThread(docs, w, numUpdates, NewStringField); + threads[i].Start(); + } + + for (int i = 0; i < numThreads; i++) + { + threads[i].Join(); + } + + w.Dispose(); + } + + IndexReader open = DirectoryReader.Open(dir); + Assert.AreEqual(1, open.NumDocs); + open.Dispose(); + docs.Dispose(); + dir.Dispose(); + } + + internal class IndexingThread : ThreadClass + { + internal readonly LineFileDocs Docs; + internal readonly IndexWriter Writer; + internal readonly int Num; + + private readonly Func<string, string, Field.Store, Field> NewStringField; + + /// <param name="newStringField"> + /// LUCENENET specific + /// Passed in because <see cref="LuceneTestCase.NewStringField(string, string, Field.Store)"/> + /// is no longer static. + /// </param> + public IndexingThread(LineFileDocs docs, IndexWriter writer, int num, Func<string, string, Field.Store, Field> newStringField) + : base() + { + this.Docs = docs; + this.Writer = writer; + this.Num = num; + NewStringField = newStringField; + } + + public override void Run() + { + try + { + DirectoryReader open = null; + for (int i = 0; i < Num; i++) + { + Documents.Document doc = new Documents.Document(); // docs.NextDoc(); + doc.Add(NewStringField("id", "test", Field.Store.NO)); + Writer.UpdateDocument(new Term("id", "test"), doc); + if (Random().Next(3) == 0) + { + if (open == null) + { + open = DirectoryReader.Open(Writer, true); + } + DirectoryReader reader = DirectoryReader.OpenIfChanged(open); + if (reader != null) + { + open.Dispose(); + open = reader; + } + Assert.AreEqual(1, open.NumDocs, "iter: " + i + " numDocs: " + open.NumDocs + " del: " + open.NumDeletedDocs + " max: " + open.MaxDoc); + } + } + if (open != null) + { + open.Dispose(); + } + } + catch (Exception e) + { + throw new Exception(e.Message, e); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs new file mode 100644 index 0000000..ca9637a --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs @@ -0,0 +1,110 @@ +using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Documents; +using NUnit.Framework; + +namespace Lucene.Net.Index +{ + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using TextField = TextField; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using TokenStream = Lucene.Net.Analysis.TokenStream; + + [TestFixture] + public class TestSameTokenSamePosition : LuceneTestCase + { + /// <summary> + /// Attempt to reproduce an assertion error that happens + /// only with the trunk version around April 2011. + /// </summary> + [Test] + public virtual void Test() + { + Directory dir = NewDirectory(); + RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + doc.Add(new TextField("eng", new BugReproTokenStream())); + riw.AddDocument(doc); + riw.Dispose(); + dir.Dispose(); + } + + /// <summary> + /// Same as the above, but with more docs + /// </summary> + [Test] + public virtual void TestMoreDocs() + { + Directory dir = NewDirectory(); + RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + for (int i = 0; i < 100; i++) + { + Document doc = new Document(); + doc.Add(new TextField("eng", new BugReproTokenStream())); + riw.AddDocument(doc); + } + riw.Dispose(); + dir.Dispose(); + } + } + + internal sealed class BugReproTokenStream : TokenStream + { + private readonly ICharTermAttribute TermAtt; + private readonly IOffsetAttribute OffsetAtt; + private readonly IPositionIncrementAttribute PosIncAtt; + private readonly int TokenCount = 4; + private int NextTokenIndex = 0; + private readonly string[] Terms = new string[] { "six", "six", "drunken", "drunken" }; + private readonly int[] Starts = new int[] { 0, 0, 4, 4 }; + private readonly int[] Ends = new int[] { 3, 3, 11, 11 }; + private readonly int[] Incs = new int[] { 1, 0, 1, 0 }; + + public BugReproTokenStream() + { + TermAtt = AddAttribute<ICharTermAttribute>(); + OffsetAtt = AddAttribute<IOffsetAttribute>(); + PosIncAtt = AddAttribute<IPositionIncrementAttribute>(); + } + + public override bool IncrementToken() + { + if (NextTokenIndex < TokenCount) + { + TermAtt.SetEmpty().Append(Terms[NextTokenIndex]); + OffsetAtt.SetOffset(Starts[NextTokenIndex], Ends[NextTokenIndex]); + PosIncAtt.PositionIncrement = Incs[NextTokenIndex]; + NextTokenIndex++; + return true; + } + else + { + return false; + } + } + + public override void Reset() + { + base.Reset(); + this.NextTokenIndex = 0; + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs b/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs new file mode 100644 index 0000000..30786b5 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs @@ -0,0 +1,207 @@ +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + using BytesRef = Lucene.Net.Util.BytesRef; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Codec = Lucene.Net.Codecs.Codec; + using Constants = Lucene.Net.Util.Constants; + using Directory = Lucene.Net.Store.Directory; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + using Document = Documents.Document; + using FixedBitSet = Lucene.Net.Util.FixedBitSet; + using InfoStream = Lucene.Net.Util.InfoStream; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using TestUtil = Lucene.Net.Util.TestUtil; + + [TestFixture] + public class TestSegmentMerger : LuceneTestCase + { + //The variables for the new merged segment + private Directory MergedDir; + + private string MergedSegment = "test"; + + //First segment to be merged + private Directory Merge1Dir; + + private Document Doc1; + private SegmentReader Reader1; + + //Second Segment to be merged + private Directory Merge2Dir; + + private Document Doc2; + private SegmentReader Reader2; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + this.Doc1 = new Document(); + this.Doc2 = new Document(); + MergedDir = NewDirectory(); + Merge1Dir = NewDirectory(); + Merge2Dir = NewDirectory(); + DocHelper.SetupDoc(Doc1); + SegmentCommitInfo info1 = DocHelper.WriteDoc(Random(), Merge1Dir, Doc1); + DocHelper.SetupDoc(Doc2); + SegmentCommitInfo info2 = DocHelper.WriteDoc(Random(), Merge2Dir, Doc2); + Reader1 = new SegmentReader(info1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); + Reader2 = new SegmentReader(info2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); + } + + [TearDown] + public override void TearDown() + { + Reader1.Dispose(); + Reader2.Dispose(); + MergedDir.Dispose(); + Merge1Dir.Dispose(); + Merge2Dir.Dispose(); + base.TearDown(); + } + + [Test] + public virtual void Test() + { + Assert.IsTrue(MergedDir != null); + Assert.IsTrue(Merge1Dir != null); + Assert.IsTrue(Merge2Dir != null); + Assert.IsTrue(Reader1 != null); + Assert.IsTrue(Reader2 != null); + } + + [Test] + public virtual void TestMerge() + { + Codec codec = Codec.Default; + SegmentInfo si = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null); + + SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true); + MergeState mergeState = merger.Merge(); + int docsMerged = mergeState.SegmentInfo.DocCount; + Assert.IsTrue(docsMerged == 2); + //Should be able to open a new SegmentReader against the new directory + SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); + Assert.IsTrue(mergedReader != null); + Assert.IsTrue(mergedReader.NumDocs == 2); + Document newDoc1 = 
mergedReader.Document(0); + Assert.IsTrue(newDoc1 != null); + //There are 2 unstored fields on the document + Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count); + Document newDoc2 = mergedReader.Document(1); + Assert.IsTrue(newDoc2 != null); + Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count); + + DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0); + Assert.IsTrue(termDocs != null); + Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + + int tvCount = 0; + foreach (FieldInfo fieldInfo in mergedReader.FieldInfos) + { + if (fieldInfo.HasVectors) + { + tvCount++; + } + } + + //System.out.println("stored size: " + stored.Size()); + Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector"); + + Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY); + Assert.IsNotNull(vector); + Assert.AreEqual(3, vector.Count); + TermsEnum termsEnum = vector.GetIterator(null); + + int i = 0; + while (termsEnum.Next() != null) + { + string term = termsEnum.Term.Utf8ToString(); + int freq = (int)termsEnum.TotalTermFreq; + //System.out.println("Term: " + term + " Freq: " + freq); + Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1); + Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); + i++; + } + + TestSegmentReader.CheckNorms(mergedReader); + mergedReader.Dispose(); + } + + private static bool Equals(MergeState.DocMap map1, MergeState.DocMap map2) + { + if (map1.MaxDoc != map2.MaxDoc) + { + return false; + } + for (int i = 0; i < map1.MaxDoc; ++i) + { + if (map1.Get(i) != map2.Get(i)) + { + return false; + } + } + return true; + } + + [Test] + public virtual void TestBuildDocMap() + { + int maxDoc = TestUtil.NextInt(Random(), 1, 128); + int numDocs = TestUtil.NextInt(Random(), 0, maxDoc); + int numDeletedDocs = maxDoc - numDocs; + FixedBitSet liveDocs = new FixedBitSet(maxDoc); + for (int i = 0; i < numDocs; ++i) + { + while (true) + { + int docID = Random().Next(maxDoc); + if (!liveDocs.Get(docID)) + { + liveDocs.Set(docID); + break; + } + } + } + + MergeState.DocMap docMap = MergeState.DocMap.Build(maxDoc, liveDocs); + + Assert.AreEqual(maxDoc, docMap.MaxDoc); + Assert.AreEqual(numDocs, docMap.NumDocs); + Assert.AreEqual(numDeletedDocs, docMap.NumDeletedDocs); + // assert the mapping is compact + for (int i = 0, del = 0; i < maxDoc; ++i) + { + if (!liveDocs.Get(i)) + { + Assert.AreEqual(-1, docMap.Get(i)); + ++del; + } + else + { + Assert.AreEqual(i - del, docMap.Get(i)); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestSegmentReader.cs b/src/Lucene.Net.Tests/Index/TestSegmentReader.cs new file mode 100644 index 0000000..b98287d --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestSegmentReader.cs @@ -0,0 +1,277 @@ +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + using BytesRef = Lucene.Net.Util.BytesRef; + using Directory = Lucene.Net.Store.Directory; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Document = Documents.Document; + using IOContext = Lucene.Net.Store.IOContext; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using TestUtil = Lucene.Net.Util.TestUtil; + + [TestFixture] + public class TestSegmentReader : LuceneTestCase + { + private Directory Dir; + private Document TestDoc; + private SegmentReader Reader; + + //TODO: Setup the reader w/ multiple documents + [SetUp] + public override void SetUp() + { + base.SetUp(); + Dir = NewDirectory(); + TestDoc = new Document(); + DocHelper.SetupDoc(TestDoc); + SegmentCommitInfo info = DocHelper.WriteDoc(Random(), Dir, TestDoc); + Reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.READ); + } + + [TearDown] + public override void TearDown() + { + Reader.Dispose(); + Dir.Dispose(); + base.TearDown(); + } + + [Test] + public virtual void Test() + { + Assert.IsTrue(Dir != null); + Assert.IsTrue(Reader != null); + Assert.IsTrue(DocHelper.NameValues.Count > 0); + Assert.IsTrue(DocHelper.NumFields(TestDoc) == DocHelper.All.Count); + } + + [Test] + public virtual void TestDocument() + { + Assert.IsTrue(Reader.NumDocs == 1); + Assert.IsTrue(Reader.MaxDoc >= 1); + Document result = Reader.Document(0); + Assert.IsTrue(result != null); + //There are 2 unstored fields on the document that are not preserved across writing + Assert.IsTrue(DocHelper.NumFields(result) == DocHelper.NumFields(TestDoc) - DocHelper.Unstored.Count); + + IList<IIndexableField> fields = result.Fields; + foreach (IIndexableField field in fields) + { + Assert.IsTrue(field != null); + Assert.IsTrue(DocHelper.NameValues.ContainsKey(field.Name)); + } + } + + [Test] + public virtual void TestGetFieldNameVariations() + { + ICollection<string> allFieldNames = new HashSet<string>(); + ICollection<string> indexedFieldNames = new HashSet<string>(); + ICollection<string> notIndexedFieldNames = new HashSet<string>(); + ICollection<string> tvFieldNames = new HashSet<string>(); + ICollection<string> noTVFieldNames = new HashSet<string>(); + + foreach (FieldInfo fieldInfo in Reader.FieldInfos) + { + string name = fieldInfo.Name; + allFieldNames.Add(name); + if (fieldInfo.IsIndexed) + { + indexedFieldNames.Add(name); + } + else + { + notIndexedFieldNames.Add(name); + } + if (fieldInfo.HasVectors) + { + tvFieldNames.Add(name); + } + else if (fieldInfo.IsIndexed) + { + noTVFieldNames.Add(name); + } + } + + Assert.IsTrue(allFieldNames.Count == DocHelper.All.Count); + foreach (string s in allFieldNames) + { + Assert.IsTrue(DocHelper.NameValues.ContainsKey(s) == true || s.Equals("")); + } + + Assert.IsTrue(indexedFieldNames.Count == DocHelper.Indexed.Count); + foreach (string s in indexedFieldNames) + { + Assert.IsTrue(DocHelper.Indexed.ContainsKey(s) == true || s.Equals("")); + } + + Assert.IsTrue(notIndexedFieldNames.Count == 
DocHelper.Unindexed.Count); + //Get all indexed fields that are storing term vectors + Assert.IsTrue(tvFieldNames.Count == DocHelper.Termvector.Count); + + Assert.IsTrue(noTVFieldNames.Count == DocHelper.Notermvector.Count); + } + + [Test] + public virtual void TestTerms() + { + Fields fields = MultiFields.GetFields(Reader); + foreach (string field in fields) + { + Terms terms = fields.GetTerms(field); + Assert.IsNotNull(terms); + TermsEnum termsEnum = terms.GetIterator(null); + while (termsEnum.Next() != null) + { + BytesRef term = termsEnum.Term; + Assert.IsTrue(term != null); + string fieldValue = (string)DocHelper.NameValues[field]; + Assert.IsTrue(fieldValue.IndexOf(term.Utf8ToString()) != -1); + } + } + + DocsEnum termDocs = TestUtil.Docs(Random(), Reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(Reader), null, 0); + Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + + termDocs = TestUtil.Docs(Random(), Reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), MultiFields.GetLiveDocs(Reader), null, 0); + + Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + + DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(Reader, MultiFields.GetLiveDocs(Reader), DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field")); + // NOTE: prior rev of this test was failing to first + // call next here: + Assert.IsTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + Assert.IsTrue(positions.DocID == 0); + Assert.IsTrue(positions.NextPosition() >= 0); + } + + [Test] + public virtual void TestNorms() + { + //TODO: Not sure how these work/should be tested + /* + try { + byte [] norms = reader.norms(DocHelper.TEXT_FIELD_1_KEY); + System.out.println("Norms: " + norms); + Assert.IsTrue(norms != null); + } catch (IOException e) { + e.printStackTrace(); + Assert.IsTrue(false); + } + */ + + CheckNorms(Reader); + } + + public static void CheckNorms(AtomicReader reader) + { + // test omit norms + for (int i = 0; i < DocHelper.Fields.Length; i++) + { + IIndexableField f = DocHelper.Fields[i]; + if (f.FieldType.IsIndexed) + { + Assert.AreEqual(reader.GetNormValues(f.Name) != null, !f.FieldType.OmitNorms); + Assert.AreEqual(reader.GetNormValues(f.Name) != null, !DocHelper.NoNorms.ContainsKey(f.Name)); + if (reader.GetNormValues(f.Name) == null) + { + // test for norms of null + NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name); + Assert.IsNull(norms); + } + } + } + } + + [Test] + public virtual void TestTermVectors() + { + Terms result = Reader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY); + Assert.IsNotNull(result); + Assert.AreEqual(3, result.Count); + TermsEnum termsEnum = result.GetIterator(null); + while (termsEnum.Next() != null) + { + string term = termsEnum.Term.Utf8ToString(); + int freq = (int)termsEnum.TotalTermFreq; + Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1); + Assert.IsTrue(freq > 0); + } + + Fields results = Reader.GetTermVectors(0); + Assert.IsTrue(results != null); + Assert.AreEqual(3, results.Count, "We do not have 3 term freq vectors"); + } + + [Test] + public virtual void TestOutOfBoundsAccess() + { + int numDocs = Reader.MaxDoc; + try + { + Reader.Document(-1); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.IndexOutOfRangeException expected) +#pragma warning restore 168 + { + } + + try + { + Reader.GetTermVectors(-1); + Assert.Fail(); + } +#pragma warning disable 168 + catch (System.IndexOutOfRangeException expected) +#pragma warning 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs b/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
new file mode 100644
index 0000000..f876774
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
@@ -0,0 +1,274 @@
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+    using NUnit.Framework;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Document = Documents.Document;
+    using Field = Field;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+
+    [TestFixture]
+    public class TestSegmentTermDocs : LuceneTestCase
+    {
+        private Document TestDoc;
+        private Directory Dir;
+        private SegmentCommitInfo Info;
+
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            TestDoc = new Document();
+            Dir = NewDirectory();
+            DocHelper.SetupDoc(TestDoc);
+            Info = DocHelper.WriteDoc(Random(), Dir, TestDoc);
+        }
+
+        [TearDown]
+        public override void TearDown()
+        {
+            Dir.Dispose();
+            base.TearDown();
+        }
+
+        [Test]
+        public virtual void Test()
+        {
+            Assert.IsTrue(Dir != null);
+        }
+
+        [Test]
+        public virtual void TestTermDocs()
+        {
+            TestTermDocs(1);
+        }
+
+        public virtual void TestTermDocs(int indexDivisor)
+        {
+            //After adding the document, we should be able to read it back in
+            SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
+            Assert.IsTrue(reader != null);
+            Assert.AreEqual(indexDivisor, reader.TermInfosIndexDivisor);
+
+            TermsEnum terms = reader.Fields.GetTerms(DocHelper.TEXT_FIELD_2_KEY).GetIterator(null);
+            terms.SeekCeil(new BytesRef("field"));
+            DocsEnum termDocs = TestUtil.Docs(Random(), terms, reader.LiveDocs, null, DocsEnum.FLAG_FREQS);
+            if (termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+            {
+                int docId = termDocs.DocID;
+                Assert.IsTrue(docId == 0);
+                int freq = termDocs.Freq;
+                Assert.IsTrue(freq == 3);
+            }
+            reader.Dispose();
+        }
+
+        [Test]
+        public virtual void TestBadSeek()
+        {
+            TestBadSeek(1);
+        }
+
+        public virtual void TestBadSeek(int indexDivisor)
+        {
+            {
+                //After adding the document, we should be able to read it back in
+                SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
+                Assert.IsTrue(reader != null);
+                DocsEnum termDocs = TestUtil.Docs(Random(), reader, "textField2", new BytesRef("bad"), reader.LiveDocs, null, 0);
+
+                Assert.IsNull(termDocs);
+                reader.Dispose();
+            }
+            {
+                //After adding the document, we should be able to read it back in
+                SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
+                Assert.IsTrue(reader != null);
+                DocsEnum termDocs = TestUtil.Docs(Random(), reader, "junk", new BytesRef("bad"), reader.LiveDocs, null, 0);
+                Assert.IsNull(termDocs);
+                reader.Dispose();
+            }
+        }
+
+        [Test]
+        public virtual void TestSkipTo()
+        {
+            TestSkipTo(1);
+        }
+
+        public virtual void TestSkipTo(int indexDivisor)
+        {
+            Directory dir = NewDirectory();
+            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
+
+            Term ta = new Term("content", "aaa");
+            for (int i = 0; i < 10; i++)
+            {
+                AddDoc(writer, "aaa aaa aaa aaa");
+            }
+
+            Term tb = new Term("content", "bbb");
+            for (int i = 0; i < 16; i++)
+            {
+                AddDoc(writer, "bbb bbb bbb bbb");
+            }
+
+            Term tc = new Term("content", "ccc");
+            for (int i = 0; i < 50; i++)
+            {
+                AddDoc(writer, "ccc ccc ccc ccc");
+            }
+
+            // assure that we deal with a single segment
+            writer.ForceMerge(1);
+            writer.Dispose();
+
+            IndexReader reader = DirectoryReader.Open(dir, indexDivisor);
+
+            DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field, new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+            // without optimization (assumption skipInterval == 16)
+
+            // with next
+            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(0, tdocs.DocID);
+            Assert.AreEqual(4, tdocs.Freq);
+            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(1, tdocs.DocID);
+            Assert.AreEqual(4, tdocs.Freq);
+            Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(2, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(4, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(9, tdocs.DocID);
+            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);
+
+            // without next
+            tdocs = TestUtil.Docs(Random(), reader, ta.Field, new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0);
+
+            Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(0, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(4, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(9, tdocs.DocID);
+            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);
+
+            // exactly skipInterval documents and therefore with optimization
+
+            // with next
+            tdocs = TestUtil.Docs(Random(), reader, tb.Field, new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(10, tdocs.DocID);
+            Assert.AreEqual(4, tdocs.Freq);
+            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(11, tdocs.DocID);
+            Assert.AreEqual(4, tdocs.Freq);
+            Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(12, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(15, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(24, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(25, tdocs.DocID);
+            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);
+
+            // without next
+            tdocs = TestUtil.Docs(Random(), reader, tb.Field, new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(10, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(15, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(24, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(25, tdocs.DocID);
+            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);
+
+            // much more than skipInterval documents and therefore with optimization
+
+            // with next
+            tdocs = TestUtil.Docs(Random(), reader, tc.Field, new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(26, tdocs.DocID);
+            Assert.AreEqual(4, tdocs.Freq);
+            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(27, tdocs.DocID);
+            Assert.AreEqual(4, tdocs.Freq);
+            Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(28, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(40, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(57, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(74, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(75, tdocs.DocID);
+            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);
+
+            //without next
+            tdocs = TestUtil.Docs(Random(), reader, tc.Field, new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0);
+            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(26, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(40, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(57, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(74, tdocs.DocID);
+            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
+            Assert.AreEqual(75, tdocs.DocID);
+            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);
+
+            reader.Dispose();
+            dir.Dispose();
+        }
+
+        [Test]
+        public virtual void TestIndexDivisor()
+        {
+            TestDoc = new Document();
+            DocHelper.SetupDoc(TestDoc);
+            DocHelper.WriteDoc(Random(), Dir, TestDoc);
+            TestTermDocs(2);
+            TestBadSeek(2);
+            TestSkipTo(2);
+        }
+
+        private void AddDoc(IndexWriter writer, string value)
+        {
+            Document doc = new Document();
+            doc.Add(NewTextField("content", value, Field.Store.NO));
+            writer.AddDocument(doc);
+        }
+    }
+}
\ No newline at end of file
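
Every TestSkipTo assertion above is an instance of the DocIdSetIterator.Advance(target) contract: it positions the enum on the first document whose id is >= target and returns that id, or NO_MORE_DOCS when no such document remains; the contract leaves behavior undefined if target is not beyond the current DocID. A minimal sketch of that contract, not part of the commit, assuming it runs inside TestSkipTo's scope right after the reader is opened (term "bbb" occupies docs 10..25 there; the variable name bbb is illustrative):

    // Hypothetical fragment mirroring the "bbb" assertions above.
    DocsEnum bbb = TestUtil.Docs(Random(), reader, "content", new BytesRef("bbb"),
                                 MultiFields.GetLiveDocs(reader), null, 0);
    Assert.AreEqual(10, bbb.Advance(5));   // docs 5..9 lack "bbb", so it lands on doc 10
    Assert.AreEqual(25, bbb.Advance(25));  // an exact hit returns the target itself
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, bbb.Advance(26)); // past the last match
    // Advance() may only move forward; a target at or before the current
    // DocID is outside the contract.
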
